def prepare_trainer(self, generator_loss, discriminator_loss):
    '''Helper function for init_opt'''
    all_vars = tf.trainable_variables()  # All trainable variables.
    # Variable names are prefixed with g_ / d_ to tell G and D variables apart.
    g_vars = [var for var in all_vars if var.name.startswith('g_')]
    d_vars = [var for var in all_vars if var.name.startswith('d_')]

    # Define the training op for G: pick the optimizer, the loss to minimize,
    # and the variables to train (all other variables stay fixed).
    generator_opt = tf.train.AdamOptimizer(self.generator_lr, beta1=0.5)
    self.generator_trainer = pt.apply_optimizer(generator_opt,
                                                losses=[generator_loss],
                                                var_list=g_vars)
    discriminator_opt = tf.train.AdamOptimizer(self.discriminator_lr, beta1=0.5)
    self.discriminator_trainer = pt.apply_optimizer(discriminator_opt,
                                                    losses=[discriminator_loss],
                                                    var_list=d_vars)
    self.log_vars.append(("g_learning_rate", self.generator_lr))
    self.log_vars.append(("d_learning_rate", self.discriminator_lr))
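# A minimal sketch of how the two trainers built above are typically driven,
# assuming a session and a feed dict for the model inputs (`sess`, `feed_dict`
# and `num_steps` are hypothetical names, not part of the original snippet):
#
#     for step in range(num_steps):
#         # Alternate one D update with one G update per step.
#         sess.run(self.discriminator_trainer, feed_dict)
#         sess.run(self.generator_trainer, feed_dict)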
def prepare_trainer(self, generator_loss, discriminator_loss, encoder_loss):
    '''Helper function for init_opt'''
    all_vars = tf.trainable_variables()

    if cfg.TRAIN.GENERATOR:
        g_vars = [var for var in all_vars if var.name.startswith('g_')]
        generator_opt = tf.train.AdamOptimizer(self.generator_lr, beta1=0.5)
        self.generator_trainer = pt.apply_optimizer(generator_opt,
                                                    losses=[generator_loss],
                                                    var_list=g_vars)
        self.log_vars.append(("g_learning_rate", self.generator_lr))

    if cfg.TRAIN.SUPERVISED and cfg.TRAIN.ENCODER:
        e_vars = [var for var in all_vars if var.name.startswith('e_')]
        encoder_opt = tf.train.AdamOptimizer(self.encoder_lr, beta1=0.5)
        self.encoder_trainer = pt.apply_optimizer(encoder_opt,
                                                  losses=[encoder_loss],
                                                  var_list=e_vars)
        self.log_vars.append(("e_learning_rate", self.encoder_lr))

    if cfg.TRAIN.DISCRIMINATOR:
        d_vars_to_train = []
        if cfg.TRAIN.DISCRIMINATOR_IMAGES:
            d_i_vars = [var for var in all_vars if var.name.startswith('d_i_')]
            d_vars_to_train += d_i_vars
        if cfg.TRAIN.DISCRIMINATOR_LATENTS:
            d_l_vars = [var for var in all_vars if var.name.startswith('d_l_')]
            d_vars_to_train += d_l_vars
        if cfg.TRAIN.DISCRIMINATOR_FUSION:
            d_f_vars = [var for var in all_vars if var.name.startswith('d_f')]
            d_vars_to_train += d_f_vars
        discriminator_opt = tf.train.AdamOptimizer(self.discriminator_lr,
                                                   beta1=0.5)
        self.discriminator_trainer = pt.apply_optimizer(
            discriminator_opt,
            losses=[discriminator_loss],
            var_list=d_vars_to_train)
        self.log_vars.append(("d_learning_rate", self.discriminator_lr))
def main(_=None):
    # Since we are feeding our data as numpy arrays, we need to create
    # placeholders in the graph.
    # These must then be fed using the feed dict.
    image_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 28, 28, 1])
    labels_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 10])

    # Create our model. The result of softmax_classifier is a namedtuple
    # that has members result.loss and result.softmax.
    if FLAGS.model == 'full':
        result = multilayer_fully_connected(image_placeholder,
                                            labels_placeholder)
    elif FLAGS.model == 'conv':
        result = lenet5(image_placeholder, labels_placeholder)
    else:
        raise ValueError('model must be full or conv: %s' % FLAGS.model)

    # For tracking accuracy in evaluation, we need to add an evaluation node.
    # We only include this part of the graph when testing, so we need to
    # specify that in the phase.
    # Some ops have different behaviors in test vs train and these take a
    # phase argument.
    accuracy = result.softmax.evaluate_classifier(labels_placeholder,
                                                  phase=pt.Phase.test)

    # Grab the data as numpy arrays.
    train_images, train_labels = data_utils.mnist(training=True)
    test_images, test_labels = data_utils.mnist(training=False)

    # Create the gradient optimizer and apply it to the graph.
    # pt.apply_optimizer adds regularization losses and sets up a step counter
    # (pt.global_step()) for you.
    optimizer = tf.train.GradientDescentOptimizer(0.01)
    train_op = pt.apply_optimizer(optimizer, losses=[result.loss])

    # We can set a save_path in the runner to automatically checkpoint every
    # so often. Otherwise at the end of the session, the model will be lost.
    runner = pt.train.Runner(save_path=FLAGS.save_path)
    with tf.Session():
        for epoch in xrange(10):
            # Shuffle the training data.
            train_images, train_labels = data_utils.permute_data(
                (train_images, train_labels))

            runner.train_model(
                train_op,
                result.loss,
                EPOCH_SIZE,
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, train_images,
                                              train_labels),
                print_every=100)
            classification_accuracy = runner.evaluate_model(
                accuracy,
                TEST_SIZE,
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, test_images,
                                              test_labels))
            print('Accuracy after %d epoch %g%%' % (
                epoch + 1, classification_accuracy * 100))
def test_queues(self):
    qr = FakeQueueRunner()
    tf.train.add_queue_runner(qr)
    runner = local_trainer.Runner()
    with tf.Session():
        optimizer = tf.train.GradientDescentOptimizer(0.5)
        train_op = pt.apply_optimizer(optimizer,
                                      losses=[self.softmax_result.loss])
        runner.train_model(train_op, self.softmax_result.loss, 100,
                           (self.input, self.target), self.xor_data,
                           print_every=2)
    with tf.Session():
        with self.assertRaisesRegexp(ValueError, r'.*\bstop_queues\b.*'):
            runner.train_model(train_op, self.softmax_result.loss, 100,
                               (self.input, self.target), self.xor_data,
                               print_every=2)
        runner.stop_queues()
    qr.assert_worked(self)
def __init__(self):
    self.data_directory = os.path.join(FLAGS.working_directory, "MNIST")
    if not os.path.exists(self.data_directory):
        os.makedirs(self.data_directory)
    self.save_path = FLAGS.working_directory + '/save.ckpt'
    self.mnist = read_data_set("/tmp/vae/converted_java.npy")
    self.input_tensor = tf.placeholder(tf.float32,
                                       [FLAGS.batch_size, 28 * 28])

    with pt.defaults_scope(activation_fn=tf.nn.elu,
                           batch_normalize=True,
                           learned_moments_update_rate=0.0003,
                           variance_epsilon=0.001,
                           scale_after_normalization=True):
        with pt.defaults_scope(phase=pt.Phase.train):
            with tf.variable_scope("model") as scope:
                self.output_tensor, self.mean, self.stddev = decoder(
                    encoder(self.input_tensor))
        with pt.defaults_scope(phase=pt.Phase.test):
            with tf.variable_scope("model", reuse=True) as scope:
                self.sampled_tensor, _, _ = decoder()

    self.vae_loss = get_vae_cost(self.mean, self.stddev)
    self.rec_loss = get_reconstruction_cost(self.output_tensor,
                                            self.input_tensor)
    self.loss = self.vae_loss + self.rec_loss
    self.optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate, epsilon=1.0)
    self.train = pt.apply_optimizer(self.optimizer, losses=[self.loss])
    self.init = tf.initialize_all_variables()
    self.saver = tf.train.Saver()
def run_model(result):
    accuracy = result.softmax.evaluate_classifier(labels_placeholder,
                                                  phase=pt.Phase.test)
    train_images, train_labels = data_utils.mnist(training=True)
    test_images, test_labels = data_utils.mnist(training=False)
    optimizer = tf.train.GradientDescentOptimizer(0.01)
    train_op = pt.apply_optimizer(optimizer, losses=[result.loss])
    runner = pt.train.Runner(save_path=FLAGS.save_path)
    with tf.Session():
        for epoch in range(0, 10):
            train_images, train_labels = data_utils.permute_data(
                (train_images, train_labels))
            runner.train_model(train_op, result.loss, EPOCH_SIZE,
                               feed_vars=(image_placeholder,
                                          labels_placeholder),
                               feed_data=pt.train.feed_numpy(
                                   BATCH_SIZE, train_images, train_labels),
                               print_every=100)
            classification_accuracy = runner.evaluate_model(
                accuracy,
                TEST_SIZE,
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, test_images,
                                              test_labels))
            print("epoch", epoch + 1)
            print("accuracy", classification_accuracy)
def test_eval(self):
    f = os.path.join(self.tmp_file, "checkpoint")
    runner = local_trainer.Runner(save_path=f)
    with tf.Session():
        classification_accuracy = (
            self.softmax_result.softmax.evaluate_classifier(
                self.target, phase=pt.Phase.test))
        optimizer = tf.train.GradientDescentOptimizer(0.2)
        train_op = pt.apply_optimizer(optimizer,
                                      losses=[self.softmax_result.loss])
        runner.train_model(train_op, self.softmax_result.loss, 100,
                           (self.input, self.target), self.xor_data,
                           print_every=50)
        self.assertTrue(runner._last_init)
        save_paths = list(runner._saver.last_checkpoints)

        # The accuracy should be 50% right now since the model is
        # consistently generated.
        accuracy = runner.evaluate_model(classification_accuracy, 1,
                                         (self.input, self.target),
                                         self.xor_data)
        self.assertEquals(runner._saver.last_checkpoints, save_paths,
                          "No additional paths should have been saved.")
        self.assertFalse(runner._last_init)
        self.assertEqual(accuracy, 0.5)

        # Train the model to 100% accuracy.
        runner.train_model(train_op, self.softmax_result.loss, 2000,
                           (self.input, self.target), self.xor_data,
                           print_every=1000)
        accuracy = runner.evaluate_model(classification_accuracy, 1,
                                         (self.input, self.target),
                                         self.xor_data)
        self.assertFalse(runner._last_init)
        # Make sure that the previous computation didn't impact this eval.
        self.assertEqual(accuracy, 1.0)
def define_one_trainer(self, loss, learning_rate, key_word):
    '''Helper function for init_opt'''
    all_vars = tf.trainable_variables()
    train_vars = [var for var in all_vars if var.name.startswith(key_word)]

    opt = tf.train.AdamOptimizer(learning_rate, beta1=0.5)
    trainer = pt.apply_optimizer(opt, losses=[loss], var_list=train_vars)
    return trainer
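# Usage sketch for the helper above (the losses and learning-rate attributes
# here are hypothetical; it assumes variables are prefixed 'g_'/'d_' as in the
# other snippets):
#
#     self.generator_trainer = self.define_one_trainer(
#         generator_loss, self.generator_lr, 'g_')
#     self.discriminator_trainer = self.define_one_trainer(
#         discriminator_loss, self.discriminator_lr, 'd_')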
def train_neural_network(X, Y):
    '''
    predict = convolutional_neural_network(X)
    cost_func = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        logits=predict, labels=Y))
    optimizer = tf.train.AdamOptimizer().minimize(cost_func)  # default learning rate is 0.001
    correct = tf.equal(tf.argmax(predict, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
    '''
    # Build the same network with PrettyTensor instead.
    input_tensor = tf.reshape(X, [-1, 28, 28, 1])
    pretty_input = pt.wrap(input_tensor)
    predict, cost_func = (pretty_input
                          .conv2d(kernel=5, depth=32, name='layer_conv1')
                          .max_pool(kernel=2, stride=2)
                          .conv2d(kernel=5, depth=64, name='layer_conv2')
                          .max_pool(kernel=2, stride=2)
                          .flatten()
                          .fully_connected(size=1024, name='layer_fc1')
                          .softmax_classifier(num_classes=n_output_layer,
                                              labels=Y))
    accuracy = predict.evaluate_classifier(Y)

    optimizer_ = tf.train.GradientDescentOptimizer(0.1)  # learning rate
    optimizer = pt.apply_optimizer(optimizer_, losses=[cost_func])

    epochs = 1
    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        epoch_loss = 0
        for epoch in range(epochs):
            for step in range(mnist.train.num_examples // batch_size):
                x, y = mnist.train.next_batch(batch_size)
                _, c = session.run([optimizer, cost_func],
                                   feed_dict={X: x, Y: y})
                # epoch_loss += c
                # print(epoch, ' : ', epoch_loss)
                if step % 20 == 0:
                    print('epoch', epoch, '\t|', 'step', step, '\t|',
                          '\033[1;35m train acc \033[0m',
                          accuracy.eval({X: x, Y: y}), '\t|'
                          '\033[1;35m test acc \033[0m',
                          '\033[1;34m ' + str(accuracy.eval(
                              {X: mnist.test.images,
                               Y: mnist.test.labels})) + '\033[0m')

        print('accuracy: ', accuracy.eval({X: mnist.test.images,
                                           Y: mnist.test.labels}))
def test_run(self):
    runner = local_trainer.Runner()
    with tf.Session():
        optimizer = tf.train.GradientDescentOptimizer(0.5)
        train_op = pt.apply_optimizer(optimizer,
                                      losses=[self.softmax_result.loss])
        runner.train_model(train_op, self.softmax_result.loss, 10,
                           (self.input, self.target), self.xor_data,
                           print_every=2)
def main(_=None):
    print('Starting Baby Names')
    input_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])
    output_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, SEXES])
    inp = lstm_func.reshape_data_to_lstm_format(input_placeholder)

    # Create a label for each timestep.
    labels_1 = tf.reshape(tf.tile(output_placeholder, [1, TIMESTEPS]),
                          [BATCH_SIZE, TIMESTEPS, SEXES])
    labels = lstm_func.reshape_data_to_lstm_format(labels_1,
                                                   per_example_length=2)

    length_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, 1])
    t = tf.concat(1, [tf.constant(
        numpy.arange(BATCH_SIZE).reshape((BATCH_SIZE, 1)), dtype=tf.int32),
        length_placeholder])
    per_example_weights = lstm_func.reshape_data_to_lstm_format(
        tf.sparse_to_dense(t, [BATCH_SIZE, TIMESTEPS], 1.0,
                           default_value=0.0)).squeeze()

    with tf.variable_scope('baby_names'):
        result = create_model(inp, labels, TIMESTEPS, per_example_weights)
    with tf.variable_scope('baby_names', reuse=True):
        test_result = create_model(inp, labels, TIMESTEPS,
                                   per_example_weights, phase=pt.Phase.test)

    accuracy = test_result.softmax.evaluate_classifier(
        labels, phase=pt.Phase.test, per_example_weights=per_example_weights)
    batch_accuracy = result.softmax.evaluate_classifier(
        labels, phase=pt.Phase.train, per_example_weights=per_example_weights)

    names, sex, lengths = data_baby_names.baby_names(TIMESTEPS)
    epoch_size = len(names) // BATCH_SIZE
    optimizer = tf.train.AdagradOptimizer(
        tf.train.exponential_decay(1.0, pt.global_step(), epoch_size, 0.95,
                                   staircase=True))
    train_op = pt.apply_optimizer(optimizer, losses=[result.loss])

    # We can set a save_path in the runner to automatically checkpoint every
    # so often. Otherwise at the end of the session, the model will be lost.
    runner = pt.train.Runner(save_path=FLAGS.save_path)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.25)
    sess = tf.InteractiveSession(
        config=tf.ConfigProto(gpu_options=gpu_options))
    # with tf.Session():
    for epoch in xrange(100):
        # Shuffle the training data.
        names, sex, lengths = permute.permute_data((names, sex, lengths))

        runner.train_model(
            train_op,
            [result.loss, batch_accuracy],
            epoch_size,
            feed_vars=(input_placeholder, output_placeholder,
                       length_placeholder),
            feed_data=pt.train.feed_numpy(BATCH_SIZE, names, sex, lengths),
            print_every=100)
        classification_accuracy = runner.evaluate_model(
            accuracy,
            epoch_size,
            print_every=0,
            feed_vars=(input_placeholder, output_placeholder,
                       length_placeholder),
            feed_data=pt.train.feed_numpy(BATCH_SIZE, names, sex, lengths))
        print('Accuracy after epoch %d: %g%%' % (
            epoch + 1, classification_accuracy * 100))
def train_op_loss(self, input_placeholder, labels, reuse=None):
    # Training and eval graph.
    with tf.variable_scope(self.name, reuse=reuse):
        # Core train graph.
        result = self.create(input_placeholder,
                             pt.Phase.train).softmax(labels)
        train_op = pt.apply_optimizer(tf.train.AdagradOptimizer(0.5),
                                      losses=[result.loss])
        return train_op, result.loss
def main(_=None):
    image_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 22, 95])
    labels_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 2])

    # Create our model. The result of softmax_classifier is a namedtuple
    # that has members result.loss and result.softmax.
    images = pt.wrap(tf.expand_dims(image_placeholder, -1))
    with pt.defaults_scope(activation_fn=tf.nn.relu, l2loss=0.00001):
        result = (images
                  .conv2d(5, 20)
                  # .max_pool(2, 2)
                  .conv2d(5, 50)
                  # .max_pool(2, 2)
                  .flatten()
                  .fully_connected(500)
                  .dropout(0.5)
                  .softmax_classifier(2, labels_placeholder))

    accuracy = result.softmax.evaluate_classifier(labels_placeholder,
                                                  phase=pt.Phase.test)

    # Grab the data as numpy arrays.
    train_images, train_labels, test_images, test_labels = prepare_data()

    # Create the gradient optimizer and apply it to the graph.
    # pt.apply_optimizer adds regularization losses and sets up a step counter
    # (pt.global_step()) for you.
    optimizer = tf.train.AdamOptimizer()
    train_op = pt.apply_optimizer(optimizer, losses=[result.loss])

    # We can set a save_path in the runner to automatically checkpoint every
    # so often. Otherwise at the end of the session, the model will be lost.
    runner = pt.train.Runner(save_path=FLAGS.save_path)
    with tf.Session():
        print('Initializing')
        tf.initialize_all_variables().run()
        for epoch in xrange(EPOCHS):
            # Shuffle the training data.
            train_images, train_labels = permute_data(
                (train_images, train_labels))

            runner.train_model(
                train_op,
                result.loss,
                len(train_images),
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, train_images,
                                              train_labels),
                print_every=100)
            classification_accuracy = runner.evaluate_model(
                accuracy,
                len(test_images),
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, test_images,
                                              test_labels))
            print('Accuracy after %d epoch %g%%' % (
                epoch + 1, classification_accuracy * 100))
def initialize(self, n_iter=1000, n_data=None, n_print=100,
               optimizer=None, sess=None):
    """Initialize inference algorithm.

    Parameters
    ----------
    n_iter : int, optional
        Number of iterations for optimization.
    n_data : int, optional
        Number of samples for data subsampling. Default is to use all
        the data.
    n_print : int, optional
        Number of iterations for each print progress.
    optimizer : str, optional
        Whether to use a TensorFlow or a PrettyTensor optimizer.
        Defaults to TensorFlow.
    sess : tf.Session, optional
        TensorFlow session for computation.
    """
    self.n_iter = n_iter
    self.n_data = n_data
    self.n_print = n_print

    self.loss = tf.constant(0.0)
    loss = self.build_loss()
    if optimizer is None:
        # Use ADAM with a decaying scale factor.
        global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = 0.1
        learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                   global_step,
                                                   100, 0.9, staircase=True)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        self.train = optimizer.minimize(loss, global_step=global_step)
    else:
        optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
        self.train = pt.apply_optimizer(optimizer, losses=[loss])

    init = tf.initialize_all_variables()
    if sess is None:
        sess = tf.Session()

    sess.run(init)
    return sess
def initialize(self, n_iter=1000, n_data=None, n_print=100,
               optimizer=None, scope=None):
    """Initialize variational inference algorithm.

    Set up ``tf.train.AdamOptimizer`` with a decaying scale factor.
    Initialize all variables.

    Parameters
    ----------
    n_iter : int, optional
        Number of iterations for optimization.
    n_data : int, optional
        Number of samples for data subsampling. Default is to use all
        the data.
    n_print : int, optional
        Number of iterations for each print progress. To suppress print
        progress, specify None.
    optimizer : str, optional
        Whether to use a TensorFlow or a PrettyTensor optimizer.
        Defaults to TensorFlow.
    scope : str, optional
        Scope of TensorFlow variable objects to optimize over.
    """
    self.n_iter = n_iter
    self.n_data = n_data
    self.n_print = n_print

    self.loss = tf.constant(0.0)
    loss = self.build_loss()
    if optimizer is None:
        var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     scope=scope)
        # Use ADAM with a decaying scale factor.
        global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = 0.1
        learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                   global_step,
                                                   100, 0.9, staircase=True)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        self.train = optimizer.minimize(loss, global_step=global_step,
                                        var_list=var_list)
    else:
        if scope is not None:
            raise NotImplementedError(
                "PrettyTensor optimizer does not accept a variable scope.")

        optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
        self.train = pt.apply_optimizer(optimizer, losses=[loss])

    init = tf.initialize_all_variables()
    init.run()
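# Usage sketch (the `inference` object is hypothetical; argument values are
# illustrative). The default path builds its own Adam with a decaying learning
# rate; passing any other optimizer value routes through pt.apply_optimizer,
# which is why a variable scope is rejected on that branch:
#
#     inference.initialize(n_iter=1000, n_print=100)      # TensorFlow path
#     inference.initialize(optimizer='prettytensor')      # PrettyTensor path
#     inference.initialize(optimizer='prettytensor',
#                          scope='variational')           # NotImplementedError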
def main(_=None):
    image_shape = inp.get_image_shape(FLAGS.input_folder)
    batch_shape = (BATCH_SIZE,) + image_shape
    print('>>', image_shape, batch_shape)

    image_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 28, 28, 1])
    labels_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 10])

    if FLAGS.model == 'full':
        print('fully connected network')
        result = multilayer_fully_connected(image_placeholder,
                                            labels_placeholder)
    elif FLAGS.model == 'conv':
        print('conv network')
        result = lenet5(image_placeholder, labels_placeholder)

    accuracy = result.softmax.evaluate_classifier(labels_placeholder,
                                                  phase=pt.Phase.test)

    # Grab the data as numpy arrays.
    train_images, train_labels = data_utils.mnist(training=True)
    test_images, test_labels = data_utils.mnist(training=False)
    print(train_images.shape)
    print(train_labels.shape)

    optimizer = tf.train.GradientDescentOptimizer(0.01)
    train_op = pt.apply_optimizer(optimizer, losses=[result.loss])

    runner = pt.train.Runner(save_path=FLAGS.save_path)
    with tf.Session():
        for epoch in xrange(20):
            # Shuffle the training data.
            train_images, train_labels = data_utils.permute_data(
                (train_images, train_labels))
            train_images = inp.get_images(FLAGS.input_folder)

            runner.train_model(
                train_op,
                result.loss,
                _epoch_size,
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, train_images,
                                              train_labels),
                print_every=100)
            classification_accuracy = runner.evaluate_model(
                accuracy,
                _test_size,
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, test_images,
                                              test_labels))
            print('Accuracy after %d epoch %g%%' % (
                epoch + 1, classification_accuracy * 100))
def test_checkpoint(self):
    f = os.path.join(self.tmp_file, "checkpoint")
    runner = local_trainer.Runner(save_path=f)
    with tf.Session():
        optimizer = tf.train.GradientDescentOptimizer(0.1)
        train_op = pt.apply_optimizer(optimizer,
                                      losses=[self.softmax_result.loss])
        runner.train_model(train_op, self.softmax_result.loss, 10,
                           (self.input, self.target), self.xor_data,
                           print_every=2)
        assert runner._saver.last_checkpoints, "Expected checkpoints."
        for x in runner._saver.last_checkpoints:
            self.assertTrue(os.path.isfile(x),
                            "Promised file not saved: %s" % x)
            self.assertTrue(x.startswith(f), "Name not as expected: %s" % x)
def train(self, epochs, batch_size, learning_rate, save_to=None):
    self.train_step = pt.apply_optimizer(
        tf.train.AdamOptimizer(learning_rate, epsilon=1),
        losses=[self.error_function])
    init = tf.initialize_all_variables()
    self.sess.run(init)
    pbar = ProgressBar(widgets=[Percentage(), Bar()], maxval=epochs).start()
    while self.get_epoch() < epochs:
        input_data = self.hdf5reader.next()
        _, loss_value = self.sess.run(
            [self.train_step, self.error_function],
            {self.encoder.input_data: input_data})
        pbar.update(self.get_epoch())
    pbar.finish()
def initialize(self, n_data=None):
    # TODO refactor to use VariationalInference's initialize()
    self.n_data = n_data
    # TODO don't fix number of covariates
    self.x = tf.placeholder(tf.float32, [self.n_data, 28 * 28])
    self.losses = tf.constant(0.0)
    loss = self.build_loss()
    optimizer = tf.train.AdamOptimizer(1e-2, epsilon=1.0)
    # TODO move this to not rely on Pretty Tensor
    self.train = pt.apply_optimizer(optimizer, losses=[loss])
    init = tf.initialize_all_variables()
    sess = tf.Session()
    sess.run(init)
    return sess
def restore_helper(self, runner):
    with tf.Session():
        classification_accuracy = (
            self.softmax_result.softmax.evaluate_classifier(
                self.target, phase=pt.Phase.test))
        optimizer = tf.train.GradientDescentOptimizer(0.5)
        train_op = pt.apply_optimizer(optimizer,
                                      losses=[self.softmax_result.loss])
        runner.train_model(train_op, self.softmax_result.loss, 10,
                           (self.input, self.target), self.xor_data,
                           print_every=2)
        self.assertTrue(runner._last_init)
        self.assertFalse(runner._last_restore)
    with tf.Session():
        save_paths = list(runner._saver.last_checkpoints)
        runner.evaluate_model(classification_accuracy, 1,
                              (self.input, self.target), self.xor_data)
        self.assertEquals(runner._saver.last_checkpoints, save_paths,
                          "No additional paths should have been saved.")
        self.assertFalse(runner._last_init)
def main(_=None):
    image_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 28 * 28])
    labels_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 10])

    if FLAGS.model == 'full':
        result = multilayer_fully_connected(image_placeholder,
                                            labels_placeholder)
    elif FLAGS.model == 'conv':
        result = lenet5(image_placeholder, labels_placeholder)
    else:
        raise ValueError('model must be full or conv: %s' % FLAGS.model)

    accuracy = result.softmax.evaluate_classifier(labels_placeholder,
                                                  phase=pt.Phase.test)

    optimizer = tf.train.GradientDescentOptimizer(0.01)
    train_op = pt.apply_optimizer(optimizer, losses=[result.loss])

    # Data.
    mnist = data_mnist.read_data_sets(one_hot=True)

    runner = pt.train.Runner(save_path=FLAGS.save_path)
    with tf.Session():
        for epoch in xrange(10):
            # Train.
            runner.train_model(
                train_op,
                result.loss,
                EPOCH_SIZE,
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, mnist.train.images,
                                              mnist.train.labels),
                print_every=100)
            # Accuracy.
            classification_accuracy = runner.evaluate_model(
                accuracy,
                TEST_SIZE,
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, mnist.test.images,
                                              mnist.test.labels))
            print('Accuracy after %d epoch %g%%' % (
                epoch + 1, classification_accuracy * 100))
def main(_=None):
    print 'Starting Baby Names'

    # Since we are feeding our data as numpy arrays, we need to create
    # placeholders in the graph.
    # These must then be fed using the feed dict.
    input_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])
    output_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, SEXES])

    inp = data_utils.reshape_data(input_placeholder)

    # Create a label for each timestep.
    labels = data_utils.reshape_data(
        tf.reshape(tf.tile(output_placeholder, [1, TIMESTEPS]),
                   [BATCH_SIZE, TIMESTEPS, SEXES]),
        per_example_length=2)

    # We also need to set per example weights so that the softmax doesn't
    # output a prediction on intermediate nodes.
    length_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, 1])

    # We need a dense multiplier for the per example weights. The only place
    # that has a non-zero loss is the first EOS after the last character of
    # the name; the characters in the name and the trailing EOS characters
    # are given a 0 loss by assigning the weight to 0.0, and in the end only
    # one character in each batch has a weight of 1.0.
    # sparse_to_dense does a lookup using the indices from the first Tensor.
    # Because we are filling in a 2D array, the indices need to be 2
    # dimensional. Since we want to assign 1 value for each row, the first
    # dimension can just be a sequence.
    t = tf.concat(1, [tf.constant(
        numpy.arange(BATCH_SIZE).reshape((BATCH_SIZE, 1)), dtype=tf.int32),
        length_placeholder])

    # Squeeze removes dimensions that are equal to 1. per_example_weights
    # must end up as 1 dimensional.
    per_example_weights = data_utils.reshape_data(
        tf.sparse_to_dense(t, [BATCH_SIZE, TIMESTEPS], 1.0,
                           default_value=0.0)).squeeze()

    # We need 2 copies of the graph that share variables. The first copy runs
    # training and will do dropout if specified and the second will not
    # include dropout. Dropout is controlled by the phase argument, which
    # sets the mode consistently throughout a graph.
    with tf.variable_scope('baby_names'):
        result = create_model(inp, labels, TIMESTEPS, per_example_weights)

    # Call variable scope by name so we also create a name scope. This
    # ensures that we share variables and our names are properly organized.
    with tf.variable_scope('baby_names', reuse=True):
        # Some ops have different behaviors in test vs train and these take a
        # phase argument.
        test_result = create_model(inp, labels, TIMESTEPS,
                                   per_example_weights, phase=pt.Phase.test)

    # For tracking accuracy in evaluation, we need to add an evaluation node.
    # We only run this when testing, so we need to specify that in the phase.
    accuracy = test_result.softmax.evaluate_classifier(
        labels, phase=pt.Phase.test, per_example_weights=per_example_weights)

    # We can also compute a batch accuracy to monitor progress.
    batch_accuracy = result.softmax.evaluate_classifier(
        labels, phase=pt.Phase.train,
        per_example_weights=per_example_weights)

    # Grab the inputs, outputs and lengths as numpy arrays.
    # Lengths could have been calculated from names, but it was easier to
    # calculate inside the utility function.
    names, sex, lengths = data_utils.baby_names(TIMESTEPS)

    epoch_size = len(names) / BATCH_SIZE
    # Create the gradient optimizer and apply it to the graph.
    # pt.apply_optimizer adds regularization losses and sets up a step counter
    # (pt.global_step()) for you.
    # This sequence model does very well with initially high rates.
    optimizer = tf.train.AdagradOptimizer(
        tf.train.exponential_decay(1.0, pt.global_step(), epoch_size, 0.95,
                                   staircase=True))
    train_op = pt.apply_optimizer(optimizer, losses=[result.loss])

    # We can set a save_path in the runner to automatically checkpoint every
    # so often. Otherwise at the end of the session, the model will be lost.
    runner = pt.train.Runner(save_path=FLAGS.save_path)
    with tf.Session():
        for epoch in xrange(100):
            # Shuffle the training data.
            names, sex, lengths = data_utils.permute_data(
                (names, sex, lengths))

            runner.train_model(
                train_op,
                [result.loss, batch_accuracy],
                epoch_size,
                feed_vars=(input_placeholder, output_placeholder,
                           length_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, names, sex,
                                              lengths),
                print_every=100)
            classification_accuracy = runner.evaluate_model(
                accuracy,
                epoch_size,
                print_every=0,
                feed_vars=(input_placeholder, output_placeholder,
                           length_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, names, sex,
                                              lengths))

            print 'Accuracy after epoch %d: %g%%' % (
                epoch + 1, classification_accuracy * 100)
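# Worked example of the weight mask built above (illustrative values): with
# BATCH_SIZE = 3, TIMESTEPS = 5 and lengths [[2], [4], [1]], t becomes
# [[0, 2], [1, 4], [2, 1]], so tf.sparse_to_dense yields
#     [[0, 0, 1, 0, 0],
#      [0, 0, 0, 0, 1],
#      [0, 1, 0, 0, 0]]
# i.e. exactly one 1.0 per row, at the first EOS after each name.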
def train_mnist_discrim(prior, lossmetric="KL"):
    '''Train model to output a transformation that prevents leaking private
    info, using a discriminator to aid producing natural images.
    '''
    data_dir = os.path.join(FLAGS.working_directory, "data")
    mnist_dir = os.path.join(data_dir, "mnist")
    model_directory = os.path.join(
        mnist_dir,
        lossmetric + "discrim_privacy_checkpoints" + str(encode_coef))

    input_tensor = tf.placeholder(tf.float32,
                                  [FLAGS.batch_size, FLAGS.input_size])
    output_tensor = tf.placeholder(tf.float32,
                                   [FLAGS.batch_size, FLAGS.output_size])
    private_tensor = tf.placeholder(tf.float32,
                                    [FLAGS.batch_size, FLAGS.private_size])
    rawc_tensor = tf.placeholder(tf.float32, [FLAGS.batch_size])
    prior_tensor = tf.constant(prior, tf.float32, [FLAGS.private_size])

    # Loading data files is not necessary for MNIST.
    mnist = input_data.read_data_sets(mnist_dir, one_hot=True)

    def get_feed(batch_no, training):
        if training:
            x, c = mnist.train.next_batch(FLAGS.batch_size)
        else:
            x, c = mnist.test.next_batch(FLAGS.batch_size)
        rawc = np.argmax(c, axis=1)
        return {input_tensor: x,
                output_tensor: x,
                private_tensor: c[:, :FLAGS.private_size],
                rawc_tensor: rawc}

    # Instantiate the model.
    with pt.defaults_scope(activation_fn=tf.nn.relu,
                           batch_normalize=True,
                           learned_moments_update_rate=3e-4,
                           variance_epsilon=1e-3,
                           scale_after_normalization=True):
        with pt.defaults_scope(phase=pt.Phase.train):
            with tf.variable_scope("encoder") as scope:
                z = dvibcomp.privacy_encoder(input_tensor, private_tensor)
                encode_params = tf.trainable_variables()
                e_param_len = len(encode_params)
            with tf.variable_scope("decoder") as scope:
                xhat, chat, mean, stddev = dvibcomp.mnist_predictor(z)
                all_params = tf.trainable_variables()
                d_param_len = len(all_params) - e_param_len
            with tf.variable_scope("discrim") as scope:
                D1 = dvibcomp.mnist_discriminator(input_tensor)  # positive samples
            with tf.variable_scope("discrim", reuse=True) as scope:
                D2 = dvibcomp.mnist_discriminator(xhat)  # negative samples
                all_params = tf.trainable_variables()
                discrim_len = len(all_params) - (d_param_len + e_param_len)

    # Calculate losses.
    _, KLloss = dvibloss.encoding_cost(xhat, chat, input_tensor,
                                       private_tensor, prior_tensor)
    loss2x, loss2c = dvibloss.recon_cost(xhat, chat, input_tensor,
                                         private_tensor, softmax=True)
    loss_g = dvibloss.get_gen_cost(D2)
    loss_d = dvibloss.get_discrim_cost(D1, D2)
    loss_vae = dvibloss.get_vae_cost(mean, stddev)

    # Record losses of the MI approximation and the Sibson MI.
    h_c, h_cz, _, _ = dvibloss.MI_approx(input_tensor, private_tensor,
                                         rawc_tensor, xhat, chat, z)
    I_c_cz = tf.abs(h_c - h_cz)
    # Use alpha = 3 first; this may be tuned.
    sibMI_c_cz = dvibloss.sibsonMI_approx(z, chat, 3)

    # Compose losses.
    if lossmetric == "KL":
        loss1 = encode_coef * loss_g + KLloss
    if lossmetric == "MI":
        loss1 = encode_coef * loss_g + I_c_cz
    if lossmetric == "sibMI":
        loss1 = encode_coef * loss_g + sibMI_c_cz
    loss2 = decode_coef * loss_g + loss2c
    loss3 = loss_d

    with tf.name_scope('pub_prediction'):
        with tf.name_scope('pub_distance'):
            pub_dist = tf.reduce_mean((xhat - output_tensor)**2)
    with tf.name_scope('sec_prediction'):
        with tf.name_scope('sec_distance'):
            sec_dist = tf.reduce_mean((chat - private_tensor)**2)
            # correct_pred = tf.less(tf.abs(chat - private_tensor), 0.5)
            correct_pred = tf.equal(tf.argmax(chat, axis=1),
                                    tf.argmax(private_tensor, axis=1))
            sec_acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate, epsilon=1.0)
    # Privatizer/encoder training op.
    e_train = pt.apply_optimizer(optimizer, losses=[loss1], regularize=True,
                                 include_marked=True, var_list=encode_params)
    # Generator/decoder training op.
    g_train = pt.apply_optimizer(optimizer, losses=[loss2], regularize=True,
                                 include_marked=True,
                                 var_list=all_params[e_param_len:])
    # Discriminator training op.
    d_train = pt.apply_optimizer(optimizer, losses=[loss3], regularize=True,
                                 include_marked=True,
                                 var_list=all_params[e_param_len + d_param_len:])

    # Logging matrices.
    e_loss_train = np.zeros(FLAGS.max_epoch)
    g_loss_train = np.zeros(FLAGS.max_epoch)
    d_loss_train = np.zeros(FLAGS.max_epoch)
    pub_dist_train = np.zeros(FLAGS.max_epoch)
    sec_dist_train = np.zeros(FLAGS.max_epoch)
    loss2x_train = np.zeros(FLAGS.max_epoch)
    loss2c_train = np.zeros(FLAGS.max_epoch)
    KLloss_train = np.zeros(FLAGS.max_epoch)
    MIloss_train = np.zeros(FLAGS.max_epoch)
    sibMIloss_train = np.zeros(FLAGS.max_epoch)
    sec_acc_train = np.zeros(FLAGS.max_epoch)
    e_loss_val = np.zeros(FLAGS.max_epoch)
    g_loss_val = np.zeros(FLAGS.max_epoch)
    d_loss_val = np.zeros(FLAGS.max_epoch)
    pub_dist_val = np.zeros(FLAGS.max_epoch)
    sec_dist_val = np.zeros(FLAGS.max_epoch)
    loss2x_val = np.zeros(FLAGS.max_epoch)
    loss2c_val = np.zeros(FLAGS.max_epoch)
    KLloss_val = np.zeros(FLAGS.max_epoch)
    MIloss_val = np.zeros(FLAGS.max_epoch)
    sibMIloss_val = np.zeros(FLAGS.max_epoch)
    sec_acc_val = np.zeros(FLAGS.max_epoch)
    xhat_val = []

    # Tensorboard logging
    # tf.summary.scalar('KL', KLloss)
    # tf.summary.scalar('loss_x', loss2x)
    # tf.summary.scalar('loss_c', loss2c)
    # tf.summary.scalar('pub_dist', pub_dist)
    # tf.summary.scalar('sec_dist', sec_dist)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    # Configure the session for memory.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # config.gpu_options.per_process_gpu_memory_fraction = 0.8
    config.log_device_placement = False
    sess = tf.Session(config=config)
    sess.run(init)
    # merged = tf.summary.merge_all()
    # train_writer = tf.summary.FileWriter(FLAGS.summary_dir + '/train', sess.graph)
    # test_writer = tf.summary.FileWriter(FLAGS.summary_dir + '/test')

    for epoch in range(FLAGS.max_epoch):
        widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()]
        pbar = ProgressBar(maxval=FLAGS.updates_per_epoch, widgets=widgets)
        pbar.start()

        pub_loss = 0
        sec_loss = 0
        sec_accv = 0
        e_training_loss = 0
        g_training_loss = 0
        d_training_loss = 0
        KLv = 0
        MIv = 0
        sibMIv = 0
        loss2xv = 0
        loss2cv = 0
        # pdb.set_trace()
        for i in range(FLAGS.updates_per_epoch):
            pbar.update(i)
            feeds = get_feed(i, True)
            # zv, xhatv, chatv, meanv, stddevv, sec_pred = sess.run(
            #     [z, xhat, chat, mean, stddev, correct_pred], feeds)
            (pub_tmp, sec_tmp, sec_acc_tmp, KLtmp, MItmp, sibMItmp,
             loss2xtmp, loss2ctmp, loss3tmp) = sess.run(
                 [pub_dist, sec_dist, sec_acc, KLloss, I_c_cz, sibMI_c_cz,
                  loss2x, loss2c, loss_vae], feeds)
            # _, e_loss_value, _, g_loss_value, _, d_loss_value = sess.run(
            #     [e_train, loss1, g_train, loss2, d_train, loss3], feeds)
            _, e_loss_value = sess.run([e_train, loss1], feeds)
            _, g_loss_value = sess.run([g_train, loss2], feeds)
            _, d_loss_value = sess.run([d_train, loss3], feeds)
            if (np.isnan(e_loss_value) or np.isnan(g_loss_value) or
                    np.isnan(d_loss_value)):
                pdb.set_trace()
                break
            # train_writer.add_summary(summary, i)
            e_training_loss += e_loss_value
            g_training_loss += g_loss_value
            d_training_loss += d_loss_value
            pub_loss += pub_tmp
            sec_loss += sec_tmp
            sec_accv += sec_acc_tmp
            KLv += KLtmp
            MIv += MItmp
            sibMIv += sibMItmp
            loss2xv += loss2xtmp
            loss2cv += loss2ctmp

        e_training_loss = e_training_loss / (FLAGS.updates_per_epoch)
        g_training_loss = g_training_loss / (FLAGS.updates_per_epoch)
        d_training_loss = d_training_loss / (FLAGS.updates_per_epoch)
        pub_loss /= (FLAGS.updates_per_epoch)
        sec_loss /= (FLAGS.updates_per_epoch)
        sec_accv /= (FLAGS.updates_per_epoch)
        loss2xv /= (FLAGS.updates_per_epoch)
        loss2cv /= (FLAGS.updates_per_epoch)
        KLv /= (FLAGS.updates_per_epoch)
        MIv /= (FLAGS.updates_per_epoch)
        sibMIv /= (FLAGS.updates_per_epoch)

        print("Loss for E %f, for G %f, for D %f" %
              (e_training_loss, g_training_loss, d_training_loss))
        print('Training public loss at epoch %s: %s' % (epoch, pub_loss))
        print('Training private loss at epoch %s: %s, private accuracy: %s' %
              (epoch, sec_loss, sec_accv))

        e_loss_train[epoch] = e_training_loss
        g_loss_train[epoch] = g_training_loss
        d_loss_train[epoch] = d_training_loss
        pub_dist_train[epoch] = pub_loss
        sec_dist_train[epoch] = sec_loss
        loss2x_train[epoch] = loss2xv
        loss2c_train[epoch] = loss2cv
        KLloss_train[epoch] = KLv
        MIloss_train[epoch] = MIv
        sibMIloss_train[epoch] = sibMIv
        sec_acc_train[epoch] = sec_accv

        # Forced garbage collection.
        gc.collect()

        # Validation.
        if epoch % 10 == 9:
            pub_loss = 0
            sec_loss = 0
            e_val_loss = 0
            g_val_loss = 0
            d_val_loss = 0
            loss2xv = 0
            loss2cv = 0
            KLv = 0
            MIv = 0
            sibMIv = 0
            sec_accv = 0
            for i in range(int(FLAGS.test_dataset_size / FLAGS.batch_size)):
                feeds = get_feed(i, False)
                (e_val_tmp, g_val_tmp, d_val_tmp, pub_loss, sec_loss, MItmp,
                 sibMItmp, KLtmp, loss2xtmp, loss2ctmp,
                 sec_acc_tmp) = sess.run(
                     [loss1, loss2, loss3, pub_dist, sec_dist, I_c_cz,
                      sibMI_c_cz, KLloss, loss2x, loss2c, sec_acc], feeds)
                if (epoch >= FLAGS.max_epoch - 10):
                    xhat_val.extend(sess.run(xhat, feeds))
                # test_writer.add_summary(summary, i)
                e_val_loss += e_val_tmp
                g_val_loss += g_val_tmp
                d_val_loss += d_val_tmp
                sec_accv += sec_acc_tmp
                KLv += KLtmp
                MIv += MItmp
                sibMIv += sibMItmp
                loss2xv += loss2xtmp
                loss2cv += loss2ctmp

            pub_loss /= int(FLAGS.test_dataset_size / FLAGS.batch_size)
            sec_loss /= int(FLAGS.test_dataset_size / FLAGS.batch_size)
            e_val_loss /= int(FLAGS.test_dataset_size / FLAGS.batch_size)
            g_val_loss /= int(FLAGS.test_dataset_size / FLAGS.batch_size)
            d_val_loss /= int(FLAGS.test_dataset_size / FLAGS.batch_size)
            loss2xv /= int(FLAGS.test_dataset_size / FLAGS.batch_size)
            loss2cv /= int(FLAGS.test_dataset_size / FLAGS.batch_size)
            KLv /= int(FLAGS.test_dataset_size / FLAGS.batch_size)
            MIv /= int(FLAGS.test_dataset_size / FLAGS.batch_size)
            sibMIv /= int(FLAGS.test_dataset_size / FLAGS.batch_size)
            sec_accv /= int(FLAGS.test_dataset_size / FLAGS.batch_size)

            print('Test public loss at epoch %s: %s' % (epoch, pub_loss))
            print('Test private loss at epoch %s: %s' % (epoch, sec_loss))

            e_loss_val[epoch] = e_val_loss
            g_loss_val[epoch] = g_val_loss
            d_loss_val[epoch] = d_val_loss
            pub_dist_val[epoch] = pub_loss
            sec_dist_val[epoch] = sec_loss
            loss2x_val[epoch] = loss2xv
            loss2c_val[epoch] = loss2cv
            KLloss_val[epoch] = KLv
            MIloss_val[epoch] = MIv
            sibMIloss_val[epoch] = sibMIv
            sec_acc_val[epoch] = sec_accv

            if not (np.isnan(e_val_loss) or np.isnan(g_val_loss) or
                    np.isnan(d_val_loss)):
                savepath = saver.save(sess,
                                      model_directory + '/mnist_privacy',
                                      global_step=epoch)
                print('Model saved at epoch %s, path is %s' %
                      (epoch, savepath))
            gc.collect()

    np.savez(os.path.join(model_directory, 'synth_trainstats'),
             e_loss_train=e_loss_train,
             g_loss_train=g_loss_train,
             d_loss_train=d_loss_train,
             pub_dist_train=pub_dist_train,
             sec_dist_train=sec_dist_train,
             loss2x_train=loss2x_train,
             loss2c_train=loss2c_train,
             KLloss_train=KLloss_train,
             MIloss_train=MIloss_train,
             sibMIloss_train=sibMIloss_train,
             sec_acc_train=sec_acc_train,
             e_loss_val=e_loss_val,
             g_loss_val=g_loss_val,
             d_loss_val=d_loss_val,
             pub_dist_val=pub_dist_val,
             sec_dist_val=sec_dist_val,
             loss2x_val=loss2x_val,
             loss2c_val=loss2c_val,
             KLloss_val=KLloss_val,
             MIloss_val=MIloss_val,
             sibMIloss_val=sibMIloss_val,
             sec_acc_val=sec_acc_val,
             xhat_val=xhat_val)
    sess.close()
def train_ferg(prior, lossmetric="KL", order=1.01): '''Train model to output transformation that prevents leaking private info ''' data_dir = os.path.join(FLAGS.working_directory, "data") dataset_dir = os.path.join(data_dir, "ferg") model_directory = os.path.join( dataset_dir, lossmetric + "privacy_checkpoints" + str(encode_coef) + '_' + str(decode_coef) + '_' + str(order)) input_tensor = tf.placeholder(tf.float32, [FLAGS.batch_size, FLAGS.input_size]) output_tensor = tf.placeholder(tf.float32, [FLAGS.batch_size, FLAGS.output_size]) private_tensor = tf.placeholder(tf.float32, [FLAGS.batch_size, FLAGS.private_size]) prior_tensor = tf.constant(prior, tf.float32, [FLAGS.private_size]) rawc_tensor = tf.placeholder(tf.float32, [FLAGS.batch_size]) rawy_tensor = tf.placeholder(tf.float32, [FLAGS.batch_size]) #load data not necessary for mnist data, formatted as vectors of real values between 0 and 1 #load FERG dataset and shuffle, save, reload fergdata = np.load(os.path.join(dataset_dir, "ferg256.npz")) #fergdataindices = np.random.permutation(FLAGS.dataset_size+FLAGS.test_dataset_size) #fergdataimgs = fergdata['imgs'][fergdataindices] #fergdataidentity = fergdata['identity'][fergdataindices] #fergdataexpression = fergdata['expression'][fergdataindices] #np.savez(os.path.join(dataset_dir, "ferg256.npz"), # imgs = fergdataimgs, # identity = fergdataidentity, # expression = fergdataexpression) #fergdata = np.load(os.path.join(dataset_dir, "ferg256.npz")) def get_feed(batch_no, training, ferg): if training: x = ferg['imgs'][batch_no * FLAGS.batch_size:(batch_no + 1) * FLAGS.batch_size] c = ferg['identity'][batch_no * FLAGS.batch_size:(batch_no + 1) * FLAGS.batch_size] y = ferg['expression'][batch_no * FLAGS.batch_size:(batch_no + 1) * FLAGS.batch_size] else: x = ferg['imgs'][batch_no * FLAGS.batch_size + FLAGS.dataset_size:(batch_no + 1) * FLAGS.batch_size + FLAGS.dataset_size] c = ferg['identity'][batch_no * FLAGS.batch_size + FLAGS.dataset_size:(batch_no + 1) * FLAGS.batch_size + FLAGS.dataset_size] y = ferg['expression'][batch_no * FLAGS.batch_size + FLAGS.dataset_size:(batch_no + 1) * FLAGS.batch_size + FLAGS.dataset_size] x = x.reshape([FLAGS.batch_size, FLAGS.input_size]) # convert labels to one hot encoding cs = np.zeros((FLAGS.batch_size, FLAGS.private_size)) cs[np.arange(FLAGS.batch_size), c] = 1 ys = np.zeros((FLAGS.batch_size, FLAGS.output_size)) ys[np.arange(FLAGS.batch_size), y] = 1 return { input_tensor: x, output_tensor: ys, private_tensor: cs, rawc_tensor: c, rawy_tensor: y } #instantiate model with pt.defaults_scope(activation_fn=tf.nn.relu, batch_normalize=True, learned_moments_update_rate=3e-4, variance_epsilon=1e-3, scale_after_normalization=True): with pt.defaults_scope(phase=pt.Phase.train): with tf.variable_scope("encoder") as scope: z = dvibcomp.ferg_encoder(input_tensor) encode_params = tf.trainable_variables() e_param_len = len(encode_params) with tf.variable_scope("decoder") as scope: yhat, chat, mean, stddev = dvibcomp.ferg_twotask_predictor(z) all_params = tf.trainable_variables() d_param_len = len(all_params) - e_param_len # Calculating losses _, KLloss = dvibloss.encoding_cost(yhat, chat, output_tensor, private_tensor, prior_tensor, xmetric="CE", independent=False) loss2x, loss2c = dvibloss.recon_cost(yhat, chat, output_tensor, private_tensor, softmax=True, xmetric="CE") # Record losses of MI approximation and sibson MI h_c, h_cz, _ = dvibloss.MI_approx(input_tensor, private_tensor, rawc_tensor, yhat, chat, z) I_c_cz = tf.abs(h_c - h_cz) # use alpha = 3 first, may be 
tuned sibMI_c_cz = dvibloss.sibsonMI_approx(z, chat, order, independent=False) # Compose losses if lossmetric == "KL": loss1 = encode_coef * loss2x + KLloss if lossmetric == "MI": loss1 = encode_coef * loss2x + I_c_cz if lossmetric == "sibMI": loss1 = encode_coef * loss2x + sibMI_c_cz loss2 = decode_coef * loss2x + loss2c loss3 = dvibloss.get_vae_cost(mean, stddev) with tf.name_scope('pub_prediction'): with tf.name_scope('pub_distance'): pub_dist = tf.reduce_mean((yhat - output_tensor)**2) correct_predpub = tf.equal(tf.argmax(yhat, axis=1), tf.argmax(output_tensor, axis=1)) pub_acc = tf.reduce_mean(tf.cast(correct_predpub, tf.float32)) with tf.name_scope('sec_prediction'): with tf.name_scope('sec_distance'): sec_dist = tf.reduce_mean((chat - private_tensor)**2) #correct_pred = tf.less(tf.abs(chat - private_tensor), 0.5) correct_pred = tf.equal(tf.argmax(chat, axis=1), tf.argmax(private_tensor, axis=1)) sec_acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate, epsilon=1.0) e_train = pt.apply_optimizer(optimizer, losses=[loss1], regularize=True, include_marked=True, var_list=encode_params) d_train = pt.apply_optimizer(optimizer, losses=[loss2], regularize=True, include_marked=True, var_list=all_params[e_param_len:]) # Logging matrices e_loss_train = np.zeros(FLAGS.max_epoch) d_loss_train = np.zeros(FLAGS.max_epoch) pub_dist_train = np.zeros(FLAGS.max_epoch) sec_dist_train = np.zeros(FLAGS.max_epoch) loss2x_train = np.zeros(FLAGS.max_epoch) loss2c_train = np.zeros(FLAGS.max_epoch) KLloss_train = np.zeros(FLAGS.max_epoch) MIloss_train = np.zeros(FLAGS.max_epoch) sibMIloss_train = np.zeros(FLAGS.max_epoch) pub_acc_train = np.zeros(FLAGS.max_epoch) sec_acc_train = np.zeros(FLAGS.max_epoch) e_loss_val = np.zeros(FLAGS.max_epoch) d_loss_val = np.zeros(FLAGS.max_epoch) pub_dist_val = np.zeros(FLAGS.max_epoch) sec_dist_val = np.zeros(FLAGS.max_epoch) loss2x_val = np.zeros(FLAGS.max_epoch) loss2c_val = np.zeros(FLAGS.max_epoch) KLloss_val = np.zeros(FLAGS.max_epoch) MIloss_val = np.zeros(FLAGS.max_epoch) sibMIloss_val = np.zeros(FLAGS.max_epoch) pub_acc_val = np.zeros(FLAGS.max_epoch) sec_acc_val = np.zeros(FLAGS.max_epoch) yhat_val = [] # Tensorboard logging #tf.summary.scalar('e_loss', loss1) #tf.summary.scalar('KL', KLloss) #tf.summary.scalar('loss_x', loss2x) #tf.summary.scalar('loss_c', loss2c) #tf.summary.scalar('pub_dist', pub_dist) #tf.summary.scalar('sec_dist', sec_dist) init = tf.global_variables_initializer() saver = tf.train.Saver() # Config session for memory config = tf.ConfigProto() #config.gpu_options.allow_growth = True #config.gpu_options.per_process_gpu_memory_fraction = 0.8 config.log_device_placement = False sess = tf.Session(config=config) sess.run(init) #merged = tf.summary.merge_all() #train_writer = tf.summary.FileWriter(FLAGS.summary_dir + '/train', sess.graph) #test_writer = tf.summary.FileWriter(FLAGS.summary_dir + '/test') pdb.set_trace() for epoch in range(FLAGS.max_epoch): widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()] pbar = ProgressBar(maxval=FLAGS.updates_per_epoch, widgets=widgets) pbar.start() pub_loss = 0 sec_loss = 0 pub_accv = 0 sec_accv = 0 e_training_loss = 0 d_training_loss = 0 KLv = 0 MIv = 0 sibMIv = 0 loss2xv = 0 loss2cv = 0 for i in range(FLAGS.updates_per_epoch): pbar.update(i) feeds = get_feed(i, True, fergdata) zv, yhatv, chatv, meanv, stddevv, sec_pred = sess.run( [z, yhat, chat, mean, stddev, correct_pred], feeds) pub_tmp, sec_tmp, pub_acc_tmp, sec_acc_tmp = sess.run( [pub_dist, 
sec_dist, pub_acc, sec_acc], feeds) MItmp, sibMItmp, KLtmp, loss2xtmp, loss2ctmp, loss3tmp = sess.run( [I_c_cz, sibMI_c_cz, KLloss, loss2x, loss2c, loss3], feeds) _, e_loss_value = sess.run([e_train, loss1], feeds) _, d_loss_value = sess.run([d_train, loss2], feeds) if (np.isnan(e_loss_value) or np.isnan(d_loss_value)): pdb.set_trace() break #train_writer.add_summary(summary, i) e_training_loss += e_loss_value d_training_loss += d_loss_value pub_loss += pub_tmp sec_loss += sec_tmp pub_accv += pub_acc_tmp sec_accv += sec_acc_tmp KLv += KLtmp MIv += MItmp sibMIv += sibMItmp loss2xv += loss2xtmp loss2cv += loss2ctmp e_training_loss = e_training_loss / \ (FLAGS.updates_per_epoch) d_training_loss = d_training_loss / \ (FLAGS.updates_per_epoch) pub_loss /= (FLAGS.updates_per_epoch) sec_loss /= (FLAGS.updates_per_epoch) pub_accv /= (FLAGS.updates_per_epoch) sec_accv /= (FLAGS.updates_per_epoch) loss2xv /= (FLAGS.updates_per_epoch) loss2cv /= (FLAGS.updates_per_epoch) KLv /= (FLAGS.updates_per_epoch) MIv /= (FLAGS.updates_per_epoch) sibMIv /= (FLAGS.updates_per_epoch) print("Loss for E %f, and for D %f" % (e_training_loss, d_training_loss)) print('Training public loss at epoch %s: %s, public accuracy: %s' % (epoch, pub_loss, pub_accv)) print('Training private loss at epoch %s: %s, private accuracy: %s' % (epoch, sec_loss, sec_accv)) print('Training KL loss at epoch %s: %s' % (epoch, KLv)) e_loss_train[epoch] = e_training_loss d_loss_train[epoch] = d_training_loss pub_dist_train[epoch] = pub_loss sec_dist_train[epoch] = sec_loss loss2x_train[epoch] = loss2xv loss2c_train[epoch] = loss2cv KLloss_train[epoch] = KLv MIloss_train[epoch] = MIv sibMIloss_train[epoch] = sibMIv pub_acc_train[epoch] = pub_accv sec_acc_train[epoch] = sec_accv # Validation if epoch % 10 == 9: pub_loss = 0 sec_loss = 0 e_val_loss = 0 d_val_loss = 0 loss2xv = 0 loss2cv = 0 KLv = 0 MIv = 0 sibMIv = 0 pub_accv = 0 sec_accv = 0 for i in range(int(FLAGS.test_dataset_size / FLAGS.batch_size)): feeds = get_feed(i, False, fergdata) pub_loss += sess.run(pub_dist, feeds) sec_loss += sess.run(sec_dist, feeds) e_val_loss += sess.run(loss1, feeds) d_val_loss += sess.run(loss2, feeds) zv, yhatv, chatv, meanv, stddevv, sec_pred = sess.run( [z, yhat, chat, mean, stddev, correct_pred], feeds) MItmp, sibMItmp, KLtmp, loss2xtmp, loss2ctmp, pub_acc_tmp, sec_acc_tmp = sess.run( [ I_c_cz, sibMI_c_cz, KLloss, loss2x, loss2c, pub_acc, sec_acc ], feeds) if (epoch >= FLAGS.max_epoch - 10): yhat_val.extend(sess.run(yhat, feeds)) #test_writer.add_summary(summary, i) pub_accv += pub_acc_tmp sec_accv += sec_acc_tmp KLv += KLtmp MIv += MItmp sibMIv += sibMItmp loss2xv += loss2xtmp loss2cv += loss2ctmp pub_loss /= int(FLAGS.test_dataset_size / FLAGS.batch_size) sec_loss /= int(FLAGS.test_dataset_size / FLAGS.batch_size) e_val_loss /= int(FLAGS.test_dataset_size / FLAGS.batch_size) d_val_loss /= int(FLAGS.test_dataset_size / FLAGS.batch_size) loss2xv /= int(FLAGS.test_dataset_size / FLAGS.batch_size) loss2cv /= int(FLAGS.test_dataset_size / FLAGS.batch_size) KLv /= int(FLAGS.test_dataset_size / FLAGS.batch_size) MIv /= int(FLAGS.test_dataset_size / FLAGS.batch_size) sibMIv /= int(FLAGS.test_dataset_size / FLAGS.batch_size) pub_accv /= int(FLAGS.test_dataset_size / FLAGS.batch_size) sec_accv /= int(FLAGS.test_dataset_size / FLAGS.batch_size) print('Test public loss at epoch %s: %s, public accuracy: %s' % (epoch, pub_loss, pub_accv)) print('Test private loss at epoch %s: %s, private accuracy: %s' % (epoch, sec_loss, sec_accv)) e_loss_val[epoch] = e_val_loss 
d_loss_val[epoch] = d_val_loss pub_dist_val[epoch] = pub_loss sec_dist_val[epoch] = sec_loss loss2x_val[epoch] = loss2xv loss2c_val[epoch] = loss2cv KLloss_val[epoch] = KLv MIloss_val[epoch] = MIv sibMIloss_val[epoch] = sibMIv pub_acc_val[epoch] = pub_accv sec_acc_val[epoch] = sec_accv if not (np.isnan(e_loss_value) or np.isnan(d_loss_value)): savepath = saver.save(sess, model_directory + '/ferg_privacy', global_step=epoch) print('Model saved at epoch %s, path is %s' % (epoch, savepath)) np.savez(os.path.join(model_directory, 'ferg_trainstats'), e_loss_train=e_loss_train, d_loss_train=d_loss_train, pub_dist_train=pub_dist_train, sec_dist_train=sec_dist_train, loss2x_train=loss2x_train, loss2c_train=loss2c_train, KLloss_train=KLloss_train, MIloss_train=MIloss_train, sibMIloss_train=sibMIloss_train, pub_acc_train=pub_acc_train, sec_acc_train=sec_acc_train, e_loss_val=e_loss_val, d_loss_val=d_loss_val, pub_dist_val=pub_dist_val, sec_dist_val=sec_dist_val, loss2x_val=loss2x_val, loss2c_val=loss2c_val, KLloss_val=KLloss_val, MIloss_val=MIloss_val, sibMIloss_val=sibMIloss_val, pub_acc_val=pub_acc_val, sec_acc_val=sec_acc_val, yhat_val=yhat_val) sess.close()
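The FERG script above alternates two pt.apply_optimizer train ops over disjoint variable lists, one step on the encoder objective and one on the decoder objective. Below is a stripped-down sketch of that two-train-op pattern; the toy autoencoder, shapes, and hyperparameters are invented for illustration.

import tensorflow as tf
import prettytensor as pt

x = tf.placeholder(tf.float32, [None, 8])
with tf.variable_scope('enc'):
    w1 = tf.get_variable('w', [8, 4])
    z = tf.nn.relu(tf.matmul(x, w1))
with tf.variable_scope('dec'):
    w2 = tf.get_variable('w', [4, 8])
    xhat = tf.matmul(z, w2)

recon = tf.reduce_mean(tf.square(xhat - x))
all_vars = tf.trainable_variables()
enc_vars = [v for v in all_vars if v.name.startswith('enc')]
dec_vars = [v for v in all_vars if v.name.startswith('dec')]

opt = tf.train.AdamOptimizer(1e-3, epsilon=1.0)
# Two train ops over disjoint variable lists, as in the script above;
# each op leaves the other sub-network's weights untouched.
e_train = pt.apply_optimizer(opt, losses=[recon], regularize=True,
                             include_marked=True, var_list=enc_vars)
d_train = pt.apply_optimizer(opt, losses=[recon], regularize=True,
                             include_marked=True, var_list=dec_vars)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch = {x: [[0.5] * 8]}
    sess.run(e_train, batch)  # "encoder" step
    sess.run(d_train, batch)  # "decoder" step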
G = generator() with tf.variable_scope("model", reuse=True): D2 = discriminator(G) # generated examples D_loss = get_discrinator_loss(D1, D2) G_loss = get_generator_loss(D2) learning_rate = tf.placeholder(tf.float32, shape=[]) optimizer = tf.train.AdamOptimizer(learning_rate, epsilon=1.0) params = tf.trainable_variables() D_params = params[:D_params_num] G_params = params[D_params_num:] # train_discrimator = optimizer.minimize(loss=D_loss, var_list=D_params) # train_generator = optimizer.minimize(loss=G_loss, var_list=G_params) train_discrimator = pt.apply_optimizer(optimizer, losses=[D_loss], regularize=True, include_marked=True, var_list=D_params) train_generator = pt.apply_optimizer(optimizer, losses=[G_loss], regularize=True, include_marked=True, var_list=G_params) init = tf.initialize_all_variables() with tf.Session() as sess: sess.run(init) for epoch in range(FLAGS.max_epoch): discriminator_loss = 0.0 generator_loss = 0.0 widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()] pbar = ProgressBar(max_value = FLAGS.updates_per_epoch, widgets=widgets) pbar.start() for i in range(FLAGS.updates_per_epoch):
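In this GAN loop, the commented-out optimizer.minimize calls and pt.apply_optimizer are not quite interchangeable: with regularize=True and include_marked=True, PrettyTensor also folds registered regularization losses and marked losses into the objective, and advances pt.global_step(). A minimal sketch of the difference, with made-up losses and variables standing in for D_loss/G_loss and the parameter split:

import tensorflow as tf
import prettytensor as pt

x = tf.constant([[1.0, 2.0]])
w_d = tf.get_variable('d_w', [2, 1])
w_g = tf.get_variable('g_w', [2, 1])
D_loss = tf.reduce_mean(tf.square(tf.matmul(x, w_d)))
G_loss = tf.reduce_mean(tf.square(tf.matmul(x, w_g)))

optimizer = tf.train.AdamOptimizer(1e-4, epsilon=1.0)
# optimizer.minimize(D_loss, var_list=[w_d]) would train exactly D_loss;
# apply_optimizer additionally includes PrettyTensor's regularization and
# marked losses, and increments pt.global_step() on each update.
train_discriminator = pt.apply_optimizer(optimizer, losses=[D_loss],
                                         regularize=True, include_marked=True,
                                         var_list=[w_d])
train_generator = pt.apply_optimizer(optimizer, losses=[G_loss],
                                     regularize=True, include_marked=True,
                                     var_list=[w_g])

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    sess.run([train_discriminator, train_generator])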
def prepare_trainer(self, loss): vae_opt = tf.train.AdamOptimizer(self.vae_learning_rate) self.vae_trainer = \ pt.apply_optimizer(vae_opt, losses=[loss])
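A hypothetical use of a helper like this: with no var_list, pt.apply_optimizer differentiates the loss with respect to every trainable variable. The toy loss below is invented for illustration.

import tensorflow as tf
import prettytensor as pt

# Toy stand-in for a VAE loss: drive a single variable toward zero.
w = tf.get_variable('w', initializer=tf.constant(3.0))
loss = tf.square(w)

vae_opt = tf.train.AdamOptimizer(0.01)
# No var_list: every trainable variable is optimized, and
# pt.global_step() bookkeeping is wired in automatically.
vae_trainer = pt.apply_optimizer(vae_opt, losses=[loss])

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    for _ in range(200):
        sess.run(vae_trainer)
    print(sess.run(loss))  # close to 0 after training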
with tf.variable_scope("model") as scope:
    output_tensor, mean, stddev = decoder(encoder(input_tensor))
    D = discriminator(ground_truth_tensor)
    D_ = discriminator(tf.add(input_tensor, tf.mul(output_tensor, mask_tensor)), reuse=True)

with pt.defaults_scope(phase=pt.Phase.test):
    with tf.variable_scope("model", reuse=True) as scope:
        sampled_tensor, _, _ = decoder(encoder(input_tensor))
        restored_tensor = tf.add(tf.mul(input_tensor, tf.sub(tf.ones_like(mask_tensor), mask_tensor)),
                                 tf.mul(sampled_tensor, mask_tensor))

# Restorer: reconstruction loss
rec_loss = get_reconstruction_cost(output_tensor, ground_truth_tensor, mask=None, epsilon=1e-12)
r_loss = rec_loss  # + g_loss (adversarial term disabled)
r_optim = tf.train.AdamOptimizer(FLAGS.r_learning_rate, epsilon=1e-12)
r_train = pt.apply_optimizer(r_optim, losses=[r_loss])

# Discriminator
d_sum = tf.histogram_summary("d", D)
d__sum = tf.histogram_summary("d_", D_)
d_loss_real = ops.binary_cross_entropy_with_logits(tf.ones_like(D), D)
d_loss_fake = ops.binary_cross_entropy_with_logits(tf.zeros_like(D_), D_)
d_loss_real_sum = tf.scalar_summary("d_loss_real", d_loss_real)
d_loss_fake_sum = tf.scalar_summary("d_loss_fake", d_loss_fake)
d_loss = d_loss_real + d_loss_fake
d_loss_sum = tf.scalar_summary("d_loss", d_loss)

t_vars = tf.trainable_variables()
d_vars = [var for var in t_vars if 'd_' in var.name]
d_optim = tf.train.AdamOptimizer(FLAGS.d_learning_rate, beta1=FLAGS.beta1) \
    .minimize(d_loss, var_list=d_vars)
def main(_=None): print('Starting Shakespeare') # Since we are feeding our data as numpy arrays, we need to create # placeholders in the graph. # These must then be fed using the feed dict. input_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS]) output_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS]) merged_size = BATCH_SIZE * TIMESTEPS inp = data_utils.reshape_data(input_placeholder) # We need a dense output to calculate loss and accuracy. # sparse_to_dense does a lookup using the indices from the first Tensor. # Because we are filling in a 2D array, the indices need to be 2 dimensional. t = tf.concat(1, [ tf.constant( numpy.arange(merged_size).reshape((merged_size, 1)), dtype=tf.int32), data_utils.reshape_data(output_placeholder) ]) labels = tf.sparse_to_dense(t, [merged_size, CHARS], 1.0, 0.0) # Some ops have different behaviors in test vs train and these take a phase # argument. with tf.variable_scope('shakespeare'): training_logits = create_model(inp, TIMESTEPS, pt.Phase.train) # Create the result. Softmax applies softmax and creates a cross entropy # loss. The result is a namedtuple. training_result = training_logits.softmax(labels) # Create the gradient optimizer and apply it to the graph. # pt.apply_optimizer adds regularization losses and sets up a step counter # (pt.global_step()) for you. optimizer = tf.train.AdagradOptimizer(0.5) train_op = pt.apply_optimizer(optimizer, losses=[training_result.loss]) # For tracking accuracy in evaluation, we need to add an evaluation node. # We only run this when testing, so we need to specify that in the phase. # We also want to disable dropout, so we pass the phase to create_model. # Call variable scope by name so we also create a name scope. This ensures # that we share variables and our names are properly organized. with tf.variable_scope('shakespeare', reuse=True): test_logits = create_model(inp, TIMESTEPS, pt.Phase.test) test_result = test_logits.softmax(labels) # Accuracy creates variables, so make it outside of the above scope. accuracy = test_result.softmax.evaluate_classifier(labels, phase=pt.Phase.test) # Create an inference model so that we can sample. The big difference is # that the input is a single character and it requires reset nodes. with tf.variable_scope('shakespeare', reuse=True): inference_input = tf.placeholder(tf.int32, []) # Needs to be 2 dimensional so that it matches the dims of the other models. reshaped = pt.wrap(inference_input).reshape([1, 1]) inference_logits = create_model(reshaped, 1, pt.Phase.infer) # Grab the data as numpy arrays. shakespeare = data_utils.shakespeare(TIMESTEPS + 1) shakespeare_in = shakespeare[:, :-1] shakespeare_out = shakespeare[:, 1:] # We can set a save_path in the runner to automatically checkpoint every so # often. Otherwise at the end of the session, the model will be lost. runner = pt.train.Runner(save_path=FLAGS.save_path) with tf.Session(): for epoch in xrange(FLAGS.epochs): # Shuffle the training data. 
shakespeare_in, shakespeare_out = data_utils.permute_data( (shakespeare_in, shakespeare_out)) runner.train_model(train_op, training_result.loss, len(shakespeare_in) / BATCH_SIZE, feed_vars=(input_placeholder, output_placeholder), feed_data=pt.train.feed_numpy( BATCH_SIZE, shakespeare_in, shakespeare_out), print_every=10) classification_accuracy = runner.evaluate_model( accuracy, len(shakespeare_in) / BATCH_SIZE, feed_vars=(input_placeholder, output_placeholder), feed_data=pt.train.feed_numpy(BATCH_SIZE, shakespeare_in, shakespeare_out)) print('Next character accuracy after epoch %d: %g%%' % ( epoch + 1, classification_accuracy * 100)) # Use a temperature smaller than 1 because the early stages of the model # don't assign much confidence. print(sample(inference_input, inference_logits, max_length=128, temperature=0.5)) # Print a sampling from the model. print(sample(inference_input, inference_logits))
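The sample helper is defined elsewhere in that example; its temperature argument follows the usual trick of dividing the logits by the temperature before the softmax, so T < 1 concentrates probability on the model's top choices. A self-contained NumPy sketch of that idea (not the example's actual sampler):

import numpy as np

def sample_char(logits, temperature=1.0, rng=np.random):
    # T < 1 sharpens the distribution; T > 1 flattens it toward uniform.
    scaled = np.asarray(logits, dtype=np.float64) / temperature
    probs = np.exp(scaled - scaled.max())
    probs /= probs.sum()
    return rng.choice(len(probs), p=probs)

print(sample_char([2.0, 1.0, 0.1], temperature=0.5))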
def initialize(self, optimizer=None, var_list=None, use_prettytensor=False, global_step=None, *args, **kwargs): """Initialize variational inference. Parameters ---------- optimizer : str or tf.train.Optimizer, optional A TensorFlow optimizer, to use for optimizing the variational objective. Alternatively, one can pass in the name of a TensorFlow optimizer, and default parameters for the optimizer will be used. var_list : list of tf.Variable, optional List of TensorFlow variables to optimize over. Default is all trainable variables that ``latent_vars`` and ``data`` depend on, excluding those that are only used in conditionals in ``data``. use_prettytensor : bool, optional ``True`` if aim to use PrettyTensor optimizer (when using PrettyTensor) or ``False`` if aim to use TensorFlow optimizer. Defaults to TensorFlow. global_step : tf.Variable, optional A TensorFlow variable to hold the global step. """ super(VariationalInference, self).initialize(*args, **kwargs) if var_list is None: # Traverse random variable graphs to get default list of variables. var_list = set() trainables = tf.trainable_variables() for z, qz in six.iteritems(self.latent_vars): if isinstance(z, RandomVariable): var_list.update(get_variables(z, collection=trainables)) var_list.update(get_variables(qz, collection=trainables)) for x, qx in six.iteritems(self.data): if isinstance(x, RandomVariable) and \ not isinstance(qx, RandomVariable): var_list.update(get_variables(x, collection=trainables)) var_list = list(var_list) self.loss, grads_and_vars = self.build_loss_and_gradients(var_list) if self.logging: summary_key = 'summaries_' + str(id(self)) tf.summary.scalar("loss", self.loss, collections=[summary_key]) for grad, var in grads_and_vars: # replace colons which are an invalid character tf.summary.histogram("gradient/" + var.name.replace(':', '/'), grad, collections=[summary_key]) tf.summary.scalar("gradient_norm/" + var.name.replace(':', '/'), tf.norm(grad), collections=[summary_key]) self.summarize = tf.summary.merge_all(key=summary_key) if optimizer is None and global_step is None: # Default optimizer always uses a global step variable. global_step = tf.Variable(0, trainable=False, name="global_step") if isinstance(global_step, tf.Variable): starter_learning_rate = 0.1 learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 100, 0.9, staircase=True) else: learning_rate = 0.01 global_step = None # Build optimizer. 
if optimizer is None: optimizer = tf.train.AdamOptimizer(learning_rate) elif isinstance(optimizer, str): if optimizer == 'gradientdescent': optimizer = tf.train.GradientDescentOptimizer(learning_rate) elif optimizer == 'adadelta': optimizer = tf.train.AdadeltaOptimizer(learning_rate) elif optimizer == 'adagrad': optimizer = tf.train.AdagradOptimizer(learning_rate) elif optimizer == 'momentum': optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9) elif optimizer == 'adam': optimizer = tf.train.AdamOptimizer(learning_rate) elif optimizer == 'ftrl': optimizer = tf.train.FtrlOptimizer(learning_rate) elif optimizer == 'rmsprop': optimizer = tf.train.RMSPropOptimizer(learning_rate) else: raise ValueError('Optimizer class not found:', optimizer) elif not isinstance(optimizer, tf.train.Optimizer): raise TypeError( "Optimizer must be str, tf.train.Optimizer, or None.") scope = "optimizer_" + str(id(self)) with tf.variable_scope(scope): if not use_prettytensor: self.train = optimizer.apply_gradients(grads_and_vars, global_step=global_step) else: # Note PrettyTensor optimizer does not accept manual updates; # it autodiffs the loss directly. self.train = pt.apply_optimizer(optimizer, losses=[self.loss], global_step=global_step, var_list=var_list) self.reset.append( tf.variables_initializer( tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope)))
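When no optimizer is passed, the code above pairs Adam with an exponentially decaying learning rate keyed to global_step. Isolated, the schedule behaves like this (constants copied from the code above; the values in comments are the closed-form results):

import tensorflow as tf

global_step = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(0.1, global_step,
                                           100, 0.9, staircase=True)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(learning_rate))          # 0.1 at step 0
    sess.run(tf.assign(global_step, 100))
    print(sess.run(learning_rate))          # 0.1 * 0.9 = 0.09 after 100 steps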
def main(_=None, weight_init=tf.random_normal, activation_f=tf.nn.sigmoid, data_min=0, data_scale=1.0, epochs=50, learning_rate=0.01, prefix=None): tf.reset_default_graph() input_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 28, 28, 1]) output_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 28, 28, 1]) # Grab the data as numpy arrays. train_input, train_output = data_utils.mnist(training=True) test_input, test_output = data_utils.mnist(training=False) train_set = ut.mnist_select_n_classes(train_input, train_output, NUM_CLASSES, min=data_min, scale=data_scale) test_set = ut.mnist_select_n_classes(test_input, test_output, NUM_CLASSES, min=data_min, scale=data_scale) train_input, train_output = train_set[0], train_set[0] test_input, test_output = test_set[0], test_set[0] ut.print_info('train (min, max): (%f, %f)' % (np.min(train_set[0]), np.max(train_set[0]))) visual_inputs, visual_output = train_set[0][0:BATCH_SIZE], train_set[0][0:BATCH_SIZE] epoch_reconstruction = [] EPOCH_SIZE = len(train_input) // BATCH_SIZE TEST_SIZE = len(test_input) // BATCH_SIZE assert_model(input_placeholder, output_placeholder, test_input, test_output, train_input, train_output, visual_inputs, visual_output) with pt.defaults_scope(activation_fn=activation_f, # batch_normalize=True, # learned_moments_update_rate=0.0003, # variance_epsilon=0.001, # scale_after_normalization=True ): with pt.defaults_scope(phase=pt.Phase.train): with tf.variable_scope("model") as scope: output_tensor = decoder(encoder(input_placeholder), weight_init=weight_init) pretty_loss = loss(output_tensor, output_placeholder) optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) train = pt.apply_optimizer(optimizer, losses=[pretty_loss]) init = tf.initialize_all_variables() runner = pt.train.Runner(save_path=FLAGS.save_path) best_q = 100000 with tf.Session() as sess: sess.run(init) for epoch in xrange(epochs): # Shuffle the training data. additional_info = '' if epoch % np.ceil(epochs / 40.0) == 0 or epoch + 1 == epochs: reconstruct, loss_value = sess.run([output_tensor, pretty_loss], {input_placeholder: visual_inputs, output_placeholder: visual_output}) epoch_reconstruction.append(reconstruct) additional_info += 'epoch:%d (min, max): (%f %f)' %(epoch, np.min(reconstruct), np.max(reconstruct)) train_input, train_output = data_utils.permute_data( (train_input, train_output)) runner.train_model( train, pretty_loss, EPOCH_SIZE, feed_vars=(input_placeholder, output_placeholder), feed_data=pt.train.feed_numpy(BATCH_SIZE, train_input, train_output), print_every=None ) accuracy = runner.evaluate_model( pretty_loss, TEST_SIZE, feed_vars=(input_placeholder, output_placeholder), feed_data=pt.train.feed_numpy(BATCH_SIZE, test_input, test_output)) ut.print_time('Accuracy after %2d/%d epoch %.2f; %s' % (epoch + 1, epochs, accuracy, additional_info)) if best_q > accuracy: best_q = accuracy save_params = {'suf': 'mn_basic', 'act': activation_f, 'e': epochs, 'opt': optimizer, 'lr': learning_rate, 'init': weight_init, 'acu': int(best_q), 'bs': BATCH_SIZE, 'h': HIDDEN_0_SIZE, 'i':prefix} ut.reconstruct_images_epochs(np.asarray(epoch_reconstruction), visual_output, save_params=save_params) ut.print_time('Best Quality: %f for %s' % (best_q, ut.to_file_name(save_params))) ut.reset_start_time() return best_q
def init_opt(self): if self.dataset.name == "mnist": shape = [self.dataset.image_dim] elif 'FOLDER' in self.dataset.name: print "Selected folder image" shape = list(self.dataset.output_size) else: shape = [self.dataset.output_size, self.dataset.output_size, 3] self.input_tensor = input_tensor = tf.placeholder( tf.float32, [self.batch_size] + shape) with pt.defaults_scope(phase=pt.Phase.train): self.z_var = self.model.latent_dist.sample_prior(self.batch_size) fake_x, _ = self.model.generate(self.z_var) self.sample_x, _ = self.model.generate(self.z_var) if self.semiSup: self.sup_d = self.model.discriminateSup( self.input_tensor, self.dataset.dataObj.getNclasses()) self.fake_d = self.model.discriminate(fake_x) self.real_d = self.model.discriminate(input_tensor) self.d_feat_real = self.real_d['features'] #Define the generator loss as the intermediate layer error (MSE) self.d_intermediateLayerGenerated = self.model.calcInterLayer( fake_x) self.d_intermediateLayerReal = self.model.calcInterLayer( input_tensor) # generator_loss = tf.reduce_mean(tf.squared_difference(self.d_intermediateLayerGenerated, self.d_intermediateLayerReal)) generator_loss = 0 if self.semiSup: self.input_labels = tf.placeholder( tf.float32, [self.batch_size, self.dataset.dataObj.getNclasses()]) discriminator_loss_sup = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits( self.sup_d['logits'], self.input_labels)) discriminator_loss_real = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits( self.real_d['logits'], tf.zeros_like(self.real_d['logits']))) discriminator_loss_fake = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits( self.fake_d['logits'], tf.ones_like(self.real_d['logits']))) discriminator_loss = discriminator_loss_real + discriminator_loss_fake + discriminator_loss_sup generator_loss += tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits( self.fake_d['logits'], tf.zeros_like(self.fake_d['logits']))) self.log_vars.append( ("discriminator_sup_loss", discriminator_loss_sup)) else: discriminator_loss_real = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits( self.real_d['logits'], tf.ones_like(self.real_d['prob']))) discriminator_loss_fake = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits( self.fake_d['logits'], tf.zeros_like(self.fake_d['prob']))) discriminator_loss = discriminator_loss_real + discriminator_loss_fake generator_loss += tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits( self.fake_d['logits'], tf.ones_like(self.fake_d['prob']))) self.log_vars.append( ("discriminator_loss_real", discriminator_loss_real)) self.log_vars.append( ("discriminator_loss_fake", discriminator_loss_fake)) self.log_vars.append(("discriminator_loss", discriminator_loss)) self.log_vars.append(("generator_loss", generator_loss)) real_d_sum = tf.histogram_summary("real_d", self.real_d['prob']) fake_d_sum = tf.histogram_summary("fake_d", self.fake_d['prob']) if self.model.is_reg: reg_z = self.model.reg_z(self.z_var) mi_est = tf.constant(0.) cross_ent = tf.constant(0.) 
# compute for discrete and continuous codes separately # discrete: if len(self.model.reg_disc_latent_dist.dists) > 0: disc_reg_z = self.model.disc_reg_z(reg_z) disc_reg_dist_info = self.model.disc_reg_dist_info( self.fake_d['reg_dist_info'] ) # Returns a dictionary of activations for each distribution disc_log_q_c_given_x = self.model.reg_disc_latent_dist.logli( disc_reg_z, disc_reg_dist_info) disc_log_q_c = self.model.reg_disc_latent_dist.logli_prior( disc_reg_z) disc_cross_ent = tf.reduce_mean(-disc_log_q_c_given_x) disc_ent = tf.reduce_mean(-disc_log_q_c) disc_mi_est = disc_ent - disc_cross_ent mi_est += disc_mi_est cross_ent += disc_cross_ent self.log_vars.append(("MI_disc", disc_mi_est)) self.log_vars.append(("CrossEnt_disc", disc_cross_ent)) discriminator_loss -= self.info_reg_coeff * disc_mi_est generator_loss -= self.info_reg_coeff * disc_mi_est real_disc_reg_dist_info = self.model.disc_reg_dist_info( self.real_d['reg_dist_info']) assert len( real_disc_reg_dist_info.keys() ) == 1 # currently support only one categorical distribution self.disc_prob = real_disc_reg_dist_info[ real_disc_reg_dist_info.keys()[0]] if len(self.model.reg_cont_latent_dist.dists) > 0: cont_reg_z = self.model.cont_reg_z(reg_z) cont_reg_dist_info = self.model.cont_reg_dist_info( self.fake_d['reg_dist_info']) cont_log_q_c_given_x = self.model.reg_cont_latent_dist.logli( cont_reg_z, cont_reg_dist_info) cont_log_q_c = self.model.reg_cont_latent_dist.logli_prior( cont_reg_z) cont_cross_ent = tf.reduce_mean(-cont_log_q_c_given_x) cont_ent = tf.reduce_mean(-cont_log_q_c) cont_mi_est = cont_ent - cont_cross_ent mi_est += cont_mi_est cross_ent += cont_cross_ent self.log_vars.append(("MI_cont", cont_mi_est)) self.log_vars.append(("CrossEnt_cont", cont_cross_ent)) discriminator_loss -= self.info_reg_coeff * cont_mi_est generator_loss -= self.info_reg_coeff * cont_mi_est for idx, dist_info in enumerate( self.model.reg_latent_dist.split_dist_info( self.fake_d['reg_dist_info'])): if "stddev" in dist_info: self.log_vars.append( ("max_std_%d" % idx, tf.reduce_max(dist_info["stddev"]))) self.log_vars.append( ("min_std_%d" % idx, tf.reduce_min(dist_info["stddev"]))) self.log_vars.append(("MI", mi_est)) self.log_vars.append(("CrossEnt", cross_ent)) all_vars = tf.trainable_variables() d_vars = [var for var in all_vars if var.name.startswith('d_')] g_vars = [var for var in all_vars if var.name.startswith('g_')] discriminator_optimizer = tf.train.AdamOptimizer( self.discriminator_learning_rate, beta1=0.5) self.discriminator_trainer = pt.apply_optimizer( discriminator_optimizer, losses=[discriminator_loss], var_list=d_vars) generator_optimizer = tf.train.AdamOptimizer( self.generator_learning_rate, beta1=0.5) self.generator_trainer = pt.apply_optimizer( generator_optimizer, losses=[generator_loss], var_list=g_vars) for k, v in self.log_vars: tf.scalar_summary(k, v) if self.model.is_reg and self.dataset.name != 'imagenet': if self.model.encoder_dim <= 12: # Ugly conditioning!!! Fix later with pt.defaults_scope(phase=pt.Phase.test): with tf.variable_scope("model", reuse=True) as scope: self.visualize_all_factors()
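The mutual-information terms above follow the InfoGAN surrogate mi_est = H(c) - H(c|x), where H(c|x) is estimated from the recognition network's log-likelihood of the sampled codes. A toy version of the discrete branch, with random stand-ins for the code samples and recognition logits:

import numpy as np
import tensorflow as tf

batch, k = 32, 10
# One-hot samples of a uniform categorical code c, and fake recognition logits.
c = tf.one_hot(tf.random_uniform([batch], maxval=k, dtype=tf.int32), k)
q_logits = tf.random_normal([batch, k])

log_q_c_given_x = tf.reduce_sum(c * tf.nn.log_softmax(q_logits), axis=1)
cross_ent = tf.reduce_mean(-log_q_c_given_x)  # estimate of H(c | x)
ent = np.log(k)                               # H(c) for the uniform prior
mi_est = ent - cross_ent

with tf.Session() as sess:
    print(sess.run(mi_est))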
def main(_=None): print('Starting Baby Names') # Since we are feeding our data as numpy arrays, we need to create # placeholders in the graph. # These must then be fed using the feed dict. input_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS]) output_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, SEXES]) inp = data_utils.reshape_data(input_placeholder) # Create a label for each timestep. labels = data_utils.reshape_data( tf.reshape( tf.tile(output_placeholder, [1, TIMESTEPS]), [BATCH_SIZE, TIMESTEPS, SEXES]), per_example_length=2) # We also need to set per example weights so that the softmax doesn't output a # prediction on intermediate nodes. length_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, 1]) # We need a dense multiplier for the per example weights. The only place # that has a non-zero loss is the first EOS after the last character of the # name; the characters in the name and the trailing EOS characters are given a # 0 loss by assigning the weight to 0.0 and in the end only one character in # each batch has a weight of 1.0. # sparse_to_dense does a lookup using the indices from the first Tensor. # Because we are filling in a 2D array, the indices need to be 2 dimensional. # Since we want to assign 1 value for each row, the first dimension can just # be a sequence. t = tf.concat_v2( [ tf.constant( numpy.arange(BATCH_SIZE).reshape((BATCH_SIZE, 1)), dtype=tf.int32), length_placeholder ], 1) # Squeeze removes dimensions that are equal to 1. per_example_weights must # end up as 1 dimensional. per_example_weights = data_utils.reshape_data(tf.sparse_to_dense( t, [BATCH_SIZE, TIMESTEPS], 1.0, default_value=0.0)).squeeze() # We need 2 copies of the graph that share variables. The first copy runs # training and will do dropout if specified and the second will not include # dropout. Dropout is controlled by the phase argument, which sets the mode # consistently throughout a graph. with tf.variable_scope('baby_names'): result = create_model(inp, labels, TIMESTEPS, per_example_weights) # Call variable scope by name so we also create a name scope. This ensures # that we share variables and our names are properly organized. with tf.variable_scope('baby_names', reuse=True): # Some ops have different behaviors in test vs train and these take a phase # argument. test_result = create_model(inp, labels, TIMESTEPS, per_example_weights, phase=pt.Phase.test) # For tracking accuracy in evaluation, we need to add an evaluation node. # We only run this when testing, so we need to specify that in the phase. # Some ops have different behaviors in test vs train and these take a phase # argument. accuracy = test_result.softmax.evaluate_classifier( labels, phase=pt.Phase.test, per_example_weights=per_example_weights) # We can also compute a batch accuracy to monitor progress. batch_accuracy = result.softmax.evaluate_classifier( labels, phase=pt.Phase.train, per_example_weights=per_example_weights) # Grab the inputs, outputs and lengths as numpy arrays. # Lengths could have been calculated from names, but it was easier to # calculate inside the utility function. names, sex, lengths = data_utils.baby_names(TIMESTEPS) epoch_size = len(names) // BATCH_SIZE # Create the gradient optimizer and apply it to the graph. # pt.apply_optimizer adds regularization losses and sets up a step counter # (pt.global_step()) for you. # This sequence model does very well with initially high rates. 
optimizer = tf.train.AdagradOptimizer( tf.train.exponential_decay(1.0, pt.global_step(), epoch_size, 0.95, staircase=True)) train_op = pt.apply_optimizer(optimizer, losses=[result.loss]) # We can set a save_path in the runner to automatically checkpoint every so # often. Otherwise at the end of the session, the model will be lost. runner = pt.train.Runner(save_path=FLAGS.save_path) with tf.Session(): for epoch in xrange(100): # Shuffle the training data. names, sex, lengths = data_utils.permute_data((names, sex, lengths)) runner.train_model( train_op, [result.loss, batch_accuracy], epoch_size, feed_vars=(input_placeholder, output_placeholder, length_placeholder), feed_data=pt.train.feed_numpy(BATCH_SIZE, names, sex, lengths), print_every=100) classification_accuracy = runner.evaluate_model( accuracy, epoch_size, print_every=0, feed_vars=(input_placeholder, output_placeholder, length_placeholder), feed_data=pt.train.feed_numpy(BATCH_SIZE, names, sex, lengths)) print('Accuracy after epoch %d: %g%%' % ( epoch + 1, classification_accuracy * 100))
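The per-example weight construction in this example places a single 1.0 per row, at the first EOS position after the name. A NumPy mirror of that layout (hypothetical sizes), which may be easier to read than the sparse_to_dense form:

import numpy as np

# lengths[i] is the index of the first EOS after name i.
BATCH_SIZE, TIMESTEPS = 4, 8
lengths = np.array([3, 5, 2, 7])
weights = np.zeros((BATCH_SIZE, TIMESTEPS), dtype=np.float32)
weights[np.arange(BATCH_SIZE), lengths] = 1.0
print(weights)  # exactly one 1.0 per row, at column lengths[i]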
def init_opt(self): self.input_tensor = input_tensor = tf.placeholder(tf.float32, [self.batch_size, self.dataset.image_dim]) with pt.defaults_scope(phase=pt.Phase.train): z_var = self.model.latent_dist.sample_prior(self.batch_size) self.fake_x, _ = self.model.generate(z_var) real_d, _, _, _ = self.model.discriminate(input_tensor) fake_d, _, fake_reg_z_dist_info, _ = self.model.discriminate(self.fake_x) reg_z = self.model.reg_z(z_var) discriminator_loss = - tf.reduce_mean(tf.log(real_d + TINY) + tf.log(1. - fake_d + TINY)) generator_loss = - tf.reduce_mean(tf.log(fake_d + TINY)) self.log_vars.append(("discriminator_loss", discriminator_loss)) self.log_vars.append(("generator_loss", generator_loss)) mi_est = tf.constant(0.) cross_ent = tf.constant(0.) # compute for discrete and continuous codes separately # discrete: if len(self.model.reg_disc_latent_dist.dists) > 0: disc_reg_z = self.model.disc_reg_z(reg_z) disc_reg_dist_info = self.model.disc_reg_dist_info(fake_reg_z_dist_info) disc_log_q_c_given_x = self.model.reg_disc_latent_dist.logli(disc_reg_z, disc_reg_dist_info) disc_log_q_c = self.model.reg_disc_latent_dist.logli_prior(disc_reg_z) disc_cross_ent = tf.reduce_mean(-disc_log_q_c_given_x) disc_ent = tf.reduce_mean(-disc_log_q_c) disc_mi_est = disc_ent - disc_cross_ent mi_est += disc_mi_est cross_ent += disc_cross_ent self.log_vars.append(("MI_disc", disc_mi_est)) self.log_vars.append(("CrossEnt_disc", disc_cross_ent)) discriminator_loss -= self.info_reg_coeff * disc_mi_est generator_loss -= self.info_reg_coeff * disc_mi_est if len(self.model.reg_cont_latent_dist.dists) > 0: cont_reg_z = self.model.cont_reg_z(reg_z) cont_reg_dist_info = self.model.cont_reg_dist_info(fake_reg_z_dist_info) cont_log_q_c_given_x = self.model.reg_cont_latent_dist.logli(cont_reg_z, cont_reg_dist_info) cont_log_q_c = self.model.reg_cont_latent_dist.logli_prior(cont_reg_z) cont_cross_ent = tf.reduce_mean(-cont_log_q_c_given_x) cont_ent = tf.reduce_mean(-cont_log_q_c) cont_mi_est = cont_ent - cont_cross_ent mi_est += cont_mi_est cross_ent += cont_cross_ent self.log_vars.append(("MI_cont", cont_mi_est)) self.log_vars.append(("CrossEnt_cont", cont_cross_ent)) discriminator_loss -= self.info_reg_coeff * cont_mi_est generator_loss -= self.info_reg_coeff * cont_mi_est for idx, dist_info in enumerate(self.model.reg_latent_dist.split_dist_info(fake_reg_z_dist_info)): if "stddev" in dist_info: self.log_vars.append(("max_std_%d" % idx, tf.reduce_max(dist_info["stddev"]))) self.log_vars.append(("min_std_%d" % idx, tf.reduce_min(dist_info["stddev"]))) self.log_vars.append(("MI", mi_est)) self.log_vars.append(("CrossEnt", cross_ent)) all_vars = tf.trainable_variables() d_vars = [var for var in all_vars if var.name.startswith('d_')] g_vars = [var for var in all_vars if var.name.startswith('g_')] self.log_vars.append(("max_real_d", tf.reduce_max(real_d))) self.log_vars.append(("min_real_d", tf.reduce_min(real_d))) self.log_vars.append(("max_fake_d", tf.reduce_max(fake_d))) self.log_vars.append(("min_fake_d", tf.reduce_min(fake_d))) discriminator_optimizer = tf.train.AdamOptimizer(self.discriminator_learning_rate, beta1=0.5) self.discriminator_trainer = pt.apply_optimizer(discriminator_optimizer, losses=[discriminator_loss], var_list=d_vars) generator_optimizer = tf.train.AdamOptimizer(self.generator_learning_rate, beta1=0.5) self.generator_trainer = pt.apply_optimizer(generator_optimizer, losses=[generator_loss], var_list=g_vars) for k, v in self.log_vars: tf.scalar_summary(k, v) with pt.defaults_scope(phase=pt.Phase.test): 
with tf.variable_scope("model", reuse=True) as scope: self.visualize_all_factors()
def initialize(self, optimizer=None, var_list=None, use_prettytensor=False, *args, **kwargs): """Initialize variational inference. Parameters ---------- optimizer : str or tf.train.Optimizer, optional A TensorFlow optimizer, to use for optimizing the variational objective. Alternatively, one can pass in the name of a TensorFlow optimizer, and default parameters for the optimizer will be used. var_list : list of tf.Variable, optional List of TensorFlow variables to optimize over. Default is all trainable variables that ``latent_vars`` and ``data`` depend on, excluding those that are only used in conditionals in ``data``. use_prettytensor : bool, optional ``True`` if aim to use PrettyTensor optimizer (when using PrettyTensor) or ``False`` if aim to use TensorFlow optimizer. Defaults to TensorFlow. """ super(VariationalInference, self).initialize(*args, **kwargs) if var_list is None: if self.model_wrapper is None: # Traverse random variable graphs to get default list of variables. var_list = set([]) trainables = tf.trainable_variables() for z, qz in six.iteritems(self.latent_vars): if isinstance(z, RandomVariable): var_list.update(get_variables(z, collection=trainables)) var_list.update(get_variables(qz, collection=trainables)) for x, qx in six.iteritems(self.data): if isinstance(x, RandomVariable) and \ not isinstance(qx, RandomVariable): var_list.update(get_variables(x, collection=trainables)) var_list = list(var_list) else: # Variables may not be instantiated for model wrappers until # their methods are first called. For now, hard-code # ``var_list`` inside build_losses. var_list = None self.loss, grads_and_vars = self.build_loss_and_gradients(var_list) if optimizer is None: # Use ADAM with a decaying scale factor. global_step = tf.Variable(0, trainable=False) starter_learning_rate = 0.1 learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 100, 0.9, staircase=True) optimizer = tf.train.AdamOptimizer(learning_rate) elif isinstance(optimizer, str): if optimizer == 'gradientdescent': optimizer = tf.train.GradientDescentOptimizer(0.01) elif optimizer == 'adadelta': optimizer = tf.train.AdadeltaOptimizer() elif optimizer == 'adagrad': optimizer = tf.train.AdagradOptimizer(0.01) elif optimizer == 'momentum': optimizer = tf.train.MomentumOptimizer(0.01, 0.9) elif optimizer == 'adam': optimizer = tf.train.AdamOptimizer() elif optimizer == 'ftrl': optimizer = tf.train.FtrlOptimizer(0.01) elif optimizer == 'rmsprop': optimizer = tf.train.RMSPropOptimizer(0.01) else: raise ValueError('Optimizer class not found:', optimizer) global_step = None elif isinstance(optimizer, tf.train.Optimizer): # Custom optimizers have no control over global_step. global_step = None else: raise TypeError() if not use_prettytensor: self.train = optimizer.apply_gradients(grads_and_vars, global_step=global_step) else: # Note PrettyTensor optimizer does not accept manual updates; # it autodiffs the loss directly. self.train = pt.apply_optimizer(optimizer, losses=[self.loss], global_step=global_step, var_list=var_list)
def main(_=None): print 'Starting Shakespeare' # Since we are feeding our data as numpy arrays, we need to create # placeholders in the graph. # These must then be fed using the feed dict. input_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS]) output_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS]) merged_size = BATCH_SIZE * TIMESTEPS inp = data_utils.reshape_data(input_placeholder) # We need a dense output to calculate loss and accuracy. # sparse_to_dense does a lookup using the indices from the first Tensor. # Because we are filling in a 2D array, the indices need to be 2 dimensional. t = tf.concat(1, [ tf.constant( numpy.arange(merged_size).reshape((merged_size, 1)), dtype=tf.int32), data_utils.reshape_data(output_placeholder) ]) labels = tf.sparse_to_dense(t, [merged_size, CHARS], 1.0, 0.0) # Some ops have different behaviors in test vs train and these take a phase # argument. with tf.variable_scope('shakespeare'): training_logits = create_model(inp, TIMESTEPS, pt.Phase.train) # Create the result. Softmax applies softmax and creates a cross entropy # loss. The result is a namedtuple. training_result = training_logits.softmax(labels) # Create the gradient optimizer and apply it to the graph. # pt.apply_optimizer adds regularization losses and sets up a step counter # (pt.global_step()) for you. optimizer = tf.train.AdagradOptimizer(0.5) train_op = pt.apply_optimizer(optimizer, losses=[training_result.loss]) # For tracking accuracy in evaluation, we need to add an evaluation node. # We only run this when testing, so we need to specify that in the phase. # We also want to disable dropout, so we pass the phase to create_model. # Call variable scope by name so we also create a name scope. This ensures # that we share variables and our names are properly organized. with tf.variable_scope('shakespeare', reuse=True): test_logits = create_model(inp, TIMESTEPS, pt.Phase.test) test_result = test_logits.softmax(labels) # Accuracy creates variables, so make it outside of the above scope. accuracy = test_result.softmax.evaluate_classifier(labels, phase=pt.Phase.test) # Create an inference model so that we can sample. The big difference is # that the input is a single character and it requires reset nodes. with tf.variable_scope('shakespeare', reuse=True): inference_input = tf.placeholder(tf.int32, []) # Needs to be 2 dimensional so that it matches the dims of the other models. reshaped = pt.wrap(inference_input).reshape([1, 1]) inference_logits = create_model(reshaped, 1, pt.Phase.infer) # Grab the data as numpy arrays. shakespeare = data_utils.shakespeare(TIMESTEPS + 1) shakespeare_in = shakespeare[:, :-1] shakespeare_out = shakespeare[:, 1:] # We can set a save_path in the runner to automatically checkpoint every so # often. Otherwise at the end of the session, the model will be lost. runner = pt.train.Runner(save_path=FLAGS.save_path) with tf.Session(): for epoch in xrange(FLAGS.epochs): # Shuffle the training data. 
shakespeare_in, shakespeare_out = data_utils.permute_data( (shakespeare_in, shakespeare_out)) runner.train_model(train_op, training_result.loss, len(shakespeare_in) / BATCH_SIZE, feed_vars=(input_placeholder, output_placeholder), feed_data=pt.train.feed_numpy( BATCH_SIZE, shakespeare_in, shakespeare_out), print_every=10) classification_accuracy = runner.evaluate_model( accuracy, len(shakespeare_in) / BATCH_SIZE, feed_vars=(input_placeholder, output_placeholder), feed_data=pt.train.feed_numpy(BATCH_SIZE, shakespeare_in, shakespeare_out)) print 'Next character accuracy after epoch %d: %g%%' % ( epoch + 1, classification_accuracy * 100) # Use a temperature smaller than 1 because the early stages of the model # don't assign much confidence. print sample(inference_input, inference_logits, max_length=128, temperature=0.5) # Print a sampling from the model. print sample(inference_input, inference_logits)
import tensorflow as tf  # used throughout; missing from the original imports
import prettytensor as pt
import numpy as np
import cmtf.data.data_mnist as data_mnist

# Data
mnist = data_mnist.read_data_sets(one_hot=True)

x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])

pretty_input = pt.wrap(x)
softmax, loss = (
    pretty_input.
    fully_connected(100, activation_fn=tf.nn.relu).
    fully_connected(10, activation_fn=None).
    softmax_classifier(10, labels=y))
accuracy = softmax.evaluate_classifier(y)

optimizer = tf.train.GradientDescentOptimizer(0.01)  # learning rate
train_op = pt.apply_optimizer(optimizer, losses=[loss])

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    # train
    for i in range(2000):
        batch_xs, batch_ys = mnist.train.next_batch(100)
        _, loss_val = sess.run([train_op, loss], feed_dict={x: batch_xs, y: batch_ys})
        if (i+1) % 100 == 0:
            print 'index: %d, loss: %f' % (i+1, loss_val)
    # test
    accuracy_value = sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels})
    print 'Accuracy: %g' % accuracy_value
def initialize(self, n_iter=1000, n_minibatch=None, n_print=100,
               optimizer=None, scope=None):
  """Initialize variational inference algorithm.

  Set up ``tf.train.AdamOptimizer`` with a decaying scale factor.
  Initialize all variables.

  Parameters
  ----------
  n_iter : int, optional
    Number of iterations for optimization.
  n_minibatch : int, optional
    Number of samples for data subsampling. Default is to use all
    the data. Subsampling is available only if all data passed in
    are NumPy arrays and the model is not a Stan model. For
    subsampling details, see ``tf.train.slice_input_producer`` and
    ``tf.train.batch``.
  n_print : int, optional
    Number of iterations between progress printouts. To suppress
    progress printing, specify None.
  optimizer : str, optional
    If None (the default), optimize with a TensorFlow Adam optimizer
    on a decaying learning rate; otherwise optimize with the
    PrettyTensor optimizer (a fixed-rate Adam).
  scope : str, optional
    Scope of TensorFlow variable objects to optimize over.
  """
  self.n_iter = n_iter
  self.n_minibatch = n_minibatch
  self.n_print = n_print
  self.loss = tf.constant(0.0)

  if n_minibatch is not None and not isinstance(self.model, StanModel):
    # Re-assign data to batch tensors, with size given by
    # ``n_minibatch``.
    values = list(six.itervalues(self.data))
    slices = tf.train.slice_input_producer(values)
    # By default use as many threads as CPUs.
    batches = tf.train.batch(slices, n_minibatch,
                             num_threads=multiprocessing.cpu_count())
    if not isinstance(batches, list):
      # ``tf.train.batch`` returns tf.Tensor if ``slices`` is a
      # list of size 1.
      batches = [batches]

    self.data = {key: value for key, value in
                 zip(six.iterkeys(self.data), batches)}

  loss = self.build_loss()
  if optimizer is None:
    var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 scope=scope)
    # Use ADAM with a decaying scale factor.
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 0.1
    learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                               global_step,
                                               100, 0.9, staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    self.train = optimizer.minimize(loss, global_step=global_step,
                                    var_list=var_list)
  else:
    if scope is not None:
      raise NotImplementedError("PrettyTensor optimizer does not accept "
                                "a variable scope.")
    optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
    self.train = pt.apply_optimizer(optimizer, losses=[loss])

  init = tf.initialize_all_variables()
  init.run()

  # Start input enqueue threads.
  self.coord = tf.train.Coordinator()
  self.threads = tf.train.start_queue_runners(coord=self.coord)
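The n_minibatch path above rebatches NumPy data through TF1 input queues. A self-contained sketch of that slice_input_producer / batch / Coordinator pattern, with made-up data:

import multiprocessing
import numpy as np
import tensorflow as tf

data = np.arange(100, dtype=np.float32).reshape(50, 2)
slices = tf.train.slice_input_producer([tf.constant(data)])
batches = tf.train.batch(slices, 8,
                         num_threads=multiprocessing.cpu_count())
if not isinstance(batches, list):
    batches = [batches]  # single input tensor comes back as a bare Tensor

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    print(sess.run(batches[0]).shape)  # (8, 2): one queued minibatch
    coord.request_stop()
    coord.join(threads)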
def init_opt(self): #self.input_tensor = input_tensor = tf.placeholder(tf.float32, [self.batch_size, self.dataset.image_dim]) #self.images = tf.placeholder(tf.float32, [self.batch_size, self.image_size,self.image_size,1]) self.images = tf.placeholder(tf.float32, [self.batch_size, self.dataset.image_size, self.dataset.image_size, self.dataset.c_dim ]) pstr('self.input_tensor',self.input_tensor) with pt.defaults_scope(phase=pt.Phase.train): z_var = self.model.latent_dist.sample_prior(self.batch_size) pstr('0 batch_size',self.batch_size) pstr('1 z_var',z_var) #print("1 %d | " % z_var ) fake_x, _ = self.model.generate(z_var) pstr('1.1 fake_x',fake_x) pstr('1.1 self.images',self.images) real_d, _, _, _, real_d_log = self.model.discriminate(self.images) fake_d, sample, fake_reg_z_dist_info, reg_dist_flat, fake_d_log = self.model.discriminate(fake_x) #d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(real_d_log, tf.ones_like(real_d))) #d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(fake_d_log, tf.zeros_like(fake_d))) #discriminator_loss = d_loss_real + d_loss_fake #generator_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(fake_d_log, tf.ones_like(fake_d))) pstr('1.1.2 sample',sample) pstr('1.1.2 fake_reg_z_dist_info',fake_reg_z_dist_info) pstr('1.1.2 reg_dist_flat',reg_dist_flat) pstr('1.1 fake_d',fake_d) pstr('1.5 fake_reg_z_dist_info',fake_reg_z_dist_info) reg_z = self.model.reg_z(z_var) pstr('2 reg_z',reg_z) discriminator_loss = - tf.reduce_mean(tf.log(real_d + TINY) + tf.log(1. - fake_d + TINY)) generator_loss = - tf.reduce_mean(tf.log(fake_d + TINY)) self.log_vars.append(("discriminator_loss", discriminator_loss)) self.log_vars.append(("generator_loss", generator_loss)) mi_est = tf.constant(0.) cross_ent = tf.constant(0.) 
# compute for discrete and continuous codes separately # discrete: if len(self.model.reg_disc_latent_dist.dists) > 0: disc_reg_z = self.model.disc_reg_z(reg_z) pstr('3 disc_reg_z',disc_reg_z) disc_reg_dist_info = self.model.disc_reg_dist_info(fake_reg_z_dist_info) pstr('4 disc_reg_dist_info',disc_reg_dist_info) disc_log_q_c_given_x = self.model.reg_disc_latent_dist.logli(disc_reg_z, disc_reg_dist_info) pstr('5 disc_log_q_c_given_x',disc_log_q_c_given_x) disc_log_q_c = self.model.reg_disc_latent_dist.logli_prior(disc_reg_z) pstr('6 disc_log_q_c',disc_log_q_c) disc_cross_ent = tf.reduce_mean(-disc_log_q_c_given_x) pstr('7 disc_cross_ent',disc_cross_ent) disc_ent = tf.reduce_mean(-disc_log_q_c) disc_mi_est = disc_ent - disc_cross_ent mi_est += disc_mi_est cross_ent += disc_cross_ent self.log_vars.append(("MI_disc", disc_mi_est)) self.log_vars.append(("CrossEnt_disc", disc_cross_ent)) discriminator_loss -= 10 * disc_mi_est generator_loss -= self.info_reg_coeff * disc_mi_est if len(self.model.reg_cont_latent_dist.dists) > 0: cont_reg_z = self.model.cont_reg_z(reg_z) pstr('8 cont_reg_z',cont_reg_z) cont_reg_dist_info = self.model.cont_reg_dist_info(fake_reg_z_dist_info) pstr('9 cont_reg_dist_info', cont_reg_dist_info) cont_log_q_c_given_x = self.model.reg_cont_latent_dist.logli(cont_reg_z, cont_reg_dist_info) pstr('10 cont_log_q_c_given_x', cont_log_q_c_given_x) cont_log_q_c = self.model.reg_cont_latent_dist.logli_prior(cont_reg_z) pstr('11 cont_log_q_c',cont_log_q_c) cont_cross_ent = tf.reduce_mean(-cont_log_q_c_given_x) cont_ent = tf.reduce_mean(-cont_log_q_c) cont_mi_est = cont_ent - cont_cross_ent mi_est += cont_mi_est cross_ent += cont_cross_ent self.log_vars.append(("MI_cont", cont_mi_est)) self.log_vars.append(("CrossEnt_cont", cont_cross_ent)) discriminator_loss -= 10 * cont_mi_est generator_loss -= self.info_reg_coeff * cont_mi_est pstr('1.1 generator_loss',generator_loss) for idx, dist_info in enumerate(self.model.reg_latent_dist.split_dist_info(fake_reg_z_dist_info)): if "stddev" in dist_info: self.log_vars.append(("max_std_%d" % idx, tf.reduce_max(dist_info["stddev"]))) self.log_vars.append(("min_std_%d" % idx, tf.reduce_min(dist_info["stddev"]))) self.log_vars.append(("MI", mi_est)) self.log_vars.append(("CrossEnt", cross_ent)) all_vars = tf.trainable_variables() d_vars = [var for var in all_vars if var.name.startswith('d_')] g_vars = [var for var in all_vars if var.name.startswith('g_')] pstr('1.1 g_vars',g_vars) self.log_vars.append(("max_real_d", tf.reduce_max(real_d))) self.log_vars.append(("min_real_d", tf.reduce_min(real_d))) self.log_vars.append(("max_fake_d", tf.reduce_max(fake_d))) self.log_vars.append(("min_fake_d", tf.reduce_min(fake_d))) discriminator_optimizer = tf.train.AdamOptimizer(self.discriminator_learning_rate, beta1=0.1) self.discriminator_trainer = pt.apply_optimizer(discriminator_optimizer, losses=[discriminator_loss], var_list=d_vars) generator_optimizer = tf.train.AdamOptimizer(self.generator_learning_rate, beta1=0.1,epsilon=1e-1024) self.generator_trainer = pt.apply_optimizer(generator_optimizer, losses=[generator_loss], var_list=g_vars) for k, v in self.log_vars: tf.scalar_summary(k, v) with pt.defaults_scope(phase=pt.Phase.test): with tf.variable_scope("model", reuse=True) as scope: self.visualize_all_factors()
def initialize(self, optimizer=None, scope=None, use_prettytensor=False, *args, **kwargs): """Initialize variational inference algorithm. Initialize all variables. Parameters ---------- optimizer : str or tf.train.Optimizer, optional A TensorFlow optimizer, to use for optimizing the variational objective. Alternatively, one can pass in the name of a TensorFlow optimizer, and default parameters for the optimizer will be used. scope : str, optional Scope of TensorFlow variables to optimize over. Default is all trainable variables. use_prettytensor : bool, optional ``True`` if aim to use TensorFlow optimizer or ``False`` if aim to use PrettyTensor optimizer (when using PrettyTensor). Defaults to TensorFlow. """ super(VariationalInference, self).initialize(*args, **kwargs) self.loss = tf.constant(0.0) if optimizer is None: # Use ADAM with a decaying scale factor. global_step = tf.Variable(0, trainable=False) starter_learning_rate = 0.1 learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 100, 0.9, staircase=True) optimizer = tf.train.AdamOptimizer(learning_rate) elif isinstance(optimizer, str): if optimizer == 'gradientdescent': optimizer = tf.train.GradientDescentOptimizer(0.01) elif optimizer == 'adadelta': optimizer = tf.train.AdadeltaOptimizer() elif optimizer == 'adagrad': optimizer = tf.train.AdagradOptimizer(0.01) elif optimizer == 'momentum': optimizer = tf.train.MomentumOptimizer(0.01, 0.9) elif optimizer == 'adam': optimizer = tf.train.AdamOptimizer() elif optimizer == 'ftrl': optimizer = tf.train.FtrlOptimizer(0.01) elif optimizer == 'rmsprop': optimizer = tf.train.RMSPropOptimizer(0.01) else: raise ValueError('Optimizer class not found:', optimizer) global_step = None elif isinstance(optimizer, tf.train.Optimizer): # Custom optimizers have no control over global_step. global_step = None else: raise TypeError() if getattr(self, 'build_loss_and_gradients', None) is not None: self.loss, grads_and_vars = self.build_loss_and_gradients(scope=scope) else: self.loss = self.build_loss() var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope) grads_and_vars = optimizer.compute_gradients(self.loss, var_list=var_list) if not use_prettytensor: self.train = optimizer.apply_gradients(grads_and_vars, global_step=global_step) else: if getattr(self, 'build_loss_and_gradients', None) is not None: raise NotImplementedError("PrettyTensor optimizer does not accept " "manual gradients.") self.train = pt.apply_optimizer(optimizer, losses=[self.loss], global_step=global_step, var_list=var_list)
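The scope argument above narrows optimization to variables collected from a named variable scope. A small sketch of that collection call:

import tensorflow as tf

with tf.variable_scope('variational'):
    tf.get_variable('qz_loc', [3])
with tf.variable_scope('model'):
    tf.get_variable('w', [3])

var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                             scope='variational')
print([v.name for v in var_list])  # only the 'variational/...' variables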
vae_loss = get_vae_cost(mean, stddev) #rec_loss = get_reconstruction_cost(output_tensor, input_tensor) #loss = vae_loss + rec_loss loss = get_dvib_cost(mean, stddev, output_tensor, label_tensor) with tf.name_scope('accuracy'): with tf.name_scope('correct_prediction'): correct_prediction = tf.equal( tf.sign(output_tensor), tf.cast(tf.sign(label_tensor), tf.float32)) with tf.name_scope('accuracy'): accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate, epsilon=1.0) train = pt.apply_optimizer(optimizer, losses=[loss]) saver = tf.train.Saver() init = tf.initialize_all_variables() # Config session for memory config = tf.ConfigProto() #config.gpu_options.allow_growth = True config.gpu_options.per_process_gpu_memory_fraction = 0.5 config.log_device_placement = True with tf.Session(config=config) as sess: sess.run(init) for epoch in range(FLAGS.max_epoch): training_loss = 0.0
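get_vae_cost(mean, stddev) is defined elsewhere in this example; for reference, here is a sketch of the standard closed-form KL(N(mean, stddev^2) || N(0, I)) that such a cost usually computes. This is an assumption about the helper, not its actual source.

import tensorflow as tf

def kl_to_standard_normal(mean, stddev, epsilon=1e-8):
    # Closed-form KL(N(mean, stddev^2) || N(0, I)), summed over batch and
    # dimensions: 0.5 * (mu^2 + sigma^2 - 2*log(sigma) - 1) per dimension.
    return tf.reduce_sum(0.5 * (tf.square(mean) + tf.square(stddev)
                                - 2.0 * tf.log(stddev + epsilon) - 1.0))

mean = tf.zeros([2, 3])
stddev = tf.ones([2, 3])
with tf.Session() as sess:
    print(sess.run(kl_to_standard_normal(mean, stddev)))  # ~0 for N(0, I)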
def initialize(self, optimizer=None, var_list=None, use_prettytensor=False, global_step=None, *args, **kwargs): """Initialize inference algorithm. It initializes hyperparameters and builds ops for the algorithm's computation graph. Args: optimizer: str or tf.train.Optimizer, optional. A TensorFlow optimizer, to use for optimizing the variational objective. Alternatively, one can pass in the name of a TensorFlow optimizer, and default parameters for the optimizer will be used. var_list: list of tf.Variable, optional. List of TensorFlow variables to optimize over. Default is all trainable variables that `latent_vars` and `data` depend on, excluding those that are only used in conditionals in `data`. use_prettytensor: bool, optional. `True` if aim to use PrettyTensor optimizer (when using PrettyTensor) or `False` if aim to use TensorFlow optimizer. Defaults to TensorFlow. global_step: tf.Variable, optional. A TensorFlow variable to hold the global step. """ super(VariationalInference, self).initialize(*args, **kwargs) if var_list is None: # Traverse random variable graphs to get default list of variables. var_list = set() trainables = tf.trainable_variables() for z, qz in six.iteritems(self.latent_vars): if isinstance(z, RandomVariable): var_list.update(get_variables(z, collection=trainables)) var_list.update(get_variables(qz, collection=trainables)) for x, qx in six.iteritems(self.data): if isinstance(x, RandomVariable) and \ not isinstance(qx, RandomVariable): var_list.update(get_variables(x, collection=trainables)) var_list = list(var_list) self.loss, grads_and_vars = self.build_loss_and_gradients(var_list) if self.logging: summary_key = 'summaries_' + str(id(self)) tf.summary.scalar("loss", self.loss, collections=[summary_key]) for grad, var in grads_and_vars: # replace colons which are an invalid character tf.summary.histogram("gradient/" + var.name.replace(':', '/'), grad, collections=[summary_key]) tf.summary.scalar("gradient_norm/" + var.name.replace(':', '/'), tf.norm(grad), collections=[summary_key]) self.summarize = tf.summary.merge_all(key=summary_key) if optimizer is None and global_step is None: # Default optimizer always uses a global step variable. global_step = tf.Variable(0, trainable=False, name="global_step") if isinstance(global_step, tf.Variable): starter_learning_rate = 0.1 learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 100, 0.9, staircase=True) else: learning_rate = 0.01 # Build optimizer. 
if optimizer is None: optimizer = tf.train.AdamOptimizer(learning_rate) elif isinstance(optimizer, str): if optimizer == 'gradientdescent': optimizer = tf.train.GradientDescentOptimizer(learning_rate) elif optimizer == 'adadelta': optimizer = tf.train.AdadeltaOptimizer(learning_rate) elif optimizer == 'adagrad': optimizer = tf.train.AdagradOptimizer(learning_rate) elif optimizer == 'momentum': optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9) elif optimizer == 'adam': optimizer = tf.train.AdamOptimizer(learning_rate) elif optimizer == 'ftrl': optimizer = tf.train.FtrlOptimizer(learning_rate) elif optimizer == 'rmsprop': optimizer = tf.train.RMSPropOptimizer(learning_rate) else: raise ValueError('Optimizer class not found:', optimizer) elif not isinstance(optimizer, tf.train.Optimizer): raise TypeError("Optimizer must be str, tf.train.Optimizer, or None.") scope = "optimizer_" + str(id(self)) with tf.variable_scope(scope): if not use_prettytensor: self.train = optimizer.apply_gradients(grads_and_vars, global_step=global_step) else: # Note PrettyTensor optimizer does not accept manual updates; # it autodiffs the loss directly. self.train = pt.apply_optimizer(optimizer, losses=[self.loss], global_step=global_step, var_list=var_list) self.reset.append(tf.variables_initializer( tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope)))
dataX, dataY = utils.get_data_batches(NUM_DATA_PTS, BATCH_SIZE) # Set up the neural network using PrettyTensor. # Use a simple CNN with one hidden layer of 50 neurons. input_t = tf.placeholder(tf.float32, (BATCH_SIZE, dataX[0].shape[1], dataX[0].shape[2], 1), name="input_t") labels_t = tf.placeholder(tf.float32, (BATCH_SIZE, NUM_CLASSES), name="labels_t") input_p = prettytensor.wrap(input_t) hidden_p = (input_p .conv2d(3, 4, edges='VALID') .max_pool(2, 2) .flatten() .fully_connected(50)) softmax_p, loss_p = hidden_p.softmax_classifier(NUM_CLASSES, labels_t) optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE) optim_p = prettytensor.apply_optimizer(optimizer, losses=[loss_p]) # Train and evaluate the neural network. with tf.Session() as sess: tf.initialize_all_variables().run() loss_over_time = [] vloss_over_time = [] for epoch in range(NUM_EPOCHS): # Save 1 batch for validation. for i in range(len(dataX)-1): loss, _ = sess.run([loss_p, optim_p], { input_t: dataX[i], labels_t: dataY[i] }) loss_over_time.append(loss)
def main(_=None, weight_init=None, activation_f=tf.nn.sigmoid, data_min=0, data_scale=1.0, epochs=3,learning_rate=None): tf.reset_default_graph() input_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 2]) output_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 28, 28, 1]) # Grab the data as numpy arrays. train_input, train_output = data_utils.mnist(training=True) test_input, test_output = data_utils.mnist(training=False) train_set = ut.mnist_select_n_classes(train_input, train_output, NUM_CLASSES, min=data_min, scale=data_scale) test_set = ut.mnist_select_n_classes(test_input, test_output, NUM_CLASSES, min=data_min, scale=data_scale) train_input, train_output = train_set[1], train_set[0] test_input, test_output = test_set[1], test_set[0] ut.print_info('train (min, max): (%f, %f)' % (np.min(train_set[0]), np.max(train_set[0]))) visual_inputs, visual_output = train_set[1][0:BATCH_SIZE], train_set[0][0:BATCH_SIZE] epoch_reconstruction = [] EPOCH_SIZE = len(train_input) // BATCH_SIZE TEST_SIZE = len(test_input) // BATCH_SIZE ut.print_info('train: %s' % str(train_input.shape)) ut.print_info('test: %s' % str(test_input.shape)) ut.print_info('output shape: %s' % str(train_output[0].shape)) assert visual_inputs.shape == input_placeholder.get_shape() assert len(train_input.shape) == len(input_placeholder.get_shape()) assert len(test_input.shape) == len(input_placeholder.get_shape()) assert visual_output.shape == output_placeholder.get_shape() assert len(train_output.shape) == len(output_placeholder.get_shape()) assert len(test_output.shape) == len(output_placeholder.get_shape()) with pt.defaults_scope(activation_fn=activation_f, # batch_normalize=True, # learned_moments_update_rate=0.0003, # variance_epsilon=0.001, # scale_after_normalization=True ): with pt.defaults_scope(phase=pt.Phase.train): with tf.variable_scope("model") as scope: output_tensor = decoder(encoder(input_placeholder), weight_init=weight_init) pretty_loss = loss(output_tensor, output_placeholder) optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) # optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate) train = pt.apply_optimizer(optimizer, losses=[pretty_loss]) init = tf.initialize_all_variables() runner = pt.train.Runner(save_path=FLAGS.save_path) best_q = 100000 with tf.Session() as sess: sess.run(init) for epoch in xrange(epochs): # Shuffle the training data. if epoch % np.ceil(epochs / 40.0) == 0 or epoch + 1 == epochs: reconstruct, loss_value = sess.run([output_tensor, pretty_loss], {input_placeholder: visual_inputs, output_placeholder: visual_output}) epoch_reconstruction.append(reconstruct) ut.print_info('epoch:%d (min, max): (%f %f)' %(epoch, np.min(reconstruct), np.max(reconstruct))) train_input, train_output = data_utils.permute_data( (train_input, train_output)) runner.train_model( train, pretty_loss, EPOCH_SIZE, feed_vars=(input_placeholder, output_placeholder), feed_data=pt.train.feed_numpy(BATCH_SIZE, train_input, train_output) ) accuracy = runner.evaluate_model( pretty_loss, TEST_SIZE, feed_vars=(input_placeholder, output_placeholder), feed_data=pt.train.feed_numpy(BATCH_SIZE, test_input, test_output)) ut.print_time('Accuracy after %d epoch %g%%' % ( epoch + 1, accuracy * 100)) if best_q > accuracy * 10: best_q = accuracy * 10 ut.reconstruct_images_epochs(np.asarray(epoch_reconstruction), visual_output, save_params={'suf':'mn_trivs', 'act':activation_f, 'e':epochs, 'opt':optimizer, 'lr': learning_rate, 'init':weight_init, 'acu': int(best_q)})