def finetune_parameters(self, output_dim, epochs=1, batch_method="random"):
    """Performs fine-tuning on all parameters of the neural network, plus two
    additional softmax variables. Call this method after `pretrain_network`
    is complete. Y values should be represented in one-hot format.

    :param output_dim: An int, the number of classes in the target
        classification problem. Ex: 10 for MNIST.
    :param epochs: An int, the number of iterations to tune through the
        entire dataset.
    :param batch_method: A string, either 'random' or 'sequential', to
        indicate how batches are retrieved.
    :return: The tuned softmax parameters (weights and biases) of the
        classification layer.
    """
    data = input_data.read_data_sets("data/MNIST", one_hot=True)
    x_train = data.train.images
    y_label = data.train.labels
    xy_train = list(zip(x_train, y_label))
    if batch_method == "random":
        np.random.shuffle(xy_train)
    # finetune_parameters_gen shuffles and batches the data internally each
    # epoch, so pass the flat list of (x, y) pairs rather than pre-batched data.
    return self.finetune_parameters_gen(xy_train_gen=xy_train,
                                        output_dim=output_dim,
                                        epochs=epochs)
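# A minimal usage sketch (illustrative only; the epochs and output_dim values
# are placeholders): pretrain the stack greedily, then fine-tune all weights
# plus the softmax layer, mirroring the __main__ block at the bottom of this file.
#
#     sess = tf.Session()
#     sda = SDAutoencoder(dims=[784, 256, 64, 32],
#                         activations=["relu", "relu", "relu"],
#                         sess=sess, noise=0.1)
#     sda.pre_train_network()
#     tuned = sda.finetune_parameters(output_dim=10, epochs=10,
#                                     batch_method="random")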
def pre_train_network(self):
    print('Starting to pretrain autoencoder network.')
    data = input_data.read_data_sets("data/MNIST", one_hot=True)
    for i in range(len(self.hidden_layers)):
        # x_train = get_random_batch_generator(self.batch_size, FLAGS.x_train_path, repeat=FLAGS.epochs - 1)
        # x_train = get_batch_generator(FLAGS.x_train_path, self.batch_size, repeat=FLAGS.epochs - 1)
        x_train = data.train.images
        if FLAGS.batch_method == "random":
            np.random.shuffle(x_train)
        # pre_train_layer shuffles and batches the data internally for each
        # epoch, so pass the raw sample array plus the epoch count.
        self.pre_train_layer(i, x_train, FLAGS.epochs)
    print('Finished pretraining of autoencoder network.')
if __name__ == '__main__':
    IMG_SIZE = 28
    gbrbm = GBRBM(IMG_SIZE * IMG_SIZE, 500, cdk=30, epoch=300)

    o_train_set_x = np.load('../theano_rbm/data/origin_target_train_28.npy')
    # print type(o_train_set_x), o_train_set_x.shape, np.max(o_train_set_x), np.min(o_train_set_x)
    # o_train_set_x = np.load('../theano_rbm/data/face_train_dataset_19.npy')

    sess = tf.Session()
    summary = tf.merge_all_summaries()
    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)
    sess.run(tf.initialize_all_variables())

    np.random.shuffle(o_train_set_x)
    batches = [_ for _ in utilities.gen_batches(o_train_set_x, int(gbrbm.batch_size))]

    updates = gbrbm.update_parameter()
    # updates = gbrbm.no_adaptive_update_parameter()
    anneal_counter = 0
    base_lrate = gbrbm.gradient_lr
    _, _, energy_sum = gbrbm.energy_function(gbrbm.input_img, gbrbm.W, gbrbm.v, gbrbm.h, gbrbm.sigma)
    # print updates
    # energy0_sum_rcon, energy0_sum_origin, energy_sum, energy_sum2, now_cost = sess.run(updates, feed_dict={gbrbm.input_img: batches[0]})
    # print energy0_sum_rcon, energy0_sum_origin, energy_sum, energy_sum2
    # print now_cost, type(now_cost)
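    # A minimal sketch (illustrative only) of the training loop this setup is
    # building toward, based on the commented sess.run call above; the
    # per-epoch re-shuffling and the epoch count of 300 are assumptions here:
    #
    #     for epoch_i in range(300):
    #         np.random.shuffle(o_train_set_x)
    #         batches = [_ for _ in utilities.gen_batches(o_train_set_x, int(gbrbm.batch_size))]
    #         for batch in batches:
    #             sess.run(updates, feed_dict={gbrbm.input_img: batch})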
def finetune_parameters_gen(self, xy_train_gen, output_dim, epochs):
    """An implementation of finetuning to support data feeding from generators."""
    sess = self.sess
    summary_list = []

    batch_s = tf.Variable(0, trainable=False)
    # Decay the learning rate every 30000 optimizer steps (see the worked
    # schedule after this method).
    learning_rate = tf.train.exponential_decay(
        0.0001,   # Base learning rate.
        batch_s,  # Current optimizer step, advanced by minimize() below.
        30000,    # Decay step.
        0.95,     # Decay rate.
        staircase=True)

    print("Starting to fine tune parameters of network.")
    with tf.name_scope("finetuning"):
        with tf.name_scope("inputs"):
            x = tf.placeholder(tf.float32, shape=[None, self.input_dim], name="raw_input")
            with tf.name_scope("fully_encoded"):
                x_encoded = self.get_encoded_input(x, depth=-1)  # Full-depth encoding

        """Note on W below: The difference between self.output_dim and output_dim
        is that the former is the output dimension of the autoencoder stack,
        which is the dimension of the new feature space. The latter is the
        dimension of the y value space for classification. Ex: If the output
        should be binary, then output_dim = 2."""

        with tf.name_scope("outputs"):
            y_logits = tf.matmul(x_encoded, self.W) + self.b
            with tf.name_scope("predicted"):
                y_pred = tf.nn.softmax(y_logits, name="y_pred")
                # attach_variable_summaries(y_pred, y_pred.name, summ_list=summary_list)
            with tf.name_scope("actual"):
                y_actual = tf.placeholder(tf.float32, shape=[None, output_dim], name="y_actual")
                # attach_variable_summaries(y_actual, y_actual.name, summ_list=summary_list)

        trainable_vars = self.get_all_variables(additional_layer=[self.W, self.b])
        trainable_weights = self.get_all_variables_weights(additional_layer=[self.W])

        with tf.name_scope('weights_norm'):
            weights_norm = tf.reduce_sum(
                input_tensor=tf.reduce_mean(
                    tf.pack([FLAGS.deacy_factor * tf.nn.l2_loss(weight)
                             for weight in trainable_weights])),
                name='weights_norm')

        with tf.name_scope("cross_entropy"):
            cross_entropy = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(y_logits, y_actual))
            # loss = cross_entropy + weights_norm
            loss = cross_entropy
            attach_scalar_summary(cross_entropy, "cross_entropy", summ_list=summary_list)
            attach_scalar_summary(learning_rate, 'finetune_lr', summ_list=summary_list)
            # attach_scalar_summary(loss, 'loss_val', summ_list=summary_list)

        with tf.name_scope("train_step"):
            train_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
                loss, var_list=trainable_vars, global_step=batch_s)
            # Use simple momentum for the optimization.
            # train_step = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(
            #     loss, var_list=trainable_vars, global_step=batch_s)

        with tf.name_scope("evaluation"):
            correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_actual, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            attach_scalar_summary(accuracy, "finetune_accuracy", summ_list=summary_list)

    sess.run(tf.initialize_all_variables())

    # Merge summaries and get a summary writer
    merged = tf.merge_summary(summary_list)
    train_writer = tf.train.SummaryWriter(FLAGS.log_dir + "/train/finetune", sess.graph)

    step = 0
    for i in range(epochs):
        np.random.shuffle(xy_train_gen)
        # print len(xy_train_gen)
        train_data_batchs = [_ for _ in utilities.gen_batches(xy_train_gen, FLAGS.batch_size)]
        for batch in train_data_batchs:
            batch_xs, batch_ys = zip(*batch)
            # print 'get xs batch size===', len(batch_xs), type(batch_xs[0]), batch_xs[0].shape
            # print 'get ys batch size===', len(batch_ys), type(batch_ys[0]), batch_ys[0].shape
            if step % self.print_step == 0:
                print("Step %s, batch accuracy: " % step,
                      sess.run(accuracy, feed_dict={x: batch_xs, y_actual: batch_ys}))
                # print('pop_mean in hidden layer 1 is :', sess.run(self.hidden_layers[1].pop_mean))

            # For debugging predicted y values
            if step % (self.print_step * 10) == 0:
                print("Predicted y-value:", sess.run(y_pred, feed_dict={x: batch_xs})[0])
                print("Actual y-value:", batch_ys[0])

            if step % FLAGS.log_step == 0:
                summary = sess.run(merged, feed_dict={x: batch_xs, y_actual: batch_ys})
                train_writer.add_summary(summary, global_step=step)

            # For debugging, break early.
            if FLAGS.debug and step > 5:
                break

            sess.run(train_step, feed_dict={x: batch_xs, y_actual: batch_ys})
            step += 1

    print("Completed fine-tuning of parameters.")
    tuned_params = {
        "layer1_weights": sess.run(self.hidden_layers[0].get_weight_variable()),
        "layer2_weights": sess.run(self.hidden_layers[1].get_weight_variable()),
        "layer3_weights": sess.run(self.hidden_layers[2].get_weight_variable()),
        "weights": sess.run(self.W),
        "biases": sess.run(self.b),
        # Note: pop_mean/pop_var are read from hidden_layers[1]; the key names
        # follow the original code.
        "layer1_pop_mean": sess.run(self.hidden_layers[1].pop_mean),
        "layer1_pop_var": sess.run(self.hidden_layers[1].pop_var)
    }
    return tuned_params
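# Worked example of the learning-rate schedule configured above (assuming
# batch_s advances by one per executed train_step, which is what
# minimize(..., global_step=batch_s) does): with staircase=True,
#
#     lr(step) = 0.0001 * 0.95 ** (step // 30000)
#
# so the rate holds at 1e-4 for the first 30000 optimizer steps, drops to
# 9.5e-5 for the next 30000, and so on.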
def pre_train_layer(self, depth, data, epoch):
    self.pretrain_lr = 0.01
    sess = self.sess
    print('Starting to pretrain layer %d.' % depth)
    hidden_layer = self.hidden_layers[depth]
    summary_list = []

    with tf.name_scope(hidden_layer.name):
        with tf.name_scope("x_values"):
            x_original = tf.placeholder(tf.float32, shape=[None, self.input_dim])
            x_latent = self.get_encoded_input(x_original, depth)
            # `corrupt` adds input noise (see the masking-noise sketch after this method).
            x_corrupt = corrupt(x_latent, corruption_level=self.noise)

        with tf.name_scope("encoded_and_decoded"):
            encoded = hidden_layer.encode(x_corrupt)
            encoded = tf.nn.dropout(encoded, keep_prob=0.5)
            decoded = hidden_layer.decode(encoded)
            # attach_variable_summaries(encoded, "encoded", summ_list=summary_list)
            # attach_variable_summaries(decoded, "decoded", summ_list=summary_list)
            attach_variable_summaries(hidden_layer.get_weight_variable(), "weights", summ_list=summary_list)

        with tf.name_scope('weights_norm'):
            weights_norm = tf.reduce_sum(
                input_tensor=FLAGS.deacy_factor * tf.nn.l2_loss(hidden_layer.get_weight_variable()),
                name='weights_norm')

        # Reconstruction loss
        with tf.name_scope("reconstruction_loss"):
            # loss = self.get_loss(x_latent, decoded)
            val_loss = self.get_l2_loss(x_latent, decoded)
            loss = val_loss + weights_norm
            attach_scalar_summary(loss, "%s_loss" % 'l2_loss', summ_list=summary_list)
            # attach_scalar_summary(self.pretrain_lr, 'pretrain_lr', summ_list=summary_list)

        trainable_vars = [hidden_layer.weights, hidden_layer.biases, hidden_layer.decode_biases]

        # Only optimize variables for this layer ("greedy")
        with tf.name_scope("train_step"):
            train_op = tf.train.AdamOptimizer(learning_rate=self.pretrain_lr).minimize(
                loss, var_list=trainable_vars)

    sess.run(tf.initialize_all_variables())

    # Merge summaries and get a summary writer
    merged = tf.merge_summary(summary_list)
    pretrain_writer = tf.train.SummaryWriter("model/" + hidden_layer.name, sess.graph)

    step = 0
    for i in range(epoch):
        np.random.shuffle(data)
        batches = [_ for _ in utilities.gen_batches(data, FLAGS.batch_size)]
        for batch_x_original in batches:
            sess.run(train_op, feed_dict={x_original: batch_x_original})

            if step % self.print_step == 0:
                loss_value = sess.run(loss, feed_dict={x_original: batch_x_original})
                encoded_mean = sess.run(tf.reduce_mean(encoded), feed_dict={x_original: batch_x_original})
                print("Step %s, batch %s loss = %s, encoded_mean = %s"
                      % (step, 'l2_loss', loss_value, encoded_mean))

            if step % FLAGS.log_step == 0:
                summary = sess.run(merged, feed_dict={x_original: batch_x_original})
                pretrain_writer.add_summary(summary, global_step=step)

            # Break for debugging purposes
            if FLAGS.debug and step > 5:
                break

            step += 1

        # if epoch % 5 == 0:
        #     if self.pretrain_lr >= 0.00001:
        #         self.pretrain_lr /= 2.0

    print("Finished pretraining of layer %d. Updated layer weights and biases." % depth)
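# A minimal sketch, purely for orientation, of the kind of masking noise the
# external `corrupt` helper is assumed to apply (its actual implementation
# lives elsewhere in this repo): each unit is zeroed independently with
# probability `corruption_level`.
#
#     def corrupt_sketch(x, corruption_level=0.1):
#         keep = tf.cast(
#             tf.greater_equal(tf.random_uniform(tf.shape(x), 0, 1), corruption_level),
#             tf.float32)
#         return x * keep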
# loss = auto_encoder.loss_corss_entropy(output)
optimize = auto_encoder.train(loss)
auto_encoder.summary_parameter(loss)

sess = tf.Session()
init = tf.initialize_all_variables()
sess.run(init)
writer = tf.train.SummaryWriter('model', sess.graph)
summary = tf.merge_all_summaries()

x_corrupted = _corrupt_input(o_train_set_x)
shuff = list(zip(o_train_set_x, x_corrupted))

for step in range(FLAGS.epochs):
    np.random.shuffle(shuff)
    batches = [_ for _ in utilities.gen_batches(shuff, FLAGS.batch_size)]
    start_time = time.time()
    for batch in batches:
        x_batch, x_corr_batch = zip(*batch)
        _, loss_value, summary_val, output_val = sess.run(
            [optimize, loss, summary, output],
            feed_dict={
                auto_encoder.input: x_corr_batch,
                auto_encoder.input_with_out_noise: x_batch
            })
        writer.add_summary(summary_val)
    duration = time.time() - start_time

    # Write the summaries and print an overview fairly often.
    if step % 100 == 0:
        # Print status to stdout.
        print('Step %d: loss = %.2f (%.3f sec) %.2f ' %
def finetune_parameters_gen(self, xy_train_gen, output_dim, epochs):
    """An implementation of finetuning to support data feeding from generators."""
    sess = self.sess
    summary_list = []

    print("Starting to fine tune parameters of network.")
    with tf.name_scope("finetuning"):
        with tf.name_scope("inputs"):
            x = tf.placeholder(tf.float32, shape=[None, self.input_dim], name="raw_input")
            with tf.name_scope("fully_encoded"):
                x_encoded = self.get_encoded_input(x, depth=-1)  # Full-depth encoding

        """Note on W below: The difference between self.output_dim and output_dim
        is that the former is the output dimension of the autoencoder stack,
        which is the dimension of the new feature space. The latter is the
        dimension of the y value space for classification. Ex: If the output
        should be binary, then output_dim = 2. (See the shape example after
        this method.)"""
        with tf.name_scope("softmax_variables"):
            self.W = weight_variable(self.output_dim, output_dim, name="weights")
            self.b = bias_variable(output_dim, initial_value=0, name="biases")
            attach_variable_summaries(self.W, self.W.name, summ_list=summary_list)
            attach_variable_summaries(self.b, self.b.name, summ_list=summary_list)

        with tf.name_scope("outputs"):
            y_logits = tf.matmul(x_encoded, self.W) + self.b
            with tf.name_scope("predicted"):
                y_pred = tf.nn.softmax(y_logits, name="y_pred")
                attach_variable_summaries(y_pred, y_pred.name, summ_list=summary_list)
            with tf.name_scope("actual"):
                y_actual = tf.placeholder(tf.float32, shape=[None, output_dim], name="y_actual")
                attach_variable_summaries(y_actual, y_actual.name, summ_list=summary_list)

        with tf.name_scope("cross_entropy"):
            cross_entropy = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(y_logits, y_actual))
            attach_scalar_summary(cross_entropy, "cross_entropy", summ_list=summary_list)

        trainable_vars = self.get_all_variables(additional_layer=[self.W, self.b])
        with tf.name_scope("train_step"):
            train_step = tf.train.AdamOptimizer(learning_rate=self.finetune_lr).minimize(
                cross_entropy, var_list=trainable_vars)

        with tf.name_scope("evaluation"):
            correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_actual, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            attach_scalar_summary(accuracy, "finetune_accuracy", summ_list=summary_list)

    sess.run(tf.initialize_all_variables())

    # Merge summaries and get a summary writer
    merged = tf.merge_summary(summary_list)
    train_writer = tf.train.SummaryWriter(FLAGS.log_dir + "/train/finetune", sess.graph)

    step = 0
    for i in range(epochs):
        np.random.shuffle(xy_train_gen)
        train_data_batchs = [_ for _ in utilities.gen_batches(xy_train_gen, FLAGS.batch_size)]
        for batch in train_data_batchs:
            batch_xs, batch_ys = zip(*batch)
            # print 'get xs batch size===', len(batch_xs), type(batch_xs[0]), batch_xs[0].shape
            # print 'get ys batch size===', len(batch_ys), type(batch_ys[0]), batch_ys[0].shape
            if step % self.print_step == 0:
                print("Step %s, batch accuracy: " % step,
                      sess.run(accuracy, feed_dict={x: batch_xs, y_actual: batch_ys}))

            # For debugging predicted y values
            if step % (self.print_step * 10) == 0:
                print("Predicted y-value:", sess.run(y_pred, feed_dict={x: batch_xs})[0])
                print("Actual y-value:", batch_ys[0])

            if step % FLAGS.log_step == 0:
                summary = sess.run(merged, feed_dict={x: batch_xs, y_actual: batch_ys})
                train_writer.add_summary(summary, global_step=step)

            # For debugging, break early.
            if FLAGS.debug and step > 5:
                break

            sess.run(train_step, feed_dict={x: batch_xs, y_actual: batch_ys})
            step += 1

    print("Completed fine-tuning of parameters.")
    tuned_params = {
        "layer1_weights": sess.run(self.hidden_layers[0].get_weight_variable()),
        "layer2_weights": sess.run(self.hidden_layers[1].get_weight_variable()),
        "layer3_weights": sess.run(self.hidden_layers[2].get_weight_variable()),
        "weights": sess.run(self.W),
        "biases": sess.run(self.b)
    }
    return tuned_params
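# Shape example for the note on W above (illustrative, using the dims from the
# __main__ block below): with dims=[784, 256, 64, 32], the encoder output has
# self.output_dim = 32 features, so for 10-class MNIST classification
#
#     self.W has shape [32, 10]  and  self.b has shape [10],
#
# while x_encoded is [batch_size, 32] and y_logits is [batch_size, 10].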
tuned_params = {"weights": sess.run(W), "biases": sess.run(b)} return tuned_params if __name__ == '__main__': # Start a TensorFlow session sess = tf.Session() # Initialize an unconfigured autoencoder with specified dimensions, etc. sda = SDAutoencoder(dims=[784, 256, 64, 32], activations=["relu", "relu", "relu"], sess=sess, noise=0.1) # Pretrain weights and biases of each layer in the network. # sda.pre_train_network() # Read in test y-values to softmax classifier. # sda.finetune_parameters(epochs=10, output_dim=10) # Write to file the newly represented features. # sda.write_encoded_input(filepath="data/transformed.csv", x_test_path=FLAGS.x_train_path) data = input_data.read_data_sets("data/MNIST", one_hot=True) temp_train = data.train.images for i in range(100): np.random.shuffle(temp_train) x_train = [ _ for _ in utilities.gen_batches(temp_train, FLAGS.batch_size) ] sda.pre_train_layer(0, x_train)