import time
from time import localtime, strftime

import numpy as np
import tensorflow as tf

from util.data_reader import DataReader
import util.trainer_helper as helper
import model.dense as model

DataReader = DataReader()

EVAL_FREQUENCY = 1
NUM_EPOCHS = 20
font_count = 2
isNewTrain = True
LearningRate = 0.001


def main(argv=None):
    trainIn, trainOut, testIn, testOut = DataReader.GetData(font_count)

    # Add a channel dimension and scale pixel values to [0, 1].
    trainIn = np.expand_dims(trainIn, axis=3)
    testIn = np.expand_dims(testIn, axis=3)
    trainIn = trainIn / 255.0
    testIn = testIn / 255.0
    print('trainIn', trainIn.shape)
    print('testIn', testIn.shape)

    batch = 2350 // 10  # integer batch size
    iter_count = int(np.ceil(1.0 * trainIn.shape[0] / batch))
    iter_count_valid = int(np.ceil(1.0 * testIn.shape[0] / batch))

    X = tf.placeholder(tf.float32, [None, trainIn.shape[1], trainIn.shape[2], 1])
    Y = tf.placeholder(tf.int32, [None])
    IsTrain = tf.placeholder(tf.bool)

    predict = model.inference(X, IsTrain)
    argMax = tf.cast(tf.argmax(predict, 1), tf.int32)
    print('argMax', argMax)
    acc = tf.reduce_mean(tf.cast(tf.equal(argMax, Y), tf.float32))
    entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=predict, labels=Y))
    loss = entropy + 1e-5 * helper.regularizer()
    optimizer = tf.train.AdamOptimizer(LearningRate).minimize(loss)

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver()
        if isNewTrain:
            print('Initialized!')
        else:
            saver.restore(sess, model.modelName)
            print('Model restored')
        sess.run(tf.local_variables_initializer())

        start_sec = time.time()
        for step in range(NUM_EPOCHS):
            # One pass over the training set.
            for iter in range(iter_count):
                offset = iter * batch
                feed_dict = {
                    X: trainIn[offset:offset + batch],
                    Y: trainOut[offset:offset + batch],
                    IsTrain: True
                }
                _, l, accr = sess.run([optimizer, entropy, acc], feed_dict)

            now = strftime("%H:%M:%S", localtime())
            if step % EVAL_FREQUENCY == 0:
                # Evaluate on the test set in batches and average the accuracy.
                accr_v_sum = 0
                for iter_v in range(iter_count_valid):
                    offset = iter_v * batch
                    feed_dict_test = {
                        X: testIn[offset:offset + batch],
                        Y: testOut[offset:offset + batch],
                        IsTrain: False
                    }
                    l_v, accr_v = sess.run([entropy, acc], feed_dict_test)
                    accr_v_sum += accr_v
                    if accr_v < 0.01:
                        break  # stop evaluating early if the model is clearly untrained
                accr_v_mean = accr_v_sum / iter_count_valid
                print('%d, acc(%.3f,%.3f), entropy (%.6f,%.6f), %s' %
                      (step, accr, accr_v_mean, l, l_v, now))

            # Checkpoint at most once every 15 minutes.
            this_sec = time.time()
            if this_sec - start_sec > 60 * 15:
                start_sec = this_sec
                save_path = saver.save(sess, model.modelName)
                print('Model Saved, time:%s' % now)

        variable_global = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        trainable_vars = tf.trainable_variables()
        for var in trainable_vars:
            print(var)
        print('saver.save()', saver.save(sess, model.modelName),
              len(trainable_vars), len(variable_global))
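# The trainer above calls two project helpers that are not shown in this file:
# model.inference(X, IsTrain) from model.dense and helper.regularizer() from
# util.trainer_helper, plus model.modelName for checkpoints. The sketch below
# is a hypothetical stand-in only: it assumes the regularizer is a plain L2
# sum over trainable weights and that model.dense is a small fully connected
# classifier over 2350 output classes; the names, layer sizes, and checkpoint
# path are assumptions, not the project's actual code.
import tensorflow as tf

modelName = 'checkpoints/dense.ckpt'  # assumed checkpoint prefix


def regularizer():
    # Sum of L2 penalties over every trainable weight (biases excluded).
    return tf.add_n([
        tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if 'bias' not in v.name.lower()
    ])


def inference(x, is_train, num_classes=2350):
    # Flatten the image and apply two dense layers; dropout is active only
    # while training (is_train=True). Returns unscaled logits.
    flat = tf.layers.flatten(x)
    hidden = tf.layers.dense(flat, 1024, activation=tf.nn.relu)
    hidden = tf.layers.dropout(hidden, rate=0.5, training=is_train)
    return tf.layers.dense(hidden, num_classes)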
from util.data_reader import DataReader
import numpy as np

trainData = DataReader()


def Test_ReadFolder():
    # Load the data once and sanity-check shapes, label range, and one sample.
    trainIn, trainOut, testIn, testOut = trainData.GetData()
    print('trainIn', trainIn.shape)
    print('testIn', testIn.shape)
    print('trainOut', np.min(trainOut), np.max(trainOut))

    print('trainIn')
    compareIndex = 2
    print('label', compareIndex, trainOut[compareIndex])
    print(trainIn[compareIndex])
    print('testIn')
    print(testIn[compareIndex])
    # Mean absolute pixel difference between the train and test image
    # at the same index.
    print('diff', np.mean(np.abs(trainIn[compareIndex] - testIn[compareIndex])))


Test_ReadFolder()
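# The check above only relies on DataReader.GetData() returning four NumPy
# arrays: (N, H, W) grayscale images for train and test plus (N,) integer
# labels. The class below is a hypothetical stand-in with that contract so the
# test can run without the real font images; the shapes, default arguments,
# and class name are assumptions, not the actual util.data_reader code.
import numpy as np


class FakeDataReader(object):

    def GetData(self, font_count=1, image_size=32, num_classes=2350):
        # Random grayscale images in [0, 255] and repeating integer labels,
        # mirroring the shapes the trainer and the test above expect.
        n = num_classes * font_count
        rng = np.random.RandomState(0)
        trainIn = rng.randint(0, 256, size=(n, image_size, image_size)).astype(np.uint8)
        testIn = rng.randint(0, 256, size=(n, image_size, image_size)).astype(np.uint8)
        trainOut = np.arange(n) % num_classes
        testOut = np.arange(n) % num_classes
        return trainIn, trainOut, testIn, testOut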
def main(_):
    config = prepare_dirs_and_logger(config_raw)
    save_config(config)

    rng = np.random.RandomState(config.random_seed)
    tf.set_random_seed(config.random_seed)
    config.rng = rng
    config.module_names = ['_key_find', '_key_filter', '_val_desc', '<eos>']
    config.gt_layout_tokens = ['_key_find', '_key_filter', '_val_desc', '<eos>']

    assembler = Assembler(config)
    sample_builder = SampleBuilder(config)
    config = sample_builder.config  # update T_encoder according to data
    data_train = sample_builder.data_all['train']
    data_reader_train = DataReader(
        config, data_train, assembler, shuffle=True, one_pass=False)

    num_vocab_txt = len(sample_builder.dict_all)
    num_vocab_nmn = len(assembler.module_names)
    num_choices = len(sample_builder.dict_all)

    # Network inputs
    text_seq_batch = tf.placeholder(tf.int32, [None, None])
    seq_len_batch = tf.placeholder(tf.int32, [None])
    ans_label_batch = tf.placeholder(tf.int32, [None])
    use_gt_layout = tf.constant(True, dtype=tf.bool)
    gt_layout_batch = tf.placeholder(tf.int32, [None, None])

    # The model for training
    model = Model(
        config,
        sample_builder.kb,
        text_seq_batch,
        seq_len_batch,
        num_vocab_txt=num_vocab_txt,
        num_vocab_nmn=num_vocab_nmn,
        EOS_idx=assembler.EOS_idx,
        num_choices=num_choices,
        decoder_sampling=True,
        use_gt_layout=use_gt_layout,
        gt_layout_batch=gt_layout_batch)
    compiler = model.compiler
    scores = model.scores
    log_seq_prob = model.log_seq_prob

    # Loss function
    softmax_loss_per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=scores, labels=ans_label_batch)
    # The final per-sample loss, which is loss for valid expr
    # and invalid_expr_loss for invalid expr
    final_loss_per_sample = softmax_loss_per_sample  # All exprs are valid

    avg_sample_loss = tf.reduce_mean(final_loss_per_sample)
    seq_likelihood_loss = tf.reduce_mean(-log_seq_prob)
    total_training_loss = seq_likelihood_loss + avg_sample_loss
    total_loss = total_training_loss + config.weight_decay * model.l2_reg

    # Train with Adam optimizer
    solver = tf.train.AdamOptimizer()
    gradients = solver.compute_gradients(total_loss)
    # Clip gradient by L2 norm
    gradients = [(tf.clip_by_norm(g, config.max_grad_norm), v)
                 for g, v in gradients]
    solver_op = solver.apply_gradients(gradients)
    # Training operation
    with tf.control_dependencies([solver_op]):
        train_step = tf.constant(0)

    # Write summary to TensorBoard
    log_writer = tf.summary.FileWriter(config.log_dir, tf.get_default_graph())
    loss_ph = tf.placeholder(tf.float32, [])
    entropy_ph = tf.placeholder(tf.float32, [])
    accuracy_ph = tf.placeholder(tf.float32, [])
    summary_train = [
        tf.summary.scalar('avg_sample_loss', loss_ph),
        tf.summary.scalar('entropy', entropy_ph),
        tf.summary.scalar('avg_accuracy', accuracy_ph)
    ]
    log_step_train = tf.summary.merge(summary_train)

    # Training
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    snapshot_saver = tf.train.Saver(max_to_keep=None)  # keep all snapshots
    show_all_variables()

    avg_accuracy = 0
    accuracy_decay = 0.99
    for n_iter, batch in enumerate(data_reader_train.batches()):
        if n_iter >= config.max_iter:
            break
        # set up input and output tensors
        h = sess.partial_run_setup(
            fetches=[
                model.predicted_tokens, model.entropy_reg, scores,
                avg_sample_loss, train_step
            ],
            feeds=[
                text_seq_batch, seq_len_batch, gt_layout_batch,
                compiler.loom_input_tensor, ans_label_batch
            ])

        # Part 1: Generate module layout
        tokens, entropy_reg_val = sess.partial_run(
            h,
            fetches=(model.predicted_tokens, model.entropy_reg),
            feed_dict={
                text_seq_batch: batch['input_seq_batch'],
                seq_len_batch: batch['seq_len_batch'],
                gt_layout_batch: batch['gt_layout_batch']
            })
        # Assemble the layout tokens into network structure
        expr_list, expr_validity_array = assembler.assemble(tokens)
        # all exprs should be valid (since they are ground-truth)
        assert np.all(expr_validity_array)
        labels = batch['ans_label_batch']
        # Build TensorFlow Fold input for NMN
        expr_feed = compiler.build_feed_dict(expr_list)
        expr_feed[ans_label_batch] = labels

        # Part 2: Run NMN and learning steps
        scores_val, avg_sample_loss_val, _ = sess.partial_run(
            h, fetches=(scores, avg_sample_loss, train_step),
            feed_dict=expr_feed)

        # Compute accuracy
        predictions = np.argmax(scores_val, axis=1)
        accuracy = np.mean(
            np.logical_and(expr_validity_array, predictions == labels))
        avg_accuracy += (1 - accuracy_decay) * (accuracy - avg_accuracy)

        # Add to TensorBoard summary
        if (n_iter + 1) % config.log_interval == 0:
            tf.logging.info('iter = %d\n\t'
                            'loss = %f, accuracy (cur) = %f, '
                            'accuracy (avg) = %f, entropy = %f' %
                            (n_iter + 1, avg_sample_loss_val, accuracy,
                             avg_accuracy, -entropy_reg_val))
            summary = sess.run(
                fetches=log_step_train,
                feed_dict={
                    loss_ph: avg_sample_loss_val,
                    entropy_ph: -entropy_reg_val,
                    accuracy_ph: avg_accuracy
                })
            log_writer.add_summary(summary, n_iter + 1)

        # Save snapshot
        if (n_iter + 1) % config.snapshot_interval == 0:
            snapshot_file = os.path.join(config.model_dir, '%08d' % (n_iter + 1))
            snapshot_saver.save(sess, snapshot_file, write_meta_graph=False)
            tf.logging.info('Snapshot saved to %s' % snapshot_file)

    tf.logging.info('Run finished.')
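# The loop above leans on tf.Session.partial_run to split a single training
# step into two phases: the layout decoder runs first, its tokens are
# assembled into a TensorFlow Fold feed on the Python side, and the same step
# then continues through the module network and the train op while reusing the
# intermediate state. The snippet below is a toy, self-contained illustration
# of that two-phase pattern (TF 1.x); the graph and variable names are made up
# for the example.
import tensorflow as tf

a = tf.placeholder(tf.float32, [])
b = tf.placeholder(tf.float32, [])
stage1 = a * 2.0        # computed in the first phase
stage2 = stage1 + b     # consumes stage1 in the second phase of the same step

with tf.Session() as sess:
    # Declare everything that will be fetched or fed across both phases.
    h = sess.partial_run_setup(fetches=[stage1, stage2], feeds=[a, b])
    s1 = sess.partial_run(h, stage1, feed_dict={a: 3.0})  # phase 1 -> 6.0
    s2 = sess.partial_run(h, stage2, feed_dict={b: 4.0})  # phase 2 -> 10.0
    print(s1, s2)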
def main(_):
    config = prepare_dirs_and_logger(config_raw)

    rng = np.random.RandomState(config.random_seed)
    tf.set_random_seed(config.random_seed)
    config.rng = rng
    config.module_names = ['_key_find', '_key_filter', '_val_desc', '<eos>']
    config.gt_layout_tokens = ['_key_find', '_key_filter', '_val_desc', '<eos>']

    assembler = Assembler(config)
    sample_builder = SampleBuilder(config)
    config = sample_builder.config  # update T_encoder according to data
    data_test = sample_builder.data_all['test']
    data_reader_test = DataReader(
        config, data_test, assembler, shuffle=False, one_pass=True)

    num_vocab_txt = len(sample_builder.dict_all)
    num_vocab_nmn = len(assembler.module_names)
    num_choices = len(sample_builder.dict_all)

    # Network inputs
    text_seq_batch = tf.placeholder(tf.int32, [None, None])
    seq_len_batch = tf.placeholder(tf.int32, [None])

    # The model
    model = Model(
        config,
        sample_builder.kb,
        text_seq_batch,
        seq_len_batch,
        num_vocab_txt=num_vocab_txt,
        num_vocab_nmn=num_vocab_nmn,
        EOS_idx=assembler.EOS_idx,
        num_choices=num_choices,
        decoder_sampling=False)
    compiler = model.compiler
    scores = model.scores

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    snapshot_file = os.path.join(config.model_dir, FLAGS.snapshot_name)
    tf.logging.info('Snapshot file: %s' % snapshot_file)
    snapshot_saver = tf.train.Saver()
    snapshot_saver.restore(sess, snapshot_file)

    # Evaluation metrics
    num_questions = len(data_test.Y)
    tf.logging.info('# of test questions: %d' % num_questions)
    answer_correct = 0
    layout_correct = 0
    layout_valid = 0

    for batch in data_reader_test.batches():
        # set up input and output tensors
        h = sess.partial_run_setup(
            fetches=[model.predicted_tokens, scores],
            feeds=[text_seq_batch, seq_len_batch, compiler.loom_input_tensor])

        # Part 1: Generate module layout
        tokens = sess.partial_run(
            h,
            fetches=model.predicted_tokens,
            feed_dict={
                text_seq_batch: batch['input_seq_batch'],
                seq_len_batch: batch['seq_len_batch']
            })

        # Compute accuracy of the predicted layout
        gt_tokens = batch['gt_layout_batch']
        layout_correct += np.sum(
            np.all(
                np.logical_or(tokens == gt_tokens,
                              gt_tokens == assembler.EOS_idx),
                axis=0))

        # Assemble the layout tokens into network structure
        expr_list, expr_validity_array = assembler.assemble(tokens)
        layout_valid += np.sum(expr_validity_array)
        labels = batch['ans_label_batch']
        # Build TensorFlow Fold input for NMN
        expr_feed = compiler.build_feed_dict(expr_list)

        # Part 2: Run the NMN on the assembled layouts (no learning at test time)
        scores_val = sess.partial_run(h, scores, feed_dict=expr_feed)

        # Compute accuracy
        predictions = np.argmax(scores_val, axis=1)
        answer_correct += np.sum(
            np.logical_and(expr_validity_array, predictions == labels))

    answer_accuracy = answer_correct * 1.0 / num_questions
    layout_accuracy = layout_correct * 1.0 / num_questions
    layout_validity = layout_valid * 1.0 / num_questions
    tf.logging.info('test answer accuracy = %f, '
                    'test layout accuracy = %f, '
                    'test layout validity = %f' %
                    (answer_accuracy, layout_accuracy, layout_validity))
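# In the evaluation above, a layout counts as correct only if every predicted
# token matches the ground truth at all positions where the ground truth is
# not <eos>. The token arrays are time-major (time on axis 0, questions on
# axis 1), so np.all(..., axis=0) collapses the time dimension and leaves one
# boolean per question. A toy NumPy illustration of that check, with made-up
# token ids and EOS_idx assumed to be 3:
import numpy as np

EOS_idx = 3  # assumed <eos> token id
# Shape (T, N): T decoding steps (rows), N = 2 questions (columns).
gt_tokens = np.array([[0, 0],
                      [1, 2],
                      [3, 3],
                      [3, 3]])
tokens = np.array([[0, 0],
                   [1, 1],  # the second question predicts a wrong token here
                   [3, 3],
                   [3, 3]])

match = np.logical_or(tokens == gt_tokens, gt_tokens == EOS_idx)
per_question_correct = np.all(match, axis=0)  # -> [True, False]
print(per_question_correct, np.sum(per_question_correct))  # 1 question correct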