def run_train(w2vdim, lexdim, lexnumfilters, sample_test=True):
    if sample_test:
        print '======================================[sample test]======================================'

    max_len = 60

    with Timer("lex"):
        norm_model, raw_model = load_lexicon_unigram(lexdim)

    with Timer("w2v"):
        w2vmodel = load_w2v(w2vdim, sample_test=sample_test)

    unigram_lexicon_model = norm_model
    # unigram_lexicon_model = raw_model

    if sample_test:
        x_train, y_train, x_lex_train = cnn_data_helpers.load_data('trn_sample', w2vmodel, unigram_lexicon_model, max_len)
        x_dev, y_dev, x_lex_dev = cnn_data_helpers.load_data('dev_sample', w2vmodel, unigram_lexicon_model, max_len)
        x_test, y_test, x_lex_test = cnn_data_helpers.load_data('tst_sample', w2vmodel, unigram_lexicon_model, max_len)
    else:
        x_train, y_train, x_lex_train = cnn_data_helpers.load_data('trn', w2vmodel, unigram_lexicon_model, max_len)
        x_dev, y_dev, x_lex_dev = cnn_data_helpers.load_data('dev', w2vmodel, unigram_lexicon_model, max_len)
        x_test, y_test, x_lex_test = cnn_data_helpers.load_data('tst', w2vmodel, unigram_lexicon_model, max_len)

    # x_train, y_train = cnn_data_helpers.load_data('trn', w2vmodel, max_len)
    # x_dev, y_dev = cnn_data_helpers.load_data('dev', w2vmodel, max_len)
    # x_test, y_test = cnn_data_helpers.load_data('tst', w2vmodel, max_len)

    del w2vmodel
    del norm_model
    del raw_model
    gc.collect()

    print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

    # Training
    # ==================================================
    if not FLAGS.random_seed:
        tf.set_random_seed(FLAGS.seed_number)

    with tf.Graph().as_default():
        max_af1_dev = 0
        index_at_max_af1_dev = 0
        af1_tst_at_max_af1_dev = 0

        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            if not FLAGS.random_seed:
                tf.set_random_seed(FLAGS.seed_number)

            cnn = TextCNNAttentionSimpleUT(
                sequence_length=x_train.shape[1],
                num_classes=3,
                embedding_size=w2vdim,
                embedding_size_lex=lexdim,
                num_filters_lex=lexnumfilters,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.histogram_summary("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.scalar_summary("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.merge_summary(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.scalar_summary("loss", cnn.loss)
            acc_summary = tf.scalar_summary("accuracy", cnn.accuracy)
            f1_summary = tf.scalar_summary("avg_f1", cnn.avg_f1)

            # Train Summaries
            train_summary_op = tf.merge_summary([loss_summary, acc_summary, f1_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.train.SummaryWriter(train_summary_dir, sess.graph_def)
            # Dev summaries
            dev_summary_op = tf.merge_summary([loss_summary, acc_summary, f1_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.train.SummaryWriter(dev_summary_dir, sess.graph_def)

            # Test summaries
            test_summary_op = tf.merge_summary([loss_summary, acc_summary, f1_summary])
            test_summary_dir = os.path.join(out_dir, "summaries", "test")
            test_summary_writer = tf.train.SummaryWriter(test_summary_dir, sess.graph_def)

            # Checkpoint directory. TensorFlow assumes this directory already exists, so we need to create it.
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.all_variables())

            # Initialize all variables
            sess.run(tf.initialize_all_variables())

            def train_step(x_batch, y_batch, x_batch_lex):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    # lexicon
                    cnn.input_x_lexicon: x_batch_lex,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1, hh, hhl = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy,
                     cnn.neg_r, cnn.neg_p, cnn.f1_neg, cnn.f1_pos, cnn.avg_f1,
                     cnn.h_lex_list[0], cnn.h_list[0]],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                # print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                print("{}: step {}, loss {:g}, acc {:g}, neg_r {:g} neg_p {:g} f1_neg {:g}, f1_pos {:g}, f1 {:g}".
                      format(time_str, step, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, x_batch_lex, writer=None):
                """
                Evaluates the model on the dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    # lexicon
                    cnn.input_x_lexicon: x_batch_lex,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1 = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy,
                     cnn.neg_r, cnn.neg_p, cnn.f1_neg, cnn.f1_pos, cnn.avg_f1],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}, neg_r {:g} neg_p {:g} f1_neg {:g}, f1_pos {:g}, f1 {:g}".
                      format(time_str, step, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1))
                if writer:
                    writer.add_summary(summaries, step)
                return avg_f1
                # if avg_f1 > 56:
                #     return True
                # else:
                #     return False

            def test_step(x_batch, y_batch, x_batch_lex, writer=None):
                """
                Evaluates the model on the test set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    # lexicon
                    cnn.input_x_lexicon: x_batch_lex,
                    cnn.dropout_keep_prob: 1.0
                }
                # note: use the test summary op here (the original reused dev_summary_op,
                # which is an identical merge of the same loss/accuracy/f1 summaries)
                step, summaries, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1 = sess.run(
                    [global_step, test_summary_op, cnn.loss, cnn.accuracy,
                     cnn.neg_r, cnn.neg_p, cnn.f1_neg, cnn.f1_pos, cnn.avg_f1],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}, neg_r {:g} neg_p {:g} f1_neg {:g}, f1_pos {:g}, f1 {:g}".
                      format(time_str, step, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1))
                if writer:
                    writer.add_summary(summaries, step)
                # if avg_f1 > 56:
                #     return True
                # else:
                #     return False
                return avg_f1

            # Generate batches
            batches = cnn_data_helpers.batch_iter(
                list(zip(x_train, y_train, x_lex_train)), FLAGS.batch_size, FLAGS.num_epochs)

            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch, x_batch_lex = zip(*batch)
                train_step(x_batch, y_batch, x_batch_lex)
                current_step = tf.train.global_step(sess, global_step)

                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    curr_af1_dev = dev_step(x_dev, y_dev, x_lex_dev, writer=dev_summary_writer)
                    # path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    # print("Saved model checkpoint to {}\n".format(path))
                    print("")

                    print("\nTest:")
                    curr_af1_tst = test_step(x_test, y_test, x_lex_test, writer=test_summary_writer)
                    # path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    # print("Saved model checkpoint to {}\n".format(path))
                    print("")

                    if curr_af1_dev > max_af1_dev:
                        max_af1_dev = curr_af1_dev
                        index_at_max_af1_dev = current_step
                        af1_tst_at_max_af1_dev = curr_af1_tst

                    print '\nStatus:\n[%d] Max f1 for dev (%f), Max f1 for tst (%f)\n' % (
                        index_at_max_af1_dev, max_af1_dev, af1_tst_at_max_af1_dev)
                    sys.stdout.flush()
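# ---------------------------------------------------------------------------
# Note: cnn_data_helpers.batch_iter is not defined in this file. The training
# loops here only assume it yields shuffled mini-batches of zipped
# (x, y, x_lex) examples for FLAGS.num_epochs epochs. Below is a minimal
# sketch of such a helper, matching the (data, batch_size, num_epochs) call
# signature used above; the actual implementation in cnn_data_helpers may
# differ.
# ---------------------------------------------------------------------------
import numpy as np


def batch_iter(data, batch_size, num_epochs, shuffle=True):
    """Sketch: yield mini-batches over `data` for `num_epochs` epochs."""
    data = np.array(data)
    data_size = len(data)
    num_batches_per_epoch = int((data_size - 1) / batch_size) + 1
    for epoch in range(num_epochs):
        # Reshuffle the data at the start of each epoch
        if shuffle:
            shuffle_indices = np.random.permutation(np.arange(data_size))
            shuffled_data = data[shuffle_indices]
        else:
            shuffled_data = data
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_size)
            yield shuffled_data[start_index:end_index]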
                     cnn.neg_r, cnn.neg_p, cnn.f1_neg, cnn.f1_pos, cnn.avg_f1],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}, neg_r {:g} neg_p {:g} f1_neg {:g}, f1_pos {:g}, f1 {:g}".
                      format(time_str, step, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1))
                if writer:
                    writer.add_summary(summaries, step)

                if avg_f1 > 56:
                    return True
                else:
                    return False

            # Generate batches
            batches = cnn_data_helpers.batch_iter(
                list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs)

            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)

                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    if dev_step(x_dev, y_dev, writer=dev_summary_writer) is True:
                        path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                        print("Saved model checkpoint to {}\n".format(path))
                    print("")

                # if current_step % FLAGS.test_every == 0:
                #     print("\nTest:")
                #     if test_step(x_test, y_test, writer=test_summary_writer) is True:
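# ---------------------------------------------------------------------------
# Note: the metric tensors fetched above (cnn.neg_r, cnn.neg_p, cnn.f1_neg,
# cnn.f1_pos, cnn.avg_f1) are defined inside the model class, which is not
# shown in this file. The sketch below documents what those quantities are:
# per-class precision/recall/F1 for the negative and positive classes and
# their average. The class indices and the 0-100 scaling of avg_f1 (inferred
# from the "avg_f1 > 56" check above) are assumptions, and the real model
# computes these as graph ops rather than with NumPy.
# ---------------------------------------------------------------------------
import numpy as np


def f1_metrics(golds, preds, neg_label=0, pos_label=2):
    """Sketch: negative-class precision/recall/F1, positive-class F1,
    and their average scaled to 0-100."""
    golds = np.asarray(golds)
    preds = np.asarray(preds)

    def prf(label):
        tp = float(np.sum((preds == label) & (golds == label)))
        precision = tp / max(float(np.sum(preds == label)), 1.0)
        recall = tp / max(float(np.sum(golds == label)), 1.0)
        f1 = 2.0 * precision * recall / max(precision + recall, 1e-8)
        return precision, recall, f1

    neg_p, neg_r, f1_neg = prf(neg_label)
    _, _, f1_pos = prf(pos_label)
    avg_f1 = 100.0 * (f1_neg + f1_pos) / 2.0  # percent scale is an assumption
    return neg_r, neg_p, f1_neg, f1_pos, avg_f1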
def run_train(w2vdim, w2vnumfilters, lexdim, lexnumfilters, randomseed, withlexicon):
    max_len = 60

    with Timer("lex"):
        norm_model, raw_model = load_lexicon_unigram(lexdim)

    with Timer("w2v"):
        w2vmodel = load_w2v(w2vdim)

    unigram_lexicon_model = norm_model
    # unigram_lexicon_model = raw_model

    x_train, y_train, x_lex_train = cnn_data_helpers.load_data(
        'trn', w2vmodel, unigram_lexicon_model, max_len)
    x_dev, y_dev, x_lex_dev = cnn_data_helpers.load_data(
        'dev', w2vmodel, unigram_lexicon_model, max_len)
    x_test, y_test, x_lex_test = cnn_data_helpers.load_data(
        'tst', w2vmodel, unigram_lexicon_model, max_len)

    del w2vmodel
    del norm_model
    del raw_model
    gc.collect()

    print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

    # Training
    # ==================================================
    if randomseed > 0:
        tf.set_random_seed(randomseed)

    with tf.Graph().as_default():
        max_af1_dev = 0
        index_at_max_af1_dev = 0
        af1_tst_at_max_af1_dev = 0

        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            if randomseed > 0:
                tf.set_random_seed(randomseed)

            cnn = TextCNN(
                sequence_length=x_train.shape[1],
                num_classes=3,
                embedding_size=w2vdim,
                embedding_size_lex=lexdim,
                lex_filter_size=lexnumfilters,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=w2vnumfilters,
                with_lexicon=withlexicon,
                l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.histogram_summary("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.scalar_summary("{}/grad/sparsity".format(v.name),
                                                         tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.merge_summary(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.scalar_summary("loss", cnn.loss)
            acc_summary = tf.scalar_summary("accuracy", cnn.accuracy)
            f1_summary = tf.scalar_summary("avg_f1", cnn.avg_f1)

            # Train Summaries
            train_summary_op = tf.merge_summary(
                [loss_summary, acc_summary, f1_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.train.SummaryWriter(train_summary_dir, sess.graph_def)

            # Dev summaries
            dev_summary_op = tf.merge_summary([loss_summary, acc_summary, f1_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.train.SummaryWriter(dev_summary_dir, sess.graph_def)

            # Test summaries
            test_summary_op = tf.merge_summary([loss_summary, acc_summary, f1_summary])
            test_summary_dir = os.path.join(out_dir, "summaries", "test")
            test_summary_writer = tf.train.SummaryWriter(test_summary_dir, sess.graph_def)
            # Checkpoint directory. TensorFlow assumes this directory already exists, so we need to create it.
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.all_variables())

            # Initialize all variables
            sess.run(tf.initialize_all_variables())

            def train_step(x_batch, y_batch, x_batch_lex):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    # lexicon
                    cnn.input_x_lexicon: x_batch_lex,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1 = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy,
                     cnn.neg_r, cnn.neg_p, cnn.f1_neg, cnn.f1_pos, cnn.avg_f1],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                # print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                # print("{}: step {}, loss {:g}, acc {:g}, neg_r {:g} neg_p {:g} f1_neg {:g}, f1_pos {:g}, f1 {:g}".
                #       format(time_str, step, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, x_batch_lex, writer=None):
                """
                Evaluates the model on the dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    # lexicon
                    cnn.input_x_lexicon: x_batch_lex,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1 = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy,
                     cnn.neg_r, cnn.neg_p, cnn.f1_neg, cnn.f1_pos, cnn.avg_f1],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{} : {} step {}, loss {:g}, acc {:g}, neg_r {:g} neg_p {:g} f1_neg {:g}, f1_pos {:g}, f1 {:g}"
                      .format("DEV", time_str, step, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1))
                if writer:
                    writer.add_summary(summaries, step)
                return avg_f1

            def test_step(x_batch, y_batch, x_batch_lex, writer=None):
                """
                Evaluates the model on the test set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    # lexicon
                    cnn.input_x_lexicon: x_batch_lex,
                    cnn.dropout_keep_prob: 1.0
                }
                # note: use the test summary op here (the original reused dev_summary_op,
                # which is an identical merge of the same loss/accuracy/f1 summaries)
                step, summaries, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1 = sess.run(
                    [global_step, test_summary_op, cnn.loss, cnn.accuracy,
                     cnn.neg_r, cnn.neg_p, cnn.f1_neg, cnn.f1_pos, cnn.avg_f1],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{} : {} step {}, loss {:g}, acc {:g}, neg_r {:g} neg_p {:g} f1_neg {:g}, f1_pos {:g}, f1 {:g}"
                      .format("TEST", time_str, step, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1))
                if writer:
                    writer.add_summary(summaries, step)
                return avg_f1

            # Generate batches
            batches = cnn_data_helpers.batch_iter(
                list(zip(x_train, y_train, x_lex_train)), FLAGS.batch_size, FLAGS.num_epochs)

            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch, x_batch_lex = zip(*batch)
                train_step(x_batch, y_batch, x_batch_lex)
                current_step = tf.train.global_step(sess, global_step)

                if current_step % FLAGS.evaluate_every == 0:
                    print("Evaluation:")
                    curr_af1_dev = dev_step(x_dev, y_dev, x_lex_dev, writer=dev_summary_writer)
                    # path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    # print("Saved model checkpoint to {}\n".format(path))

                    curr_af1_tst = test_step(x_test, y_test, x_lex_test, writer=test_summary_writer)
                    # path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    # print("Saved model checkpoint to {}\n".format(path))

                    if curr_af1_dev > max_af1_dev:
                        max_af1_dev = curr_af1_dev
                        index_at_max_af1_dev = current_step
                        af1_tst_at_max_af1_dev = curr_af1_tst

                    print 'Status: [%d] Max f1 for dev (%f), Max f1 for tst (%f)\n' % (
                        index_at_max_af1_dev, max_af1_dev, af1_tst_at_max_af1_dev)
                    sys.stdout.flush()
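# ---------------------------------------------------------------------------
# Example invocation of the run_train variant defined above. The hyperparameter
# values below (embedding size, filter counts, lexicon dimension, seed) are
# illustrative only, not the settings used in this repo; in practice they would
# come from the command-line FLAGS / argument parsing defined elsewhere.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    run_train(w2vdim=400, w2vnumfilters=64,
              lexdim=15, lexnumfilters=9,
              randomseed=1234, withlexicon=True)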