def train():
  # Prepare data.
  print("Preparing Train & Eval data in %s" % FLAGS.data_dir)
  for d in FLAGS.data_dir, FLAGS.model_dir:
    if not os.path.exists(d):
      os.makedirs(d)
  data = Data(FLAGS.model_dir, FLAGS.data_dir, FLAGS.vocab_size, FLAGS.max_seq_length)
  # integer number of full batches per epoch
  epoc_steps = len(data.rawTrainPosCorpus) // FLAGS.batch_size
  print("Training Data: %d total positive samples, each epoch need %d steps" %
        (len(data.rawTrainPosCorpus), epoc_steps))

  cfg = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
  with tf.Session(config=cfg) as sess:
    model = create_model(sess, data.rawnegSetLen, data.vocab_size, False)

    # setup tensorboard logging
    sw = tf.summary.FileWriter(logdir=FLAGS.model_dir, graph=sess.graph, flush_secs=120)
    summary_op = model.add_summaries()

    # This is the training loop.
    step_time, loss, train_acc = 0.0, 0.0, 0.0
    current_step = 0
    previous_accuracies = []
    for epoch in range(FLAGS.max_epoc):
      epoc_start_Time = time.time()
      for batchId in range(epoc_steps):
        start_time = time.time()
        source_inputs, tgt_inputs, labels = data.get_train_batch(FLAGS.batch_size)
        model.set_forward_only(False)
        d = model.get_train_feed_dict(source_inputs, tgt_inputs, labels)
        ops = [model.train, summary_op, model.loss, model.train_acc]
        _, summary, step_loss, step_train_acc = sess.run(ops, feed_dict=d)
        step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
        loss += step_loss / FLAGS.steps_per_checkpoint
        train_acc += step_train_acc / FLAGS.steps_per_checkpoint
        current_step += 1

        # Once in a while, we save checkpoint, print statistics, and run evals.
        if current_step % FLAGS.steps_per_checkpoint == 0:
          print("global epoc: %.3f, global step %d, learning rate %.4f step-time:%.2f loss:%.4f train_binary_acc:%.4f " %
                (float(model.global_step.eval()) / float(epoc_steps), model.global_step.eval(),
                 model.learning_rate.eval(), step_time, step_loss, train_acc))
          checkpoint_path = os.path.join(FLAGS.model_dir, "SSE-LSTM.ckpt")
          acc_sum = tf.Summary(value=[tf.Summary.Value(tag="train_binary_acc", simple_value=train_acc)])
          sw.add_summary(acc_sum, current_step)

          # #########debugging##########
          # model.set_forward_only(True)
          # sse_index.createIndexFile(model, encoder, os.path.join(FLAGS.model_dir, FLAGS.rawfilename),
          #                           FLAGS.max_seq_length, os.path.join(FLAGS.model_dir, FLAGS.encodedIndexFile), sess,
          #                           batchsize=1000)
          # evaluator = sse_evaluator.Evaluator(model, eval_corpus, os.path.join(FLAGS.model_dir, FLAGS.encodedIndexFile),
          #                                     sess)
          # acc1, acc3, acc10 = evaluator.eval()
          # print("epoc# %.3f, task specific evaluation: top 1/3/10 accuracies: %f / %f / %f " %
          #       (float(model.global_step.eval()) / float(epoc_steps), acc1, acc3, acc10))
          # ###end of debugging########

          # Decrease learning rate if no improvement was seen over the last 2 checkpoints.
          if len(previous_accuracies) > 3 and train_acc < min(previous_accuracies[-2:]):
            sess.run(model.learning_rate_decay_op)
          previous_accuracies.append(train_acc)

          # save currently best-ever model
          if train_acc == max(previous_accuracies):
            print("Better Accuracy %.4f found. Saving current best model ..." % train_acc)
            model.save(sess, checkpoint_path + "-BestEver")
          else:
            print("Best Accuracy is: %.4f, while current round is: %.4f" % (max(previous_accuracies), train_acc))
            print("skip saving model ...")

          # if trained for more than 10 epochs with no accuracy improvement over the last
          # 5 checkpoints, stop training: report the best accuracy and save the final model.
          if epoch > 10 and train_acc < min(previous_accuracies[-5:]):
            p = model.save(sess, checkpoint_path + "-final")
            print("After around %d Epocs no further improvement, Training finished, wrote checkpoint to %s." % (epoch, p))
            break

          # reset current checkpoint step statistics
          step_time, loss, train_acc = 0.0, 0.0, 0.0

      epoc_train_time = time.time() - epoc_start_Time
      print('\n\n\nepoch# %d took %f hours' % (epoch, epoc_train_time / (60.0 * 60)))

      # run task specific evaluation after each epoch
      if (FLAGS.task_type not in ['ranking', 'crosslingual']) or ((epoch + 1) % 20 == 0):
        model.set_forward_only(True)
        sse_index.createIndexFile(model, data.encoder, os.path.join(FLAGS.model_dir, FLAGS.rawfilename),
                                  FLAGS.max_seq_length, os.path.join(FLAGS.model_dir, FLAGS.encodedIndexFile),
                                  sess, batchsize=1000)
        evaluator = sse_evaluator.Evaluator(model, data.rawEvalCorpus,
                                            os.path.join(FLAGS.model_dir, FLAGS.encodedIndexFile), sess)
        acc1, acc3, acc10 = evaluator.eval()
        print("epoc#%d, task specific evaluation: top 1/3/10 accuracies: %f / %f / %f \n\n\n" % (epoch, acc1, acc3, acc10))

      # Save checkpoint at end of each epoch
      checkpoint_path = os.path.join(FLAGS.model_dir, "SSE-LSTM.ckpt")
      model.save(sess, checkpoint_path + '-epoch-%d' % epoch)
      if len(previous_accuracies) > 0:
        print('So far best ever model training binary accuracy is: %.4f ' % max(previous_accuracies))
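

# ---------------------------------------------------------------------------
# Hypothetical sketch of the command-line flags the train() above relies on.
# The flag names are taken directly from the FLAGS.* references in the code;
# the default values and help strings are illustrative placeholders only, NOT
# this repo's actual definitions (which live elsewhere in the module).
tf.app.flags.DEFINE_string("data_dir", "./rawdata", "Directory holding raw train/eval data.")
tf.app.flags.DEFINE_string("model_dir", "./models", "Directory for checkpoints and TensorBoard logs.")
tf.app.flags.DEFINE_integer("vocab_size", 32000, "Vocabulary size used by the text encoder.")
tf.app.flags.DEFINE_integer("max_seq_length", 50, "Maximum token length of a sequence.")
tf.app.flags.DEFINE_integer("batch_size", 32, "Number of training samples per step.")
tf.app.flags.DEFINE_integer("steps_per_checkpoint", 200, "Training steps between stats/checkpoint/eval.")
tf.app.flags.DEFINE_integer("max_epoc", 50, "Maximum number of training epochs.")
tf.app.flags.DEFINE_string("task_type", "ranking", "Task type, e.g. ranking or crosslingual.")
tf.app.flags.DEFINE_string("rawfilename", "targetIDs", "Raw target-space file used to build the index (placeholder name).")
tf.app.flags.DEFINE_string("encodedIndexFile", "targetEncodingIndex.tsv", "Encoded target-space index file (placeholder name).")
FLAGS = tf.app.flags.FLAGS
# ---------------------------------------------------------------------------
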
def train():
  # Prepare data.
  print("Preparing Train & Eval data in %s" % FLAGS.data_dir)
  for d in FLAGS.data_dir, FLAGS.model_dir:
    if not os.path.exists(d):
      os.makedirs(d)
  encoded_train_pair_path, encoded_eval_pair_path, encodedFullTargetSpace_path, _, _ = data_utils.prepare_raw_data(
      FLAGS.data_dir, FLAGS.model_dir, FLAGS.src_vocab_size, FLAGS.tgt_vocab_size)

  # load full set targetSeqID data
  tgtID_EncodingMap, tgtID_FullLableMap, fullLabel_tgtID_Map, target_inputs, target_lens = load_encodedTargetSpace(
      encodedFullTargetSpace_path)

  # load full set train data
  print("Reading development and training data ...")
  train_set, epoc_steps = read_train_data(encoded_train_pair_path, tgtID_EncodingMap)
  print("Training Data: %d total samples, each epoch need %d steps" % (len(train_set), epoc_steps))

  # load eval data
  eval_src_seqs, eval_src_lens, eval_tgtIDs = get_eval_set(encoded_eval_pair_path)

  cfg = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
  with tf.device('/' + FLAGS.device), tf.Session(config=cfg) as sess:
    # Create SSE model and build tensorflow training graph.
    print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.embedding_size))
    model = create_model(sess, len(tgtID_FullLableMap), False)

    # setup evaluation graph
    evaluator = sse_evaluator.Evaluator(model, eval_src_seqs, eval_src_lens, eval_tgtIDs,
                                        target_inputs, target_lens, tgtID_FullLableMap, sess)

    # setup tensorboard logging
    sw = tf.train.SummaryWriter(FLAGS.model_dir, sess.graph, flush_secs=120)
    summary_op = model.add_summaries()

    # This is the training loop.
    step_time, loss = 0.0, 0.0
    current_step = 0
    previous_accuracies = []
    for epoch in range(FLAGS.max_epoc):
      epoc_start_Time = time.time()
      random.shuffle(train_set, random.random)
      for batchId in range(epoc_steps - int(2.5 * FLAGS.steps_per_checkpoint)):  # basic drop out here
        start_time = time.time()
        source_inputs, labels, src_lens = [], [], []
        for idx in xrange(FLAGS.batch_size):
          source_input, src_len, tgtID = train_set[batchId * FLAGS.batch_size + idx]
          source_inputs.append(source_input)
          labels.append(tgtID_FullLableMap[tgtID])
          src_lens.append(src_len)
        d = model.get_train_feed_dict(source_inputs, target_inputs, labels, src_lens, target_lens)
        ops = [model.train, summary_op, model.loss]
        _, summary, step_loss = sess.run(ops, feed_dict=d)
        step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
        loss += step_loss / FLAGS.steps_per_checkpoint
        current_step += 1

        # Once in a while, we save checkpoint, print statistics, and run evals.
        if current_step % FLAGS.steps_per_checkpoint == 0:
          print("global epoc: %.3f, global step %d, learning rate %.4f step-time:%.2f loss:%.4f " %
                (float(model.global_step.eval()) / float(epoc_steps), model.global_step.eval(),
                 model.learning_rate.eval(), step_time, step_loss))
          # Save checkpoint and zero timer and loss.
          checkpoint_path = os.path.join(FLAGS.model_dir, "SSE-LSTM.ckpt")
          model.save(sess, checkpoint_path, global_step=model.global_step)
          step_time, loss = 0.0, 0.0

          # Run evals on development set and print their accuracy number.
          t = time.time()
          acc1, acc3, acc10 = evaluator.eval()
          acc_sum = tf.Summary(value=[
              tf.Summary.Value(tag="acc1", simple_value=acc1),
              tf.Summary.Value(tag="acc3", simple_value=acc3),
              tf.Summary.Value(tag="acc10", simple_value=acc10)
          ])
          sw.add_summary(acc_sum, current_step)
          print("Step %d, top 1/3/10 accuracies: %f / %f / %f, (eval took %f seconds) " %
                (current_step, acc1, acc3, acc10, time.time() - t))
          sys.stdout.flush()

          # Decrease learning rate if no improvement was seen over last 3 times.
          if len(previous_accuracies) > 2 and acc1 < min(previous_accuracies[-3:]):
            sess.run(model.learning_rate_decay_op)
          previous_accuracies.append(acc1)

          # save currently best-ever model
          if acc1 == max(previous_accuracies):
            model.save(sess, checkpoint_path + "-BestEver")

          # if finished at least 2 Epocs and still no further accuracy improvement, stop training
          # report the best accuracy number and final model's number and save it.
          if epoch > 2 and acc1 < min(previous_accuracies[-3:]):
            p = model.save(sess, checkpoint_path + "-final")
            print("After around %d Epocs no further improvement, Training finished, wrote checkpoint to %s." % (epoch, p))
            print("Best ever top1 accuracy: %.2f , Final top 1 / 3 / 10 accuracies: %.2f / %.2f / %.2f" %
                  (max(previous_accuracies), acc1, acc3, acc10))
            break

      # give out epoc statistics
      epoc_train_time = time.time() - epoc_start_Time
      print('epoch# %d took %f hours' % (epoch, epoc_train_time / (60.0 * 60)))

      # Save checkpoint at end of each epoch
      checkpoint_path = os.path.join(FLAGS.model_dir, "SSE-LSTM.ckpt")
      model.save(sess, checkpoint_path + '-epoch-%d' % epoch)
      if len(previous_accuracies) > 0:
        print('So far best ever model top1 accuracy is: %.4f ' % max(previous_accuracies))
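

# ---------------------------------------------------------------------------
# Illustrative only: sess.run(model.learning_rate_decay_op) in the training
# loops above assumes the SSE model exposes a decay op. In TF 1.x seq2seq-style
# models this is typically just an assign op that scales a non-trainable
# learning-rate Variable. A minimal, self-contained sketch of that mechanism;
# the initial rate and decay factor are made-up placeholders, not this repo's
# actual hyper-parameters.
def _learning_rate_decay_sketch(initial_rate=0.5, decay_factor=0.99):
  learning_rate = tf.Variable(float(initial_rate), trainable=False, dtype=tf.float32)
  learning_rate_decay_op = learning_rate.assign(learning_rate * decay_factor)
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(learning_rate_decay_op)  # rate becomes initial_rate * decay_factor
    return sess.run(learning_rate)
# ---------------------------------------------------------------------------
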
def train():
  # Prepare data.
  print("Preparing Train & Eval data in %s" % FLAGS.data_dir)
  for d in FLAGS.data_dir, FLAGS.model_dir:
    if not os.path.exists(d):
      os.makedirs(d)
  encoder, train_corpus, dev_corpus, encodedTgtSpace, tgtIdNameMap = data_utils.prepare_raw_data(
      FLAGS.data_dir, FLAGS.model_dir, FLAGS.vocab_size, FLAGS.task_type, FLAGS.max_seq_length)

  epoc_steps = int(math.floor(len(train_corpus) / FLAGS.batch_size))
  print("Training Data: %d total samples (pos + neg), each epoch need %d steps" % (len(train_corpus), epoc_steps))

  cfg = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
  with tf.Session(config=cfg) as sess:
    # Create SSE model and build tensorflow training graph.
    print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.embedding_size))
    model = create_model(sess, len(encodedTgtSpace), encoder.vocab_size, False)

    # setup evaluation graph
    evaluator = sse_evaluator.Evaluator(model, dev_corpus, encodedTgtSpace, sess)

    # setup tensorboard logging
    sw = tf.summary.FileWriter(logdir=FLAGS.model_dir, graph=sess.graph, flush_secs=120)
    summary_op = model.add_summaries()

    # This is the training loop.
    step_time, loss = 0.0, 0.0
    current_step = 0
    previous_accuracies = []
    fullSetTargetIds = set(encodedTgtSpace.keys())
    for epoch in range(FLAGS.max_epoc):
      epoc_start_Time = time.time()
      random.shuffle(train_corpus, random.random)
      for batchId in range(epoc_steps - int(2.5 * FLAGS.steps_per_checkpoint)):  # basic drop out here
        start_time = time.time()
        source_inputs, src_lens, tgt_inputs, tgt_lens, labels = [], [], [], [], []
        for idx in range(FLAGS.batch_size):
          source_input, tgtId = train_corpus[batchId * FLAGS.batch_size + idx]
          # add positive pair
          source_inputs.append(source_input)
          src_lens.append(source_input.index(text_encoder.PAD_ID) + 1)
          tgt_inputs.append(encodedTgtSpace[tgtId])
          tgt_lens.append(encodedTgtSpace[tgtId].index(text_encoder.PAD_ID) + 1)
          labels.append(1.0)
          # add negative pair
          negTgt = random.sample(fullSetTargetIds - set([tgtId]), 1)[0]
          source_inputs.append(source_input)
          src_lens.append(source_input.index(text_encoder.PAD_ID) + 1)
          tgt_inputs.append(encodedTgtSpace[negTgt])
          tgt_lens.append(encodedTgtSpace[negTgt].index(text_encoder.PAD_ID) + 1)
          labels.append(0.0)
        d = model.get_train_feed_dict(source_inputs, tgt_inputs, labels, src_lens, tgt_lens)
        ops = [model.train, summary_op, model.loss]
        _, summary, step_loss = sess.run(ops, feed_dict=d)
        step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
        loss += step_loss / FLAGS.steps_per_checkpoint
        current_step += 1

        # Once in a while, we save checkpoint, print statistics, and run evals.
        if current_step % FLAGS.steps_per_checkpoint == 0:
          print("global epoc: %.3f, global step %d, learning rate %.4f step-time:%.2f loss:%.4f " %
                (float(model.global_step.eval()) / float(epoc_steps), model.global_step.eval(),
                 model.learning_rate.eval(), step_time, step_loss))
          # Save checkpoint and zero timer and loss.
          checkpoint_path = os.path.join(FLAGS.model_dir, "SSE-LSTM.ckpt")
          # model.save(sess, checkpoint_path, global_step=model.global_step)  # only save better models
          step_time, loss = 0.0, 0.0

          # Run evals on development set and print their accuracy number.
          t = time.time()
          acc1, acc3, acc10 = evaluator.eval()
          acc_sum = tf.Summary(value=[
              tf.Summary.Value(tag="acc1", simple_value=acc1),
              tf.Summary.Value(tag="acc3", simple_value=acc3),
              tf.Summary.Value(tag="acc10", simple_value=acc10)
          ])
          sw.add_summary(acc_sum, current_step)
          print("Step %d, top 1/3/10 accuracies: %f / %f / %f, (eval took %f seconds) " %
                (current_step, acc1, acc3, acc10, time.time() - t))
          sys.stdout.flush()

          # Decrease learning rate if no improvement was seen over last 3 times.
          if len(previous_accuracies) > 2 and acc1 < min(previous_accuracies[-3:]):
            sess.run(model.learning_rate_decay_op)
          previous_accuracies.append(acc1)

          # save currently best-ever model
          if acc1 == max(previous_accuracies):
            print("Better Accuracy %f found. Saving current best model ..." % acc1)
            model.save(sess, checkpoint_path + "-BestEver")
          else:
            print("Best Accuracy is: %f, while current round is: %f" % (max(previous_accuracies), acc1))
            print("skip saving model ...")

          # if finished at least 2 Epocs and still no further accuracy improvement, stop training
          # report the best accuracy number and final model's number and save it.
          if epoch > 2 and acc1 < min(previous_accuracies[-3:]):
            p = model.save(sess, checkpoint_path + "-final")
            print("After around %d Epocs no further improvement, Training finished, wrote checkpoint to %s." % (epoch, p))
            print("Best ever top1 accuracy: %.2f , Final top 1 / 3 / 10 accuracies: %.2f / %.2f / %.2f" %
                  (max(previous_accuracies), acc1, acc3, acc10))
            break

      # give out epoc statistics
      epoc_train_time = time.time() - epoc_start_Time
      print('epoch# %d took %f hours' % (epoch, epoc_train_time / (60.0 * 60)))

      # Save checkpoint at end of each epoch
      checkpoint_path = os.path.join(FLAGS.model_dir, "SSE-LSTM.ckpt")
      model.save(sess, checkpoint_path + '-epoch-%d' % epoch)
      if len(previous_accuracies) > 0:
        print('So far best ever model top1 accuracy is: %.4f ' % max(previous_accuracies))
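

# ---------------------------------------------------------------------------
# Illustrative entry point (a minimal sketch, not necessarily how this repo
# wires things up): TF 1.x scripts built around tf.app.flags usually hand
# control to tf.app.run(), which parses the flags and then invokes main().
def main(_):
  train()


if __name__ == "__main__":
  tf.app.run()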