def decode():
    """Run an interactive correction prompt.

    Optionally loads the KenLM model named by ``FLAGS.lmfile``, prepares the
    NLC data and vocabulary, builds the model in a new TensorFlow session and
    then loops forever: read a sentence, pass it to ``fix_sent``, print the
    result.

    Rebinds the module globals ``vocab`` and ``reverse_vocab``, and ``lm``
    when a language-model file is configured.
    """
    global reverse_vocab, vocab, lm

    lm_path = FLAGS.lmfile
    if lm_path is not None:
        print("Loading Language model from %s" % lm_path)
        lm = kenlm.LanguageModel(lm_path)

    print("Preparing NLC data in %s" % FLAGS.data_dir)
    data_path = FLAGS.data_dir + '/' + FLAGS.tokenizer.lower()
    # Only the vocabulary path (last of the five returned values) is used.
    _, _, _, _, vocab_path = nlc_data.prepare_nlc_data(
        data_path, FLAGS.max_vocab_size, tokenizer=get_tokenizer(FLAGS))

    vocab, reverse_vocab = nlc_data.initialize_vocabulary(vocab_path)
    n_tokens = len(vocab)
    print("Vocabulary size: %d" % n_tokens)

    with tf.Session() as sess:
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, n_tokens, False)

        # No exit condition: the prompt runs until the process is interrupted.
        while True:
            typed = raw_input("Enter a sentence: ")
            candidate = fix_sent(model, sess, typed)
            print("Candidate: ", candidate)
def decode():
    """Run an interactive correction prompt in a session configured with zero GPUs.

    Optionally loads the KenLM model named by ``FLAGS.lmfile``, prepares the
    NLC data and vocabulary, builds the model and then loops forever reading
    sentences and printing the output of ``fix_sent``.

    Rebinds the module globals ``vocab`` and ``reverse_vocab``, and ``lm``
    when a language-model file is configured.
    """
    global reverse_vocab, vocab, lm

    lm_path = FLAGS.lmfile
    if lm_path is not None:
        print("Loading Language model from %s" % lm_path)
        lm = kenlm.LanguageModel(lm_path)

    print("Preparing NLC data in %s" % FLAGS.data_dir)
    data_path = FLAGS.data_dir + '/' + FLAGS.tokenizer.lower()
    # Only the vocabulary path (last of the five returned values) is used.
    _, _, _, _, vocab_path = nlc_data.prepare_nlc_data(
        data_path, FLAGS.max_vocab_size, tokenizer=get_tokenizer(FLAGS))

    vocab, reverse_vocab = nlc_data.initialize_vocabulary(vocab_path)
    n_tokens = len(vocab)
    print("Vocabulary size: %d" % n_tokens)

    # device_count={'GPU': 0} asks the session for no GPU devices
    # (presumably to force CPU-only decoding).
    session_config = tf.ConfigProto(device_count={'GPU': 0})
    with tf.Session(config=session_config) as sess:
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, n_tokens, False)

        # No exit condition: the prompt runs until the process is interrupted.
        while True:
            typed = input("Enter a sentence: ")
            candidate = fix_sent(model, sess, typed)
            print("Candidate: ", candidate)
def decode():
    """Run an interactive correction prompt.

    Optionally loads the KenLM model named by ``FLAGS.lmfile``, prepares the
    NLC data and vocabulary, builds the model in a new TensorFlow session and
    then loops forever reading sentences. In this variant ``fix_sent``
    returns four values; only the first (the chosen sentence) is printed.

    Rebinds the module globals ``vocab`` and ``reverse_vocab``, and ``lm``
    when a language-model file is configured.
    """
    global reverse_vocab, vocab, lm

    lm_path = FLAGS.lmfile
    if lm_path is not None:
        print("Loading Language model from %s" % lm_path)
        lm = kenlm.LanguageModel(lm_path)

    print("Preparing NLC data in %s" % FLAGS.data_dir)
    data_path = FLAGS.data_dir + '/' + FLAGS.tokenizer.lower()
    # Only the vocabulary path (last of the five returned values) is used.
    _, _, _, _, vocab_path = nlc_data.prepare_nlc_data(
        data_path, FLAGS.max_vocab_size, tokenizer=get_tokenizer(FLAGS))

    vocab, reverse_vocab = nlc_data.initialize_vocabulary(vocab_path)
    n_tokens = len(vocab)
    print("Vocabulary size: %d" % n_tokens)

    with tf.Session() as sess:
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, n_tokens, False)

        # NOTE: a disabled batch experiment used to sit here as commented-out
        # code. It read test sentences from a file, wrote the predictions and
        # all candidates to text files, and emitted "qid:"-style ranking
        # features (probability and LM score per candidate).

        # No exit condition: the prompt runs until the process is interrupted.
        while True:
            typed = raw_input("Enter a sentence: ")
            # Remaining three values (candidates and their scores) are unused.
            candidate, _, _, _ = fix_sent(model, sess, typed)
            print("Candidate: ", candidate)
def load_vocab():
    """Load the optional language model and the character-level vocabulary.

    Rebinds the module globals ``vocab``, ``reverse_vocab`` and
    ``vocab_size``, and ``lm`` when ``FLAGS.lmfile`` is set. Returns nothing.
    """
    global reverse_vocab, vocab, vocab_size, lm

    lm_path = FLAGS.lmfile
    if lm_path is not None:
        print("Loading Language model from %s" % lm_path)
        lm = kenlm.LanguageModel(lm_path)

    print("Preparing NLC data in %s" % FLAGS.data_dir)
    data_path = FLAGS.data_dir + '/' + FLAGS.tokenizer.lower()
    # The train/dev splits are discarded; only the vocabulary path is needed.
    _, _, _, _, vocab_path = nlc_data.prepare_nlc_data(
        data_path, FLAGS.max_vocab_size, tokenizer=nlc_data.char_tokenizer)

    vocab, reverse_vocab = nlc_data.initialize_vocabulary(vocab_path)
    vocab_size = len(vocab)
    print("Vocabulary size: %d" % vocab_size)
def setup_batch_decode(sess):
    """Prepare everything needed to decode the dev set in batches.

    Loads the optional KenLM model, prepares the NLC data (with an extra
    hard-coded dev path), initializes the vocabulary and builds the model in
    the supplied session.

    Args:
        sess: session object handed straight to ``create_model``.

    Returns:
        Tuple ``(model, x_dev, y_dev)``.

    Rebinds the module globals ``vocab`` and ``reverse_vocab``, and ``lm``
    when ``FLAGS.lmfile`` is set.
    """
    global reverse_vocab, vocab, lm

    lm_path = FLAGS.lmfile
    if lm_path is not None:
        print("Loading Language model from %s" % lm_path)
        lm = kenlm.LanguageModel(lm_path)

    print("Preparing NLC data in %s" % FLAGS.data_dir)
    data_path = FLAGS.data_dir + '/' + FLAGS.tokenizer.lower()
    # Training splits are not needed for decoding.
    _, _, x_dev, y_dev, vocab_path = nlc_data.prepare_nlc_data(
        data_path,
        FLAGS.max_vocab_size,
        tokenizer=get_tokenizer(FLAGS),
        other_dev_path="/deep/group/nlp_data/nlc_data/ourdev/bpe")

    use_bpe = (FLAGS.tokenizer.lower() == "bpe")
    vocab, reverse_vocab = nlc_data.initialize_vocabulary(vocab_path, bpe=use_bpe)
    n_tokens = len(vocab)
    print("Vocabulary size: %d" % n_tokens)

    print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
    return create_model(sess, n_tokens, False), x_dev, y_dev
def setup_batch_decode(sess):
    """Prepare everything needed to decode the dev set in batches.

    Loads the optional KenLM model, prepares the NLC data, initializes the
    vocabulary and builds the model in the supplied session.

    Args:
        sess: session object handed straight to ``create_model``.

    Returns:
        Tuple ``(model, x_dev, y_dev)``.

    Rebinds the module globals ``vocab`` and ``reverse_vocab``, and ``lm``
    when ``FLAGS.lmfile`` is set.
    """
    global reverse_vocab, vocab, lm

    lm_path = FLAGS.lmfile
    if lm_path is not None:
        print("Loading Language model from %s" % lm_path)
        lm = kenlm.LanguageModel(lm_path)

    print("Preparing NLC data in %s" % FLAGS.data_dir)
    data_path = FLAGS.data_dir + '/' + FLAGS.tokenizer.lower()
    # Training splits are not needed for decoding.
    _, _, x_dev, y_dev, vocab_path = nlc_data.prepare_nlc_data(
        data_path, FLAGS.max_vocab_size, tokenizer=get_tokenizer(FLAGS))

    vocab, reverse_vocab = nlc_data.initialize_vocabulary(vocab_path)
    n_tokens = len(vocab)
    print("Vocabulary size: %d" % n_tokens)

    print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
    return create_model(sess, n_tokens, False), x_dev, y_dev
def decode():
    """Correct sentences either interactively or from ``test.x.txt``.

    Loads the optional KenLM model named by ``FLAGS.lmfile``, prepares the
    NLC data and vocabulary, and builds the model in a new TensorFlow
    session. Then:

    * If ``FLAGS.interactive`` is set, prompts for sentences until the user
      types ``exit`` and prints the candidate for each.
    * Otherwise reads ``<data_dir>/<tokenizer>/test.x.txt`` line by line and
      prints the original and revised sentence for each line. Exits with
      status -1 when that file does not exist.

    Rebinds the module globals ``vocab`` and ``reverse_vocab``, and ``lm``
    when a language-model file is configured.
    """
    # Prepare NLC data.
    global reverse_vocab, vocab, lm
    if FLAGS.lmfile is not None:
        print("Loading Language model from %s" % FLAGS.lmfile)
        lm = kenlm.LanguageModel(FLAGS.lmfile)

    print("Preparing NLC data in %s" % FLAGS.data_dir)
    x_train, y_train, x_dev, y_dev, vocab_path = nlc_data.prepare_nlc_data(
        FLAGS.data_dir + '/' + FLAGS.tokenizer.lower(), FLAGS.max_vocab_size,
        tokenizer=get_tokenizer(FLAGS))
    vocab, reverse_vocab = nlc_data.initialize_vocabulary(vocab_path)
    vocab_size = len(vocab)
    print("Vocabulary size: %d" % vocab_size)

    with tf.Session() as sess:
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, vocab_size, False)

        if FLAGS.interactive:
            while True:
                sent = raw_input("Enter a sentence: ")
                if sent == 'exit':
                    exit(0)
                # Console input is decoded to unicode before correction.
                output_sent = fix_sent(model, sess, sent.decode('utf-8'))
                print("Candidate: ", output_sent)
        else:
            test_x_data = os.path.join(
                FLAGS.data_dir, FLAGS.tokenizer.lower() + '/test.x.txt')
            if not os.path.exists(test_x_data):
                print("Please provide {} to test.".format(test_x_data))
                exit(-1)
            with codecs.open(test_x_data, encoding='utf-8') as fr:
                for sent in fr:
                    print("Original: ", sent.strip().encode('utf-8'))
                    # Fix: this used the undefined name ``sent1``, which
                    # raised NameError on the first line of the file.
                    output_sent = fix_sent(model, sess, sent)
                    print("Revised: ", output_sent.encode('utf-8'))
                    print('*' * 30)
def train():
    """Train a translation model using NLC data.

    Prepares data and vocabulary, builds the model, then runs epochs until
    ``FLAGS.epochs`` is reached (forever when it is 0). After every epoch the
    model is checkpointed to ``<train_dir>/translate.ckpt`` and validated.
    The learning-rate decay op runs when the new validation cost is worse
    than each of the previous three.
    """
    # Prepare NLC data.
    print("Preparing NLC data in %s" % FLAGS.data_dir)
    x_train, y_train, x_dev, y_dev, vocab_path = nlc_data.prepare_nlc_data(
        FLAGS.data_dir + '/' + FLAGS.tokenizer.lower(), FLAGS.max_vocab_size,
        tokenizer=get_tokenizer(FLAGS))
    vocab, _ = nlc_data.initialize_vocabulary(vocab_path)
    vocab_size = len(vocab)
    print("Vocabulary size: %d" % vocab_size)

    with tf.Session() as sess:
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, vocab_size, False)

        print('Initial validation cost: %f' % validate(model, sess, x_dev, y_dev))

        # Deliberately disabled parameter count (kept for manual debugging).
        if False:
            tic = time.time()
            params = tf.trainable_variables()
            num_params = sum(
                map(lambda t: np.prod(tf.shape(t.value()).eval()), params))
            toc = time.time()
            print("Number of params: %d (retreival took %f secs)" %
                  (num_params, toc - tic))

        epoch = 0
        previous_losses = []
        # Fix: all three running averages are initialised together, outside
        # the epoch loop. Previously exp_length/exp_norm were reset to None
        # every epoch while exp_cost persisted, so the first batch of epoch 2
        # evaluated ``0.99 * None`` and raised TypeError.
        exp_cost = None
        exp_length = None
        exp_norm = None
        while (FLAGS.epochs == 0 or epoch < FLAGS.epochs):
            epoch += 1
            current_step = 0

            ## Train
            for source_tokens, source_mask, target_tokens, target_mask in pair_iter(
                    x_train, y_train, FLAGS.batch_size, FLAGS.num_layers):
                # Get a batch and make a step.
                tic = time.time()
                grad_norm, cost, param_norm = model.train(
                    sess, source_tokens, source_mask, target_tokens, target_mask)
                toc = time.time()
                iter_time = toc - tic
                current_step += 1

                lengths = np.sum(target_mask, axis=0)
                mean_length = np.mean(lengths)
                std_length = np.std(lengths)

                # Exponential moving averages with a 0.99 smoothing factor.
                if not exp_cost:
                    exp_cost = cost
                    exp_length = mean_length
                    exp_norm = grad_norm
                else:
                    exp_cost = 0.99 * exp_cost + 0.01 * cost
                    exp_length = 0.99 * exp_length + 0.01 * mean_length
                    exp_norm = 0.99 * exp_norm + 0.01 * grad_norm

                # Report the cost normalised by mean target length.
                cost = cost / mean_length

                if current_step % FLAGS.print_every == 0:
                    print(
                        'epoch %d, iter %d, cost %f, exp_cost %f, grad norm %f, param norm %f, batch time %f, length mean/std %f/%f'
                        % (epoch, current_step, cost, exp_cost / exp_length,
                           grad_norm, param_norm, iter_time, mean_length,
                           std_length))

            ## Checkpoint
            checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
            model.saver.save(sess, checkpoint_path, global_step=model.global_step)

            ## Validate
            valid_cost = validate(model, sess, x_dev, y_dev)
            print("Epoch %d Validation cost: %f" % (epoch, valid_cost))

            # Fix: compare against the PREVIOUS losses before recording the
            # new one. Appending first put valid_cost inside the window, so
            # ``valid_cost > max(...)`` could never be true and the learning
            # rate never decayed.
            if len(previous_losses) > 2 and valid_cost > max(
                    previous_losses[-3:]):
                sess.run(model.learning_rate_decay_op)
            previous_losses.append(valid_cost)
            sys.stdout.flush()
def train():
    """Fit the NLC model, keeping only checkpoints that improve validation.

    Logs to ``<train_dir>/log.txt`` and dumps the flags to
    ``<train_dir>/flags.json``. Runs epochs until ``FLAGS.epochs`` is reached
    (forever when it is 0). After each epoch: if more than two improving
    epochs have been recorded and validation cost rose above the last
    recorded one, the learning rate is decayed and the best checkpoint is
    restored. Otherwise the epoch is recorded as best and saved as
    ``best.ckpt-<epoch>``.
    """
    logging.info("Preparing NLC data in %s" % FLAGS.data_dir)
    data_path = FLAGS.data_dir + '/' + FLAGS.tokenizer.lower()
    x_train, y_train, x_dev, y_dev, vocab_path = nlc_data.prepare_nlc_data(
        data_path, FLAGS.max_vocab_size, tokenizer=get_tokenizer(FLAGS))
    vocab, _ = nlc_data.initialize_vocabulary(vocab_path)
    vocab_size = len(vocab)
    logging.info("Vocabulary size: %d" % vocab_size)

    # Make sure the output directory exists before attaching the file log.
    if not os.path.exists(FLAGS.train_dir):
        os.makedirs(FLAGS.train_dir)
    log_handler = logging.FileHandler("{0}/log.txt".format(FLAGS.train_dir))
    logging.getLogger().addHandler(log_handler)

    print(vars(FLAGS))
    with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout:
        json.dump(FLAGS.__flags, fout)

    with tf.Session() as sess:
        logging.info("Creating %d layers of %d units." %
                     (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, vocab_size, False)

        logging.info('Initial validation cost: %f' %
                     validate(model, sess, x_dev, y_dev))

        # Disabled parameter count, left in place for manual debugging.
        if False:
            tic = time.time()
            params = tf.trainable_variables()
            num_params = sum(
                map(lambda t: np.prod(tf.shape(t.value()).eval()), params))
            toc = time.time()
            print("Number of params: %d (retreival took %f secs)" %
                  (num_params, toc - tic))

        epoch = 0
        best_epoch = 0
        previous_losses = []
        exp_cost = None
        exp_length = None
        exp_norm = None
        total_iters = 0
        start_time = time.time()

        while FLAGS.epochs == 0 or epoch < FLAGS.epochs:
            epoch += 1
            current_step = 0

            epoch_tic = time.time()
            batches = pair_iter(x_train, y_train, FLAGS.batch_size,
                                FLAGS.num_layers)
            for source_tokens, source_mask, target_tokens, target_mask in batches:
                step_begin = time.time()
                grad_norm, cost, param_norm = model.train(
                    sess, source_tokens, source_mask, target_tokens, target_mask)
                step_end = time.time()
                iter_time = step_end - step_begin

                # Running sum of the target mask divided by wall-clock time
                # since training began.
                total_iters += np.sum(target_mask)
                tps = total_iters / (time.time() - start_time)
                current_step += 1

                lengths = np.sum(target_mask, axis=0)
                mean_length = np.mean(lengths)
                std_length = np.std(lengths)

                # Exponential moving averages with a 0.99 smoothing factor.
                if exp_cost:
                    exp_cost = 0.99 * exp_cost + 0.01 * cost
                    exp_length = 0.99 * exp_length + 0.01 * mean_length
                    exp_norm = 0.99 * exp_norm + 0.01 * grad_norm
                else:
                    exp_cost = cost
                    exp_length = mean_length
                    exp_norm = grad_norm

                cost = cost / mean_length

                if current_step % FLAGS.print_every == 0:
                    stats = (epoch, current_step, cost, exp_cost / exp_length,
                             grad_norm, param_norm, tps, mean_length,
                             std_length)
                    logging.info(
                        'epoch %d, iter %d, cost %f, exp_cost %f, grad norm %f, param norm %f, tps %f, length mean/std %f/%f'
                        % stats)
            epoch_toc = time.time()

            checkpoint_path = os.path.join(FLAGS.train_dir, "best.ckpt")

            valid_cost = validate(model, sess, x_dev, y_dev)
            logging.info("Epoch %d Validation cost: %f time: %f" %
                         (epoch, valid_cost, epoch_toc - epoch_tic))

            got_worse = (len(previous_losses) > 2
                         and valid_cost > previous_losses[-1])
            if not got_worse:
                # Record and persist this epoch as the best so far.
                previous_losses.append(valid_cost)
                best_epoch = epoch
                model.saver.save(sess, checkpoint_path, global_step=epoch)
            else:
                # Back off: lower the learning rate and return to the best
                # checkpoint.
                logging.info("Annealing learning rate by %f" %
                             FLAGS.learning_rate_decay_factor)
                sess.run(model.learning_rate_decay_op)
                model.saver.restore(sess,
                                    checkpoint_path + ("-%d" % best_epoch))
            sys.stdout.flush()
def train():
    """Fit the NLC model, keeping only checkpoints that improve validation.

    Logs to ``<train_dir>/log.txt`` and dumps the flags to
    ``<train_dir>/flags.json``. Runs epochs until ``FLAGS.epochs`` is reached
    (forever when it is 0). After each epoch: if more than two improving
    epochs have been recorded and validation cost rose above the last
    recorded one, the learning rate is decayed and the best checkpoint is
    restored. Otherwise the epoch is recorded as best and saved as
    ``best.ckpt-<epoch>``.
    """
    logging.info("Preparing NLC data in %s" % FLAGS.data_dir)
    data_path = FLAGS.data_dir + '/' + FLAGS.tokenizer.lower()
    x_train, y_train, x_dev, y_dev, vocab_path = nlc_data.prepare_nlc_data(
        data_path, FLAGS.max_vocab_size, tokenizer=get_tokenizer(FLAGS))
    vocab, _ = nlc_data.initialize_vocabulary(vocab_path)
    vocab_size = len(vocab)
    logging.info("Vocabulary size: %d" % vocab_size)

    # Make sure the output directory exists before attaching the file log.
    if not os.path.exists(FLAGS.train_dir):
        os.makedirs(FLAGS.train_dir)
    log_handler = logging.FileHandler("{0}/log.txt".format(FLAGS.train_dir))
    logging.getLogger().addHandler(log_handler)

    print(vars(FLAGS))
    with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout:
        json.dump(FLAGS.__flags, fout)

    with tf.Session() as sess:
        logging.info("Creating %d layers of %d units." %
                     (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, vocab_size, False)

        logging.info('Initial validation cost: %f' %
                     validate(model, sess, x_dev, y_dev))

        # Disabled parameter count, left in place for manual debugging.
        if False:
            tic = time.time()
            params = tf.trainable_variables()
            num_params = sum(
                map(lambda t: np.prod(tf.shape(t.value()).eval()), params))
            toc = time.time()
            print("Number of params: %d (retreival took %f secs)" %
                  (num_params, toc - tic))

        epoch = 0
        best_epoch = 0
        previous_losses = []
        exp_cost = None
        exp_length = None
        exp_norm = None

        while FLAGS.epochs == 0 or epoch < FLAGS.epochs:
            epoch += 1
            current_step = 0

            epoch_tic = time.time()
            batches = pair_iter(x_train, y_train, FLAGS.batch_size,
                                FLAGS.num_layers)
            for source_tokens, source_mask, target_tokens, target_mask in batches:
                step_begin = time.time()
                grad_norm, cost, param_norm = model.train(
                    sess, source_tokens, source_mask, target_tokens, target_mask)
                step_end = time.time()
                iter_time = step_end - step_begin
                current_step += 1

                lengths = np.sum(target_mask, axis=0)
                mean_length = np.mean(lengths)
                std_length = np.std(lengths)

                # Exponential moving averages with a 0.99 smoothing factor.
                if exp_cost:
                    exp_cost = 0.99*exp_cost + 0.01*cost
                    exp_length = 0.99*exp_length + 0.01*mean_length
                    exp_norm = 0.99*exp_norm + 0.01*grad_norm
                else:
                    exp_cost = cost
                    exp_length = mean_length
                    exp_norm = grad_norm

                cost = cost / mean_length

                if current_step % FLAGS.print_every == 0:
                    stats = (epoch, current_step, cost, exp_cost / exp_length,
                             grad_norm, param_norm, iter_time, mean_length,
                             std_length)
                    logging.info(
                        'epoch %d, iter %d, cost %f, exp_cost %f, grad norm %f, param norm %f, batch time %f, length mean/std %f/%f'
                        % stats)
            epoch_toc = time.time()

            checkpoint_path = os.path.join(FLAGS.train_dir, "best.ckpt")

            valid_cost = validate(model, sess, x_dev, y_dev)
            logging.info("Epoch %d Validation cost: %f time: %f" %
                         (epoch, valid_cost, epoch_toc - epoch_tic))

            got_worse = (len(previous_losses) > 2
                         and valid_cost > previous_losses[-1])
            if not got_worse:
                # Record and persist this epoch as the best so far.
                previous_losses.append(valid_cost)
                best_epoch = epoch
                model.saver.save(sess, checkpoint_path, global_step=epoch)
            else:
                # Back off: lower the learning rate and return to the best
                # checkpoint.
                logging.info("Annealing learning rate by %f" %
                             FLAGS.learning_rate_decay_factor)
                sess.run(model.learning_rate_decay_op)
                model.saver.restore(sess,
                                    checkpoint_path + ("-%d" % best_epoch))
            sys.stdout.flush()
def train():
    """Train a translation model using NLC data.

    Uses the character tokenizer, logs to ``<train_dir>/log.txt`` and dumps
    the flags to ``<train_dir>/flags.json``. Runs epochs until
    ``FLAGS.epochs`` is reached (forever when it is 0). Batches whose
    length-normalised cost reaches ``FLAGS.anomaly_threshold`` from epoch
    ``FLAGS.anomaly_epochs`` onwards are dumped with ``write_anomaly``.

    After each epoch the average train cost and the validation cost are
    recorded and written to ``costs_data.pkl``. If more than two improving
    epochs have been recorded and validation cost rose above the last
    recorded one, the learning rate is decayed and the best checkpoint is
    restored. Otherwise the epoch is saved as ``best.ckpt-<epoch>``.
    """
    # Prepare NLC data.
    logging.info("Preparing NLC data in %s" % FLAGS.data_dir)
    x_train, y_train, x_dev, y_dev, vocab_path = nlc_data.prepare_nlc_data(
        FLAGS.data_dir + os.sep + FLAGS.tokenizer.lower(),
        FLAGS.max_vocab_size,
        tokenizer=nlc_data.char_tokenizer)
    vocab, _ = nlc_data.initialize_vocabulary(vocab_path)
    vocab_size = len(vocab)
    logging.info("Vocabulary size: %d" % vocab_size)

    if not os.path.exists(FLAGS.train_dir):
        os.makedirs(FLAGS.train_dir)
    file_handler = logging.FileHandler("{0}/log.txt".format(FLAGS.train_dir))
    logging.getLogger().addHandler(file_handler)

    print(vars(FLAGS))
    with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout:
        json.dump(FLAGS.__flags, fout)

    with tf.Session() as sess:
        logging.info("Creating %d layers of %d units." %
                     (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, vocab_size, False)

        # Count trainable parameters by evaluating each variable's shape.
        tic = time.time()
        params = tf.trainable_variables()
        num_params = sum(
            map(lambda t: np.prod(tf.shape(t.value()).eval()), params))
        toc = time.time()
        print("Number of params: %d (retrieval took %f secs)" %
              (num_params, toc - tic))

        epoch = 0
        best_epoch = 0
        train_costs = []
        valid_costs = []
        previous_valid_losses = []
        while FLAGS.epochs == 0 or epoch < FLAGS.epochs:
            epoch += 1
            current_step = 0
            epoch_cost = 0
            epoch_tic = time.time()
            for source_tokens, source_mask, target_tokens, target_mask in pair_iter(
                    x_train, y_train, FLAGS.batch_size, FLAGS.num_layers):
                # Get a batch and make a step.
                grad_norm, cost, param_norm = model.train(
                    sess, source_tokens, source_mask, target_tokens, target_mask)

                lengths = np.sum(target_mask, axis=0)
                mean_length = np.mean(lengths)
                std_length = np.std(lengths)

                # Normalise the batch cost by mean target length.
                cost = cost / mean_length
                epoch_cost += cost
                current_step += 1

                if current_step % FLAGS.print_every == 0:
                    logging.info(
                        'epoch %d, iter %d, cost %f, length mean/std %f/%f' %
                        (epoch, current_step, cost, mean_length, std_length))

                # Dump unusually expensive batches for later inspection.
                if (epoch >= FLAGS.anomaly_epochs) and \
                        (cost >= FLAGS.anomaly_threshold):
                    write_anomaly(
                        source_tokens, vocab_path,
                        SOURCE_PATH + '_' + str(epoch) + '_' + str(current_step))
                    write_anomaly(
                        target_tokens, vocab_path,
                        TARGET_PATH + '_' + str(epoch) + '_' + str(current_step))

            # One epoch average train cost
            train_costs.append(epoch_cost / current_step)

            # After one epoch average validate cost
            epoch_toc = time.time()
            epoch_time = epoch_toc - epoch_tic
            valid_cost = validate(model, sess, x_dev, y_dev)
            valid_costs.append(valid_cost)
            logging.info("Epoch %d Validation cost: %f time:to %2fs" %
                         (epoch, valid_cost, epoch_time))

            # Checkpoint
            checkpoint_path = os.path.join(FLAGS.train_dir, "best.ckpt")
            if len(previous_valid_losses
                   ) > 2 and valid_cost > previous_valid_losses[-1]:
                logging.info("Annealing learning rate by %f" %
                             FLAGS.learning_rate_decay_factor)
                sess.run(model.learning_rate_decay_op)
                model.saver.restore(sess,
                                    checkpoint_path + ("-%d" % best_epoch))
            else:
                previous_valid_losses.append(valid_cost)
                best_epoch = epoch
                model.saver.save(sess, checkpoint_path, global_step=epoch)

            # Fix: the original passed a bare ``open(...)`` to pickle.dump and
            # never closed it; the context manager closes the handle
            # deterministically and flushes the data.
            # NOTE(review): the original nesting was lost; this runs per epoch
            # so the file always holds the latest curves — confirm.
            with open('costs_data.pkl', 'wb') as costs_file:
                pickle.dump([train_costs, valid_costs], costs_file)
def train():
    """Train a translation model using NLC data.

    Prepares data and vocabulary, builds the model, then runs epochs until
    ``FLAGS.epochs`` is reached (forever when it is 0). After every epoch the
    model is checkpointed to ``<train_dir>/translate.ckpt`` and a validation
    cost is computed inline over the dev set. The learning-rate decay op runs
    when the new validation cost is worse than each of the previous three.
    """
    # Prepare NLC data.
    print("Preparing NLC data in %s" % FLAGS.data_dir)
    x_train, y_train, x_dev, y_dev, vocab_path = nlc_data.prepare_nlc_data(
        FLAGS.data_dir + '/' + FLAGS.tokenizer.lower(), FLAGS.max_vocab_size,
        tokenizer=get_tokenizer(FLAGS))
    vocab, _ = nlc_data.initialize_vocabulary(vocab_path)
    vocab_size = len(vocab)
    print("Vocabulary size: %d" % vocab_size)

    with tf.Session() as sess:
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, vocab_size, False)

        epoch = 0
        # Fix: this list was appended to below but never created, which
        # raised NameError at the end of the first epoch.
        previous_losses = []
        while (FLAGS.epochs == 0 or epoch < FLAGS.epochs):
            epoch += 1
            current_step = 0
            # Running averages restart at the beginning of every epoch.
            exp_cost = None
            exp_length = None
            exp_norm = None

            ## Train
            for source_tokens, source_mask, target_tokens, target_mask in PairIter(
                    x_train, y_train, FLAGS.batch_size, FLAGS.num_layers):
                # Get a batch and make a step.
                tic = time.time()
                grad_norm, cost = model.train(
                    sess, source_tokens, source_mask, target_tokens, target_mask)
                toc = time.time()
                iter_time = toc - tic
                current_step += 1

                lengths = np.sum(target_mask, axis=0)
                mean_length = np.mean(lengths)
                std_length = np.std(lengths)

                # Exponential moving averages with a 0.99 smoothing factor.
                if not exp_cost:
                    exp_cost = cost
                    exp_length = mean_length
                    exp_norm = grad_norm
                else:
                    exp_cost = 0.99*exp_cost + 0.01*cost
                    exp_length = 0.99*exp_length + 0.01*mean_length
                    exp_norm = 0.99*exp_norm + 0.01*grad_norm

                # Report the cost normalised by mean target length.
                cost = cost / mean_length

                if current_step % FLAGS.print_every == 0:
                    print('epoch %d, iter %d, cost %f, exp_cost %f, grad_norm %f, batch time %f, length mean/std %f/%f' %
                          (epoch, current_step, cost, exp_cost / exp_length,
                           grad_norm, iter_time, mean_length, std_length))

            ## Checkpoint
            checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
            model.saver.save(sess, checkpoint_path, global_step=model.global_step)

            ## Validate
            valid_costs, valid_lengths = [], []
            for source_tokens, source_mask, target_tokens, target_mask in PairIter(
                    x_dev, y_dev, FLAGS.batch_size, FLAGS.num_layers):
                cost, _ = model.test(sess, source_tokens, source_mask,
                                     target_tokens, target_mask)
                # Scale the batch cost by the mask's second dimension and
                # count mask entries from row 1 onwards.
                valid_costs.append(cost * target_mask.shape[1])
                valid_lengths.append(np.sum(target_mask[1:, :]))
            valid_cost = sum(valid_costs) / float(sum(valid_lengths))

            print("Epoch %d Validation cost: %f" % (epoch, valid_cost))

            # Fix: the condition referenced the undefined name ``loss``; it
            # now uses ``valid_cost``. It is also evaluated BEFORE the new
            # cost is appended, because appending first puts valid_cost in
            # the compared window, so ``valid_cost > max(...)`` is never true.
            if len(previous_losses) > 2 and valid_cost > max(previous_losses[-3:]):
                sess.run(model.learning_rate_decay_op)
            previous_losses.append(valid_cost)
            sys.stdout.flush()
def train():
    """Train a translation model using NLC data.

    Prepares data and vocabulary, builds the model, then runs epochs until
    ``FLAGS.epochs`` is reached (forever when it is 0). After every epoch the
    model is checkpointed to ``<train_dir>/translate.ckpt`` and validated.
    The learning-rate decay op runs when the new validation cost is worse
    than each of the previous three.
    """
    # Prepare NLC data.
    print("Preparing NLC data in %s" % FLAGS.data_dir)
    x_train, y_train, x_dev, y_dev, vocab_path = nlc_data.prepare_nlc_data(
        FLAGS.data_dir + '/' + FLAGS.tokenizer.lower(), FLAGS.max_vocab_size,
        tokenizer=get_tokenizer(FLAGS))
    vocab, _ = nlc_data.initialize_vocabulary(vocab_path)
    vocab_size = len(vocab)
    print("Vocabulary size: %d" % vocab_size)

    with tf.Session() as sess:
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, vocab_size, False)

        print('Initial validation cost: %f' % validate(model, sess, x_dev, y_dev))

        # Deliberately disabled parameter count (kept for manual debugging).
        if False:
            tic = time.time()
            params = tf.trainable_variables()
            num_params = sum(map(lambda t: np.prod(tf.shape(t.value()).eval()), params))
            toc = time.time()
            print ("Number of params: %d (retreival took %f secs)" % (num_params, toc - tic))

        epoch = 0
        previous_losses = []
        while (FLAGS.epochs == 0 or epoch < FLAGS.epochs):
            epoch += 1
            current_step = 0
            # Running averages restart at the beginning of every epoch.
            exp_cost = None
            exp_length = None
            exp_norm = None

            ## Train
            for source_tokens, source_mask, target_tokens, target_mask in PairIter(
                    x_train, y_train, FLAGS.batch_size, FLAGS.num_layers):
                # Get a batch and make a step.
                tic = time.time()
                grad_norm, cost, param_norm = model.train(
                    sess, source_tokens, source_mask, target_tokens, target_mask)
                toc = time.time()
                iter_time = toc - tic
                current_step += 1

                lengths = np.sum(target_mask, axis=0)
                mean_length = np.mean(lengths)
                std_length = np.std(lengths)

                # Exponential moving averages with a 0.99 smoothing factor.
                if not exp_cost:
                    exp_cost = cost
                    exp_length = mean_length
                    exp_norm = grad_norm
                else:
                    exp_cost = 0.99*exp_cost + 0.01*cost
                    exp_length = 0.99*exp_length + 0.01*mean_length
                    exp_norm = 0.99*exp_norm + 0.01*grad_norm

                # Report the cost normalised by mean target length.
                cost = cost / mean_length

                if current_step % FLAGS.print_every == 0:
                    print('epoch %d, iter %d, cost %f, exp_cost %f, grad norm %f, param norm %f, batch time %f, length mean/std %f/%f' %
                          (epoch, current_step, cost, exp_cost / exp_length,
                           grad_norm, param_norm, iter_time, mean_length,
                           std_length))

            ## Checkpoint
            checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
            model.saver.save(sess, checkpoint_path, global_step=model.global_step)

            ## Validate
            valid_cost = validate(model, sess, x_dev, y_dev)
            print("Epoch %d Validation cost: %f" % (epoch, valid_cost))

            # Fix: compare against the PREVIOUS losses before recording the
            # new one. Appending first put valid_cost inside the window, so
            # ``valid_cost > max(...)`` could never be true and the learning
            # rate never decayed.
            if len(previous_losses) > 2 and valid_cost > max(previous_losses[-3:]):
                sess.run(model.learning_rate_decay_op)
            previous_losses.append(valid_cost)
            sys.stdout.flush()