def decode():
    """Interactive decoding loop: read sentences from stdin, print a fix.

    Loads an optional KenLM language model, prepares the NLC data and
    vocabulary, builds the model in a fresh TF session, then loops forever
    prompting for a sentence and printing the model's candidate correction.
    Mutates the module-level ``reverse_vocab``, ``vocab`` and ``lm``.
    """
    # Prepare NLC data.
    global reverse_vocab, vocab, lm

    if FLAGS.lmfile is not None:
        # Optional KenLM model; presumably used elsewhere to rescore
        # candidates — confirm against fix_sent's implementation.
        print("Loading Language model from %s" % FLAGS.lmfile)
        lm = kenlm.LanguageModel(FLAGS.lmfile)

    print("Preparing NLC data in %s" % FLAGS.data_dir)
    # Only vocab_path is used below; the train/dev paths are ignored here.
    x_train, y_train, x_dev, y_dev, vocab_path = nlc_data.prepare_nlc_data(
        FLAGS.data_dir + '/' + FLAGS.tokenizer.lower(), FLAGS.max_vocab_size,
        tokenizer=get_tokenizer(FLAGS))

    vocab, reverse_vocab = nlc_data.initialize_vocabulary(vocab_path)
    vocab_size = len(vocab)
    print("Vocabulary size: %d" % vocab_size)

    with tf.Session() as sess:
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, vocab_size, False)

        # REPL: loops forever; terminate with Ctrl-C / EOF.
        while True:
            sent = raw_input("Enter a sentence: ")
            output_sent = fix_sent(model, sess, sent)
            print("Candidate: ", output_sent)
def decode():
    """Interactive decoding loop: read sentences from stdin, print a fix.

    Loads an optional KenLM language model, prepares the NLC data and
    vocabulary, builds the model in a fresh TF session, then loops forever
    prompting for a sentence and printing the model's best candidate.
    Mutates the module-level ``reverse_vocab``, ``vocab`` and ``lm``.

    Cleanup: removed a large block of commented-out batch-evaluation /
    lambda-rank export code that obscured the function; behavior unchanged.
    """
    # Prepare NLC data.
    global reverse_vocab, vocab, lm

    if FLAGS.lmfile is not None:
        print("Loading Language model from %s" % FLAGS.lmfile)
        lm = kenlm.LanguageModel(FLAGS.lmfile)

    print("Preparing NLC data in %s" % FLAGS.data_dir)
    # Only vocab_path is used below; the train/dev paths are ignored here.
    x_train, y_train, x_dev, y_dev, vocab_path = nlc_data.prepare_nlc_data(
        FLAGS.data_dir + '/' + FLAGS.tokenizer.lower(), FLAGS.max_vocab_size,
        tokenizer=get_tokenizer(FLAGS))

    vocab, reverse_vocab = nlc_data.initialize_vocabulary(vocab_path)
    vocab_size = len(vocab)
    print("Vocabulary size: %d" % vocab_size)

    with tf.Session() as sess:
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, vocab_size, False)

        # REPL: loops forever; terminate with Ctrl-C / EOF.
        while True:
            sent = raw_input("Enter a sentence: ")
            # fix_sent also returns the candidate list, model probabilities
            # and LM scores; only the top candidate is shown interactively.
            output_sent, _, _, _ = fix_sent(model, sess, sent)
            print("Candidate: ", output_sent)
def load_vocab():
    """Populate the module-level vocabulary (and optional KenLM model).

    Sets ``vocab``, ``reverse_vocab``, ``vocab_size`` and — when
    ``FLAGS.lmfile`` is given — ``lm`` from files under ``FLAGS.data_dir``.
    """
    # Prepare NLC data.
    global reverse_vocab, vocab, vocab_size, lm

    # Optionally load a KenLM language model.
    if FLAGS.lmfile is not None:
        print("Loading Language model from %s" % FLAGS.lmfile)
        lm = kenlm.LanguageModel(FLAGS.lmfile)

    print("Preparing NLC data in %s" % FLAGS.data_dir)
    data_path = FLAGS.data_dir + '/' + FLAGS.tokenizer.lower()
    # Only the vocabulary path matters here; train/dev file paths are unused.
    _, _, _, _, vocab_path = nlc_data.prepare_nlc_data(
        data_path, FLAGS.max_vocab_size, tokenizer=nlc_data.char_tokenizer)

    vocab, reverse_vocab = nlc_data.initialize_vocabulary(vocab_path)
    vocab_size = len(vocab)
    print("Vocabulary size: %d" % vocab_size)
def setup_batch_decode(sess):
    """Prepare data, vocabulary and model for batched dev-set decoding.

    Mutates the module-level ``reverse_vocab``, ``vocab`` and ``lm``.
    Returns ``(model, x_dev, y_dev)`` for the caller to iterate over.
    """
    # decode for dev-sets, in batches
    global reverse_vocab, vocab, lm

    if FLAGS.lmfile is not None:
        # Optional KenLM language model.
        print("Loading Language model from %s" % FLAGS.lmfile)
        lm = kenlm.LanguageModel(FLAGS.lmfile)

    print("Preparing NLC data in %s" % FLAGS.data_dir)
    x_train, y_train, x_dev, y_dev, vocab_path = nlc_data.prepare_nlc_data(
        FLAGS.data_dir + '/' + FLAGS.tokenizer.lower(), FLAGS.max_vocab_size,
        tokenizer=get_tokenizer(FLAGS),
        # NOTE(review): hard-coded cluster path for an extra dev set —
        # verify this exists in the deployment environment.
        other_dev_path="/deep/group/nlp_data/nlc_data/ourdev/bpe")

    # BPE vocabularies are loaded differently from word/char ones.
    vocab, reverse_vocab = nlc_data.initialize_vocabulary(
        vocab_path, bpe=(FLAGS.tokenizer.lower() == "bpe"))
    vocab_size = len(vocab)
    print("Vocabulary size: %d" % vocab_size)

    print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
    model = create_model(sess, vocab_size, False)

    return model, x_dev, y_dev
def setup_batch_decode(sess):
    """Prepare data, vocabulary and model for batched dev-set decoding.

    Mutates the module-level ``reverse_vocab``, ``vocab`` and ``lm``.
    Returns ``(model, x_dev, y_dev)`` for the caller to iterate over.
    """
    # decode for dev-sets, in batches
    global reverse_vocab, vocab, lm

    # Optionally load a KenLM language model.
    if FLAGS.lmfile is not None:
        print("Loading Language model from %s" % FLAGS.lmfile)
        lm = kenlm.LanguageModel(FLAGS.lmfile)

    print("Preparing NLC data in %s" % FLAGS.data_dir)
    data_path = FLAGS.data_dir + '/' + FLAGS.tokenizer.lower()
    # Train paths are unused here; only the dev set and vocab are needed.
    x_train, y_train, x_dev, y_dev, vocab_path = nlc_data.prepare_nlc_data(
        data_path, FLAGS.max_vocab_size, tokenizer=get_tokenizer(FLAGS))

    vocab, reverse_vocab = nlc_data.initialize_vocabulary(vocab_path)
    vocab_size = len(vocab)
    print("Vocabulary size: %d" % vocab_size)

    print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
    nlc_model = create_model(sess, vocab_size, False)
    return nlc_model, x_dev, y_dev
def decode():
    """Decode interactively (FLAGS.interactive) or over test.x.txt.

    Interactive mode reads sentences from stdin until the user types
    'exit'; batch mode runs the model over every line of
    ``<data_dir>/<tokenizer>/test.x.txt`` and prints original/revised
    pairs. Mutates module-level ``reverse_vocab``, ``vocab`` and ``lm``.
    """
    # Prepare NLC data.
    global reverse_vocab, vocab, lm

    if FLAGS.lmfile is not None:
        print("Loading Language model from %s" % FLAGS.lmfile)
        lm = kenlm.LanguageModel(FLAGS.lmfile)

    print("Preparing NLC data in %s" % FLAGS.data_dir)
    # Only vocab_path is used below; the train/dev paths are ignored here.
    x_train, y_train, x_dev, y_dev, vocab_path = nlc_data.prepare_nlc_data(
        FLAGS.data_dir + '/' + FLAGS.tokenizer.lower(), FLAGS.max_vocab_size,
        tokenizer=get_tokenizer(FLAGS))

    vocab, reverse_vocab = nlc_data.initialize_vocabulary(vocab_path)
    vocab_size = len(vocab)
    print("Vocabulary size: %d" % vocab_size)

    with tf.Session() as sess:
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, vocab_size, False)

        if FLAGS.interactive:
            while True:
                sent = raw_input("Enter a sentence: ")
                if sent == 'exit':
                    exit(0)
                output_sent = fix_sent(model, sess, sent.decode('utf-8'))
                print("Candidate: ", output_sent)
        else:
            test_x_data = os.path.join(FLAGS.data_dir, FLAGS.tokenizer.lower()+'/test.x.txt')
            if not os.path.exists(test_x_data):
                print("Please provide {} to test.".format(test_x_data))
                exit(-1)
            with codecs.open(test_x_data, encoding='utf-8') as fr:
                for sent in fr:
                    print("Original: ", sent.strip().encode('utf-8'))
                    # FIX: was `fix_sent(model, sess, sent1)` — `sent1` is
                    # undefined and raised NameError on the first test line.
                    output_sent = fix_sent(model, sess, sent)
                    print("Revised: ", output_sent.encode('utf-8'))
                    print('*'*30)
def pair_iter(fnamex, fnamey, batch_size, num_layers, FLAGS, sort_and_shuffle=True):
    """Yield (source_tokens, source_mask, target_tokens, target_mask) batches.

    Reads the token-id files ``fnamex``/``fnamey`` in parallel, refilling an
    in-memory batch buffer via ``refill`` until both files are exhausted.
    Targets get SOS/EOS markers via ``add_sos_eos``, both sides are padded,
    and the arrays are transposed to time-major layout (rows = timesteps).
    Also (re)initializes the module-level ``vocab``/``reverse_vocab`` from
    the configured vocabulary file.

    Fix: the two input files were opened but never closed (resource leak);
    they are now managed with a ``with`` block, closed when the generator
    finishes or is garbage-collected.
    """
    global vocab, reverse_vocab
    vocab, reverse_vocab = nlc_data.initialize_vocabulary(
        os.path.join(FLAGS['data_dir'], FLAGS['tokenizer'].lower(), "vocab.dat"))

    with open(fnamex) as fdx, open(fnamey) as fdy:
        batches = []
        while True:
            if len(batches) == 0:
                refill(batches, fdx, fdy, batch_size, FLAGS,
                       sort_and_shuffle=sort_and_shuffle)
            if len(batches) == 0:
                # refill produced nothing: both files are exhausted.
                break

            x_tokens, y_tokens = batches.pop(0)
            y_tokens = add_sos_eos(y_tokens)
            x_padded, y_padded = padded(x_tokens, num_layers), padded(y_tokens, 1)

            # Time-major: rows are timesteps, columns are batch entries.
            source_tokens = np.array(x_padded).T
            source_mask = (source_tokens != nlc_data.PAD_ID).astype(np.int32)
            target_tokens = np.array(y_padded).T
            target_mask = (target_tokens != nlc_data.PAD_ID).astype(np.int32)

            yield (source_tokens, source_mask, target_tokens, target_mask)
def train():
    """Train a translation model using NLC data.

    Trains until FLAGS.epochs (forever if 0), checkpointing after every
    epoch and annealing the learning rate when validation cost exceeds
    the worst of the last three epochs.
    """
    # Prepare NLC data.
    print("Preparing NLC data in %s" % FLAGS.data_dir)
    x_train, y_train, x_dev, y_dev, vocab_path = nlc_data.prepare_nlc_data(
        FLAGS.data_dir + '/' + FLAGS.tokenizer.lower(), FLAGS.max_vocab_size,
        tokenizer=get_tokenizer(FLAGS))
    vocab, _ = nlc_data.initialize_vocabulary(vocab_path)
    vocab_size = len(vocab)
    print("Vocabulary size: %d" % vocab_size)

    with tf.Session() as sess:
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, vocab_size, False)
        print('Initial validation cost: %f' % validate(model, sess, x_dev, y_dev))

        if False:  # disabled: parameter counting evals one op per variable (slow)
            tic = time.time()
            params = tf.trainable_variables()
            num_params = sum(
                map(lambda t: np.prod(tf.shape(t.value()).eval()), params))
            toc = time.time()
            print("Number of params: %d (retreival took %f secs)" %
                  (num_params, toc - tic))

        epoch = 0
        previous_losses = []
        # Exponential moving average of the batch cost.
        exp_cost = None
        while (FLAGS.epochs == 0 or epoch < FLAGS.epochs):
            epoch += 1
            current_step = 0
            # EMAs of target length and gradient norm, reset each epoch.
            exp_length = None
            exp_norm = None

            ## Train
            for source_tokens, source_mask, target_tokens, target_mask in pair_iter(
                    x_train, y_train, FLAGS.batch_size, FLAGS.num_layers):
                # Get a batch and make a step.
                tic = time.time()
                grad_norm, cost, param_norm = model.train(
                    sess, source_tokens, source_mask, target_tokens, target_mask)
                toc = time.time()
                iter_time = toc - tic
                current_step += 1

                lengths = np.sum(target_mask, axis=0)
                mean_length = np.mean(lengths)
                std_length = np.std(lengths)

                if not exp_cost:
                    exp_cost = cost
                    exp_length = mean_length
                    exp_norm = grad_norm
                else:
                    exp_cost = 0.99 * exp_cost + 0.01 * cost
                    exp_length = 0.99 * exp_length + 0.01 * mean_length
                    exp_norm = 0.99 * exp_norm + 0.01 * grad_norm

                # Normalize to a per-token cost for reporting.
                cost = cost / mean_length

                if current_step % FLAGS.print_every == 0:
                    print(
                        'epoch %d, iter %d, cost %f, exp_cost %f, grad norm %f, param norm %f, batch time %f, length mean/std %f/%f'
                        % (epoch, current_step, cost, exp_cost / exp_length,
                           grad_norm, param_norm, iter_time, mean_length, std_length))

            ## Checkpoint
            checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
            model.saver.save(sess, checkpoint_path, global_step=model.global_step)

            ## Validate
            valid_cost = validate(model, sess, x_dev, y_dev)
            print("Epoch %d Validation cost: %f" % (epoch, valid_cost))

            previous_losses.append(valid_cost)
            # Anneal LR when this epoch is worse than the recent maximum.
            if len(previous_losses) > 2 and valid_cost > max(
                    previous_losses[-3:]):
                sess.run(model.learning_rate_decay_op)
            sys.stdout.flush()
def train():
    """Train a translation model using NLC data.

    Side effects: creates FLAGS.train_dir, attaches a file handler to the
    root logger, dumps the run's flags to flags.json, and writes
    "best.ckpt" checkpoints. When validation cost fails to improve, the
    learning rate is annealed and the best epoch's weights are restored.
    """
    # Prepare NLC data.
    logging.info("Preparing NLC data in %s" % FLAGS.data_dir)
    x_train, y_train, x_dev, y_dev, vocab_path = nlc_data.prepare_nlc_data(
        FLAGS.data_dir + '/' + FLAGS.tokenizer.lower(), FLAGS.max_vocab_size,
        tokenizer=get_tokenizer(FLAGS))
    vocab, _ = nlc_data.initialize_vocabulary(vocab_path)
    vocab_size = len(vocab)
    logging.info("Vocabulary size: %d" % vocab_size)

    if not os.path.exists(FLAGS.train_dir):
        os.makedirs(FLAGS.train_dir)
    # Mirror all log output into <train_dir>/log.txt.
    file_handler = logging.FileHandler("{0}/log.txt".format(FLAGS.train_dir))
    logging.getLogger().addHandler(file_handler)

    print(vars(FLAGS))
    with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout:
        json.dump(FLAGS.__flags, fout)

    with tf.Session() as sess:
        logging.info("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, vocab_size, False)
        logging.info('Initial validation cost: %f' % validate(model, sess, x_dev, y_dev))

        if False:  # disabled: parameter counting evals one op per variable (slow)
            tic = time.time()
            params = tf.trainable_variables()
            num_params = sum(
                map(lambda t: np.prod(tf.shape(t.value()).eval()), params))
            toc = time.time()
            print("Number of params: %d (retreival took %f secs)" %
                  (num_params, toc - tic))

        epoch = 0
        best_epoch = 0
        previous_losses = []
        # Exponential moving averages of cost / target length / grad norm.
        exp_cost = None
        exp_length = None
        exp_norm = None
        total_iters = 0
        start_time = time.time()
        while (FLAGS.epochs == 0 or epoch < FLAGS.epochs):
            epoch += 1
            current_step = 0

            ## Train
            epoch_tic = time.time()
            for source_tokens, source_mask, target_tokens, target_mask in pair_iter(
                    x_train, y_train, FLAGS.batch_size, FLAGS.num_layers):
                # Get a batch and make a step.
                tic = time.time()
                grad_norm, cost, param_norm = model.train(
                    sess, source_tokens, source_mask, target_tokens, target_mask)
                toc = time.time()
                iter_time = toc - tic
                # tps = target tokens processed per second since training start.
                total_iters += np.sum(target_mask)
                tps = total_iters / (time.time() - start_time)
                current_step += 1

                lengths = np.sum(target_mask, axis=0)
                mean_length = np.mean(lengths)
                std_length = np.std(lengths)

                if not exp_cost:
                    exp_cost = cost
                    exp_length = mean_length
                    exp_norm = grad_norm
                else:
                    exp_cost = 0.99 * exp_cost + 0.01 * cost
                    exp_length = 0.99 * exp_length + 0.01 * mean_length
                    exp_norm = 0.99 * exp_norm + 0.01 * grad_norm

                # Normalize to a per-token cost for reporting.
                cost = cost / mean_length

                if current_step % FLAGS.print_every == 0:
                    logging.info(
                        'epoch %d, iter %d, cost %f, exp_cost %f, grad norm %f, param norm %f, tps %f, length mean/std %f/%f'
                        % (epoch, current_step, cost, exp_cost / exp_length,
                           grad_norm, param_norm, tps, mean_length, std_length))
            epoch_toc = time.time()

            ## Checkpoint
            checkpoint_path = os.path.join(FLAGS.train_dir, "best.ckpt")

            ## Validate
            valid_cost = validate(model, sess, x_dev, y_dev)
            logging.info("Epoch %d Validation cost: %f time: %f" %
                         (epoch, valid_cost, epoch_toc - epoch_tic))

            if len(previous_losses) > 2 and valid_cost > previous_losses[-1]:
                # Validation got worse: anneal LR and roll back to best epoch.
                logging.info("Annealing learning rate by %f" % FLAGS.learning_rate_decay_factor)
                sess.run(model.learning_rate_decay_op)
                model.saver.restore(sess, checkpoint_path + ("-%d" % best_epoch))
            else:
                previous_losses.append(valid_cost)
                best_epoch = epoch
                model.saver.save(sess, checkpoint_path, global_step=epoch)
            sys.stdout.flush()
def train():
    """Train a translation model using NLC data.

    Side effects: creates FLAGS.train_dir, attaches a file handler to the
    root logger, dumps the run's flags to flags.json, and writes
    "best.ckpt" checkpoints. When validation cost fails to improve, the
    learning rate is annealed and the best epoch's weights are restored.
    """
    # Prepare NLC data.
    logging.info("Preparing NLC data in %s" % FLAGS.data_dir)
    x_train, y_train, x_dev, y_dev, vocab_path = nlc_data.prepare_nlc_data(
        FLAGS.data_dir + '/' + FLAGS.tokenizer.lower(), FLAGS.max_vocab_size,
        tokenizer=get_tokenizer(FLAGS))
    vocab, _ = nlc_data.initialize_vocabulary(vocab_path)
    vocab_size = len(vocab)
    logging.info("Vocabulary size: %d" % vocab_size)

    if not os.path.exists(FLAGS.train_dir):
        os.makedirs(FLAGS.train_dir)
    # Mirror all log output into <train_dir>/log.txt.
    file_handler = logging.FileHandler("{0}/log.txt".format(FLAGS.train_dir))
    logging.getLogger().addHandler(file_handler)

    print(vars(FLAGS))
    with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout:
        json.dump(FLAGS.__flags, fout)

    with tf.Session() as sess:
        logging.info("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, vocab_size, False)
        logging.info('Initial validation cost: %f' % validate(model, sess, x_dev, y_dev))

        if False:  # disabled: parameter counting evals one op per variable (slow)
            tic = time.time()
            params = tf.trainable_variables()
            num_params = sum(map(lambda t: np.prod(tf.shape(t.value()).eval()), params))
            toc = time.time()
            print ("Number of params: %d (retreival took %f secs)" % (num_params, toc - tic))

        epoch = 0
        best_epoch = 0
        previous_losses = []
        # Exponential moving averages of cost / target length / grad norm.
        exp_cost = None
        exp_length = None
        exp_norm = None
        while (FLAGS.epochs == 0 or epoch < FLAGS.epochs):
            epoch += 1
            current_step = 0

            ## Train
            epoch_tic = time.time()
            for source_tokens, source_mask, target_tokens, target_mask in pair_iter(x_train, y_train, FLAGS.batch_size, FLAGS.num_layers):
                # Get a batch and make a step.
                tic = time.time()
                grad_norm, cost, param_norm = model.train(sess, source_tokens, source_mask, target_tokens, target_mask)
                toc = time.time()
                iter_time = toc - tic
                current_step += 1

                lengths = np.sum(target_mask, axis=0)
                mean_length = np.mean(lengths)
                std_length = np.std(lengths)

                if not exp_cost:
                    exp_cost = cost
                    exp_length = mean_length
                    exp_norm = grad_norm
                else:
                    exp_cost = 0.99*exp_cost + 0.01*cost
                    exp_length = 0.99*exp_length + 0.01*mean_length
                    exp_norm = 0.99*exp_norm + 0.01*grad_norm

                # Normalize to a per-token cost for reporting.
                cost = cost / mean_length

                if current_step % FLAGS.print_every == 0:
                    logging.info('epoch %d, iter %d, cost %f, exp_cost %f, grad norm %f, param norm %f, batch time %f, length mean/std %f/%f' %
                                 (epoch, current_step, cost, exp_cost / exp_length, grad_norm, param_norm, iter_time, mean_length, std_length))
            epoch_toc = time.time()

            ## Checkpoint
            checkpoint_path = os.path.join(FLAGS.train_dir, "best.ckpt")

            ## Validate
            valid_cost = validate(model, sess, x_dev, y_dev)
            logging.info("Epoch %d Validation cost: %f time: %f" % (epoch, valid_cost, epoch_toc - epoch_tic))

            if len(previous_losses) > 2 and valid_cost > previous_losses[-1]:
                # Validation got worse: anneal LR and roll back to best epoch.
                logging.info("Annealing learning rate by %f" % FLAGS.learning_rate_decay_factor)
                sess.run(model.learning_rate_decay_op)
                model.saver.restore(sess, checkpoint_path + ("-%d" % best_epoch))
            else:
                previous_losses.append(valid_cost)
                best_epoch = epoch
                model.saver.save(sess, checkpoint_path, global_step=epoch)
            sys.stdout.flush()
def train():
    """Train a translation model using NLC data.

    Per-epoch: trains over all batches, writes anomalous batches to disk
    once past FLAGS.anomaly_epochs, validates, and either checkpoints a
    new best epoch or anneals the LR and restores the previous best.
    Finally dumps [train_costs, valid_costs] to costs_data.pkl.

    Fixes: the per-epoch log message read "time:to %2fs" (typo plus a
    field-width-2 format instead of 2 decimal places) — now
    "time: %.2fs"; the pickle output file handle was never closed — now
    managed with a ``with`` block.
    """
    # Prepare NLC data.
    logging.info("Preparing NLC data in %s" % FLAGS.data_dir)
    x_train, y_train, x_dev, y_dev, vocab_path = nlc_data.prepare_nlc_data(
        FLAGS.data_dir + os.sep + FLAGS.tokenizer.lower(), FLAGS.max_vocab_size,
        tokenizer=nlc_data.char_tokenizer)
    vocab, _ = nlc_data.initialize_vocabulary(vocab_path)
    vocab_size = len(vocab)
    logging.info("Vocabulary size: %d" % vocab_size)

    if not os.path.exists(FLAGS.train_dir):
        os.makedirs(FLAGS.train_dir)
    # Mirror all log output into <train_dir>/log.txt.
    file_handler = logging.FileHandler("{0}/log.txt".format(FLAGS.train_dir))
    logging.getLogger().addHandler(file_handler)

    print(vars(FLAGS))
    with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout:
        json.dump(FLAGS.__flags, fout)

    with tf.Session() as sess:
        logging.info("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, vocab_size, False)

        tic = time.time()
        params = tf.trainable_variables()
        num_params = sum(
            map(lambda t: np.prod(tf.shape(t.value()).eval()), params))
        toc = time.time()
        print("Number of params: %d (retrieval took %f secs)" %
              (num_params, toc - tic))

        epoch = 0
        best_epoch = 0
        train_costs = []
        valid_costs = []
        previous_valid_losses = []
        while FLAGS.epochs == 0 or epoch < FLAGS.epochs:
            epoch += 1
            current_step = 0
            epoch_cost = 0
            epoch_tic = time.time()
            for source_tokens, source_mask, target_tokens, target_mask in pair_iter(
                    x_train, y_train, FLAGS.batch_size, FLAGS.num_layers):
                # Get a batch and make a step.
                grad_norm, cost, param_norm = model.train(
                    sess, source_tokens, source_mask, target_tokens, target_mask)

                lengths = np.sum(target_mask, axis=0)
                mean_length = np.mean(lengths)
                std_length = np.std(lengths)
                # Normalize to a per-token cost.
                cost = cost / mean_length
                epoch_cost += cost
                current_step += 1

                if current_step % FLAGS.print_every == 0:
                    logging.info(
                        'epoch %d, iter %d, cost %f, length mean/std %f/%f' %
                        (epoch, current_step, cost, mean_length, std_length))

                # Dump suspiciously expensive batches for later inspection.
                if (epoch >= FLAGS.anomaly_epochs) and \
                        (cost >= FLAGS.anomaly_threshold):
                    write_anomaly(
                        source_tokens, vocab_path,
                        SOURCE_PATH + '_' + str(epoch) + '_' + str(current_step))
                    write_anomaly(
                        target_tokens, vocab_path,
                        TARGET_PATH + '_' + str(epoch) + '_' + str(current_step))

            # One epoch average train cost
            train_costs.append(epoch_cost / current_step)

            # After one epoch average validate cost
            epoch_toc = time.time()
            epoch_time = epoch_toc - epoch_tic
            valid_cost = validate(model, sess, x_dev, y_dev)
            valid_costs.append(valid_cost)
            # FIX: log message was "time:to %2fs" (typo + wrong format spec).
            logging.info("Epoch %d Validation cost: %f time: %.2fs" %
                         (epoch, valid_cost, epoch_time))

            # Checkpoint
            checkpoint_path = os.path.join(FLAGS.train_dir, "best.ckpt")
            if len(previous_valid_losses) > 2 and valid_cost > previous_valid_losses[-1]:
                # Validation got worse: anneal LR and roll back to best epoch.
                logging.info("Annealing learning rate by %f" %
                             FLAGS.learning_rate_decay_factor)
                sess.run(model.learning_rate_decay_op)
                model.saver.restore(sess, checkpoint_path + ("-%d" % best_epoch))
            else:
                previous_valid_losses.append(valid_cost)
                best_epoch = epoch
                model.saver.save(sess, checkpoint_path, global_step=epoch)

        # FIX: the open() handle was never closed; use a context manager.
        with open('costs_data.pkl', 'wb') as pkl_file:
            pickle.dump([train_costs, valid_costs], pkl_file)
def train():
    """Train a translation model using NLC data.

    Trains until FLAGS.epochs (forever if 0), checkpointing after every
    epoch and computing a per-token validation cost over the dev set.

    Fixes: ``previous_losses`` was referenced at the end of each epoch but
    never initialized (NameError after the first epoch), and the annealing
    condition compared against an undefined name ``loss`` — it now uses
    the current ``valid_cost``, consistent with the sibling train()
    variants in this file.
    """
    # Prepare NLC data.
    print("Preparing NLC data in %s" % FLAGS.data_dir)
    x_train, y_train, x_dev, y_dev, vocab_path = nlc_data.prepare_nlc_data(
        FLAGS.data_dir + '/' + FLAGS.tokenizer.lower(), FLAGS.max_vocab_size,
        tokenizer=get_tokenizer(FLAGS))
    vocab, _ = nlc_data.initialize_vocabulary(vocab_path)
    vocab_size = len(vocab)
    print("Vocabulary size: %d" % vocab_size)

    with tf.Session() as sess:
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, vocab_size, False)

        epoch = 0
        # FIX: was missing, causing a NameError on the first append below.
        previous_losses = []
        while (FLAGS.epochs == 0 or epoch < FLAGS.epochs):
            epoch += 1
            current_step = 0
            # Exponential moving averages of cost / length / grad norm.
            exp_cost = None
            exp_length = None
            exp_norm = None

            ## Train
            for source_tokens, source_mask, target_tokens, target_mask in PairIter(
                    x_train, y_train, FLAGS.batch_size, FLAGS.num_layers):
                # Get a batch and make a step.
                tic = time.time()
                grad_norm, cost = model.train(sess, source_tokens, source_mask,
                                              target_tokens, target_mask)
                toc = time.time()
                iter_time = toc - tic
                current_step += 1

                lengths = np.sum(target_mask, axis=0)
                mean_length = np.mean(lengths)
                std_length = np.std(lengths)

                if not exp_cost:
                    exp_cost = cost
                    exp_length = mean_length
                    exp_norm = grad_norm
                else:
                    exp_cost = 0.99*exp_cost + 0.01*cost
                    exp_length = 0.99*exp_length + 0.01*mean_length
                    exp_norm = 0.99*exp_norm + 0.01*grad_norm

                # Normalize to a per-token cost for reporting.
                cost = cost / mean_length

                if current_step % FLAGS.print_every == 0:
                    print('epoch %d, iter %d, cost %f, exp_cost %f, grad_norm %f, batch time %f, length mean/std %f/%f' %
                          (epoch, current_step, cost, exp_cost / exp_length,
                           grad_norm, iter_time, mean_length, std_length))

            ## Checkpoint
            checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
            model.saver.save(sess, checkpoint_path, global_step=model.global_step)

            ## Validate: length-weighted average per-token cost over dev set.
            valid_costs, valid_lengths = [], []
            for source_tokens, source_mask, target_tokens, target_mask in PairIter(
                    x_dev, y_dev, FLAGS.batch_size, FLAGS.num_layers):
                cost, _ = model.test(sess, source_tokens, source_mask,
                                     target_tokens, target_mask)
                valid_costs.append(cost * target_mask.shape[1])
                valid_lengths.append(np.sum(target_mask[1:, :]))
            valid_cost = sum(valid_costs) / float(sum(valid_lengths))
            print("Epoch %d Validation cost: %f" % (epoch, valid_cost))

            previous_losses.append(valid_cost)
            # FIX: was `loss > max(...)` with `loss` undefined.
            if len(previous_losses) > 2 and valid_cost > max(previous_losses[-3:]):
                sess.run(model.learning_rate_decay_op)
            sys.stdout.flush()
## Checkpoint import json import os import logging import pdb import tensorflow as tf import nlc_data import numpy as np from decode import decode_beam, detokenize, create_model, FLAGS # from train import create_model from util import pair_iter vocab, reverse_vocab = nlc_data.initialize_vocabulary("data/char/vocab.dat") best_epoch = 2 vocab_size = 42 checkpoint_path = os.path.join(FLAGS.train_dir, "best.ckpt") config = tf.ConfigProto( device_count={'GPU': 0} ) with tf.Session(config=config) as sess: logging.info("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size)) model = create_model(sess, vocab_size, False) model.saver.restore(sess, checkpoint_path + ("-%d" % best_epoch)) valid_costs, valid_lengths = [], []
def train():
    """Train a translation model using NLC data.

    Trains until FLAGS.epochs (forever if 0), checkpointing after every
    epoch and annealing the learning rate when validation cost exceeds
    the worst of the last three epochs.
    """
    # Prepare NLC data.
    print("Preparing NLC data in %s" % FLAGS.data_dir)
    x_train, y_train, x_dev, y_dev, vocab_path = nlc_data.prepare_nlc_data(
        FLAGS.data_dir + '/' + FLAGS.tokenizer.lower(), FLAGS.max_vocab_size,
        tokenizer=get_tokenizer(FLAGS))
    vocab, _ = nlc_data.initialize_vocabulary(vocab_path)
    vocab_size = len(vocab)
    print("Vocabulary size: %d" % vocab_size)

    with tf.Session() as sess:
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, vocab_size, False)
        print('Initial validation cost: %f' % validate(model, sess, x_dev, y_dev))

        if False:  # disabled: parameter counting evals one op per variable (slow)
            tic = time.time()
            params = tf.trainable_variables()
            num_params = sum(map(lambda t: np.prod(tf.shape(t.value()).eval()), params))
            toc = time.time()
            print ("Number of params: %d (retreival took %f secs)" % (num_params, toc - tic))

        epoch = 0
        previous_losses = []
        while (FLAGS.epochs == 0 or epoch < FLAGS.epochs):
            epoch += 1
            current_step = 0
            # EMAs of cost / target length / grad norm, reset each epoch.
            exp_cost = None
            exp_length = None
            exp_norm = None

            ## Train
            for source_tokens, source_mask, target_tokens, target_mask in PairIter(x_train, y_train, FLAGS.batch_size, FLAGS.num_layers):
                # Get a batch and make a step.
                tic = time.time()
                grad_norm, cost, param_norm = model.train(sess, source_tokens, source_mask, target_tokens, target_mask)
                toc = time.time()
                iter_time = toc - tic
                current_step += 1

                lengths = np.sum(target_mask, axis=0)
                mean_length = np.mean(lengths)
                std_length = np.std(lengths)

                if not exp_cost:
                    exp_cost = cost
                    exp_length = mean_length
                    exp_norm = grad_norm
                else:
                    exp_cost = 0.99*exp_cost + 0.01*cost
                    exp_length = 0.99*exp_length + 0.01*mean_length
                    exp_norm = 0.99*exp_norm + 0.01*grad_norm

                # Normalize to a per-token cost for reporting.
                cost = cost / mean_length

                if current_step % FLAGS.print_every == 0:
                    print('epoch %d, iter %d, cost %f, exp_cost %f, grad norm %f, param norm %f, batch time %f, length mean/std %f/%f' %
                          (epoch, current_step, cost, exp_cost / exp_length, grad_norm, param_norm, iter_time, mean_length, std_length))

            ## Checkpoint
            checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
            model.saver.save(sess, checkpoint_path, global_step=model.global_step)

            ## Validate
            valid_cost = validate(model, sess, x_dev, y_dev)
            print("Epoch %d Validation cost: %f" % (epoch, valid_cost))

            previous_losses.append(valid_cost)
            # Anneal LR when this epoch is worse than the recent maximum.
            if len(previous_losses) > 2 and valid_cost > max(previous_losses[-3:]):
                sess.run(model.learning_rate_decay_op)
            sys.stdout.flush()
def train():
    """Actor-critic training on PTB-formatted NLC data.

    Builds four model copies in separate variable scopes (actor, critic,
    delayed_actor, target_critic), optionally pre-trains the actor with
    supervised seq2seq, syncs the delayed/target copies, then runs critic
    pre-training followed by joint actor-critic training.
    Mutates the module-level ``vocab`` and ``rev_vocab``.
    """
    global vocab, rev_vocab

    print("Preparing data in %s" % FLAGS.data_dir)
    path_2_ptb_data = FLAGS.data_dir + "/ptb_data"
    x_train = "{}/train.ids.x".format(path_2_ptb_data)
    y_train = "{}/train.ids.y".format(path_2_ptb_data)
    x_dev = "{}/valid.ids.x".format(path_2_ptb_data)
    y_dev = "{}/valid.ids.y".format(path_2_ptb_data)
    vocab_path = "{}/vocab.dat".format(path_2_ptb_data)

    # source_tokens and target_tokens are transposed
    source_tokens, source_mask, target_tokens, target_mask = build_data(
        fnamex="{}/train.ids.x".format(path_2_ptb_data),
        fnamey="{}/train.ids.y".format(path_2_ptb_data),
        num_layers=FLAGS.num_layers, max_seq_len=FLAGS.max_seq_len)

    vocab, rev_vocab = nlc_data.initialize_vocabulary(vocab_path)
    vocab_size = len(vocab)
    print("Vocabulary size: %d" % vocab_size)

    with tf.Session() as sess:
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))

        # Four copies of the model, one per variable scope.
        with tf.variable_scope("actor") as actor_vs:
            model = create_model(vocab_size, False, actor_vs.name)
            setup_actor_update(model)
        with tf.variable_scope("critic") as critic_vs:
            critic = create_model(vocab_size, False, critic_vs.name)
            setup_loss_critic(critic)
        with tf.variable_scope("delayed_actor") as delayed_actor_vs:
            delayed_actor = create_model(vocab_size, False, delayed_actor_vs.name)
            setup_actor_update(delayed_actor)
        with tf.variable_scope("target_critic") as target_critic_vs:
            target_critic = create_model(vocab_size, False, target_critic_vs.name)
            setup_loss_critic(target_critic)

        # if there is not model to restore, we initialize all of them
        # otherwise, we only need to restore ONCE for everything.
        # TODO: is this saving for critic even just for sup_only?
        if not restore_models(sess, model):
            initialize_models(sess, model)
            # this should initialize all variables..
            # by doing this, we are assigning embeddings as well
            # thinking about how critic's embeddings can make sense

        # Collect the trainable variables of each scope for parameter syncing.
        actor_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=actor_vs.name)
        delayed_actor_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=delayed_actor_vs.name)
        critic_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=critic_vs.name)
        target_critic_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=target_critic_vs.name)

        # initialized but untrained variables are NOT saved
        # remove this code...
        # if FLAGS.rl_new:
        #     actor_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=actor_vs.name)
        #     # filter down to Adam
        #     actor_vars = [v for v in actor_vars if "Adam_3" or "_power" in v.name]
        #
        #     all_delayed_actor_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=delayed_actor_vs.name)
        #     all_critic_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=critic_vs.name)
        #     all_target_critic_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=target_critic_vs.name)
        #
        #     sess.run([tf.variables_initializer(all_delayed_actor_vars),
        #               tf.variables_initializer(all_critic_vars),
        #               tf.variables_initializer(all_target_critic_vars),
        #               tf.variables_initializer(actor_vars)])
        # sess.run(tf.global_variables_initializer())

        if not FLAGS.rl_only:
            model = train_seq2seq(model, sess, x_dev, y_dev, x_train, y_train)  # pre-train actor

        # assign model's parameter values to delayed_actor
        set_params_values(actor_variables, delayed_actor_variables, sess,
                          "actor", "delayed_actor")
        # assign critic's initial parameter values to target_critic
        set_params_values(critic_variables, target_critic_variables, sess,
                          "critic", "target_critic")

        if not FLAGS.sup_only:
            print('Initial validation cost: %f' % validate(model, sess, x_dev, y_dev))
            # pre-train critic
            train_critic(sess, model, critic, delayed_actor, target_critic,
                         x_dev, y_dev, x_train, y_train,
                         actor_variables, delayed_actor_variables,
                         critic_variables, target_critic_variables,
                         train_epochs=FLAGS.critic_epochs, pretrain=True)

            # train actor-critic (for a given # of epoch?)
            train_critic(sess, model, critic, delayed_actor, target_critic,
                         x_dev, y_dev, x_train, y_train,
                         actor_variables, delayed_actor_variables,
                         critic_variables, target_critic_variables,
                         train_epochs=FLAGS.rl_epochs, pretrain=False)

            print('Final validation cost: %f' % validate(model, sess, x_dev, y_dev))