def test(test_args):
    """Evaluate a saved word-level language model on a test file.

    Loads the training-time config from ``test_args.save_dir``, restores the
    latest checkpoint from that directory, and prints test-set perplexity.
    """
    start = time.time()
    # NOTE(review): config.pkl is opened in text mode; if it was written with
    # 'wb' this only works under Python 2 -- confirm intended interpreter.
    with open(os.path.join(test_args.save_dir, 'config.pkl')) as f:
        args = cPickle.load(f)
    data_loader = TextLoader(args, train=False)
    test_data = data_loader.read_dataset(test_args.test_file)
    # Keep the rebuilt graph consistent with the vocabulary actually loaded.
    args.word_vocab_size = data_loader.word_vocab_size
    print "Word vocab size: " + str(data_loader.word_vocab_size) + "\n"

    # Model
    lm_model = WordLM
    print "Begin testing..."

    # If using gpu:
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    # gpu_config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
    # add parameters to the tf session -> tf.Session(config=gpu_config)
    with tf.Graph().as_default(), tf.Session() as sess:
        initializer = tf.random_uniform_initializer(-args.init_scale, args.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            # Build the graph in inference configuration (no dropout/training ops).
            mtest = lm_model(args, is_training=False, is_testing=True)

        # save only the last model
        saver = tf.train.Saver(tf.all_variables())
        tf.initialize_all_variables().run()
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        # tf.no_op() is passed in place of a train op, so weights stay frozen
        # for the whole evaluation epoch.
        test_perplexity = run_epoch(sess, mtest, test_data, data_loader, tf.no_op())
        print("Test Perplexity: %.3f" % test_perplexity)
        print("Test time: %.0f" % (time.time() - start))
def train(args):
    """Train a character-level RNN and checkpoint it every ``save_every`` batches.

    Saves ``config.pkl`` and ``chars_vocab.pkl`` to ``args.save_dir`` so that
    a sampling script can rebuild the same model later.
    """
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size
    # Persist configuration and vocabulary for later reloading.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'w') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'w') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)
    model = Model(args)
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        for e in xrange(args.num_epochs):
            # Exponential learning-rate decay, once per epoch.
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in xrange(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                # Carry the RNN state from the previous batch into this one.
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                print "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start)
                if (e * data_loader.num_batches + b) % args.save_every == 0:
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * data_loader.num_batches + b)
                    print "model saved to {}".format(checkpoint_path)
def train(args):
    """Train a character-level model, optionally resuming from a saved model.

    When ``args.init_from`` is given, validates that the saved configuration
    and vocabulary are compatible with the current run before restoring the
    checkpoint. Writes ``config.pkl``/``chars_vocab.pkl`` to ``args.save_dir``
    and checkpoints every ``save_every`` batches plus once at the very end.
    """
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(args.init_from), " %s must be a a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from, "config.pkl")), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from, "chars_vocab.pkl")), "chars_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        # BUGFIX: these pickles are written in binary mode ('wb') below, so they
        # must be read back in binary mode too; text mode fails under Python 3.
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(args)[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars == data_loader.chars, "Data and loaded model disagreee on character set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagreee on dictionary mappings!"

    # Persist configuration and vocabulary for sampling/resuming later.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(args.num_epochs):
            # Exponential learning-rate decay, once per epoch.
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                # Carry the RNN state across batches within the epoch.
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                      .format(e * data_loader.num_batches + b,
                              args.num_epochs * data_loader.num_batches,
                              e, train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e == args.num_epochs - 1 and b == data_loader.num_batches - 1):
                    # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
def train(args): data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length) args.vocab_size = data_loader.vocab_size with open(os.path.join(args.save_dir, 'config.pkl'), 'w') as f: cPickle.dump(args, f) with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'w') as f: cPickle.dump((data_loader.chars, data_loader.vocab), f) model = Model(args) with tf.Session() as sess: tf.initialize_all_variables().run() saver = tf.train.Saver(tf.all_variables()) train_loss_iterations = {'iteration': [], 'epoch': [], 'train_loss': [], 'val_loss': []} for e in xrange(args.num_epochs): sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e))) data_loader.reset_batch_pointer() state = model.initial_state.eval() for b in xrange(data_loader.num_batches): start = time.time() x, y = data_loader.next_batch() feed = {model.input_data: x, model.targets: y, model.initial_state: state} train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed) end = time.time() batch_idx = e * data_loader.num_batches + b print "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \ .format(batch_idx, args.num_epochs * data_loader.num_batches, e, train_loss, end - start) train_loss_iterations['iteration'].append(batch_idx) train_loss_iterations['epoch'].append(e) train_loss_iterations['train_loss'].append(train_loss) if batch_idx % args.save_every == 0: # evaluate state_val = model.initial_state.eval() avg_val_loss = 0 for x_val, y_val in data_loader.val_batches: feed_val = {model.input_data: x_val, model.targets: y_val, model.initial_state: state_val} val_loss, state_val, _ = sess.run([model.cost, model.final_state, model.train_op], feed_val) avg_val_loss += val_loss / len(data_loader.val_batches) print 'val_loss: {:.3f}'.format(avg_val_loss) train_loss_iterations['val_loss'].append(avg_val_loss) checkpoint_path = os.path.join(args.save_dir, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=e * data_loader.num_batches + b) print 
"model saved to {}".format(checkpoint_path) else: train_loss_iterations['val_loss'].append(None) pd.DataFrame(data=train_loss_iterations, columns=train_loss_iterations.keys()).to_csv(os.path.join(args.save_dir, 'log.csv'))
class TestUtilsMethods(unittest.TestCase):
    """Unit tests for TextLoader vocabulary building and batch generation."""

    def setUp(self):
        # Small fixture corpus: batches of 2 sequences, 5 tokens each.
        self.data_loader = TextLoader("tests/test_data", batch_size=2, seq_length=5)

    def test_init(self):
        # Smoke test: the loader exposes vocab, tensor and vocab_size.
        print (self.data_loader.vocab)
        print (self.data_loader.tensor)
        print (self.data_loader.vocab_size)

    def test_build_vocab(self):
        sentences = ["I", "love", "cat", "cat"]
        vocab, vocab_inv = self.data_loader.build_vocab(sentences)
        print (vocab, vocab_inv)
        # Must include I, love, and cat
        # NOTE: assertItemsEqual is Python 2 only (assertCountEqual on Python 3).
        self.assertItemsEqual(vocab, ["I", "love", "cat"])
        self.assertDictEqual(vocab, {'I': 0, 'love': 2, 'cat': 1})
        self.assertItemsEqual(vocab_inv, ["I", "love", "cat"])

    def test_batch_vocab(self):
        print (np.array(self.data_loader.x_batches).shape)
        # Targets must be the inputs shifted left by one position.
        self.assertItemsEqual(self.data_loader.x_batches[0][0][1:],
                              self.data_loader.y_batches[0][0][:-1])
        self.assertItemsEqual(self.data_loader.x_batches[0][1][1:],
                              self.data_loader.y_batches[0][1][:-1])
def train(args):
    """Train the character model, checkpointing periodically and at the end."""
    # Build batches and the vocabulary from the data directory.
    loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = loader.vocab_size

    # Make sure the save directory exists before writing anything into it.
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # Persist configuration and vocabulary, used to reload models when sampling.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((loader.chars, loader.vocab), f)

    # Build the model graph from the arguments.
    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        total_steps = args.num_epochs * loader.num_batches
        for epoch in range(args.num_epochs):
            # Decay the learning rate once per epoch.
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** epoch)))
            loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for batch in range(loader.num_batches):
                tic = time.time()
                inputs, targets = loader.next_batch()
                # Feed the batch and thread the RNN state through consecutive batches.
                feed_dict = {model.input_data: inputs,
                             model.targets: targets,
                             model.initial_state: state}
                loss, state, _ = sess.run(
                    [model.cost, model.final_state, model.train_op], feed_dict)
                toc = time.time()
                step = epoch * loader.num_batches + batch
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                      .format(step, total_steps, epoch, loss, toc - tic))
                if step % args.save_every == 0:
                    checkpoint_path = os.path.join(args.save_dir, 'models.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
                    print("models saved to {}".format(checkpoint_path))
        # One final checkpoint after the last epoch.
        saver.save(sess, os.path.join(args.save_dir, 'models.ckpt'),
                   global_step=args.num_epochs * loader.num_batches)
def train(args):
    """Train on the 'train' split and report validation loss after each epoch."""
    print(args)
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size
    # Persist configuration and vocabulary for later reloading.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)
    model = Model(args)
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        for e in range(args.num_epochs):
            # Exponential learning-rate decay, once per epoch.
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            #print("model learning rate is {}".format(model.lr.eval()))
            data_loader.reset_batch_pointer('train')
            state = model.initial_state.eval()
            for b in xrange(data_loader.ntrain):
                start = time.time()
                x, y = data_loader.next_batch('train')
                # tmp = ''
                # for c in x:
                #     for i in c:
                #         tmp += np.array(data_loader.chars)[i]
                # print(tmp)
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.ntrain + b,
                            args.num_epochs * data_loader.ntrain,
                            e, train_loss, end - start))
                if (e * data_loader.ntrain + b) % args.save_every == 0:
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * data_loader.ntrain + b)
                    print("model saved to {}".format(checkpoint_path))
            # eval validation loss
            data_loader.reset_batch_pointer('validation')
            validation_state = model.initial_state.eval()
            val_losses = 0
            for n in xrange(data_loader.nvalidation):
                x, y = data_loader.next_batch('validation')
                feed = {model.input_data: x, model.targets: y, model.initial_state: validation_state}
                # Only cost/final_state are fetched -- no train_op -- so the
                # weights are not updated on validation data.
                validation_loss, validation_state = sess.run([model.cost, model.final_state], feed)
                val_losses += validation_loss
            validation_loss = val_losses / data_loader.nvalidation
            print("validation loss is {}".format(validation_loss))
def main(_):
    """Train the DialogueModel, restoring from a checkpoint when one exists.

    Tries to load a pre-trained character embedding from ./data; falls back to
    flag-driven sizes when the embedding files are missing.
    """
    pp.pprint(FLAGS.__flags)
    emb = None
    try:
        # pre-trained chars embedding
        emb = np.load("./data/emb.npy")
        chars = cPickle.load(open("./data/vocab.pkl", 'rb'))
        vocab_size, emb_size = np.shape(emb)
        data_loader = TextLoader('./data', FLAGS.batch_size, chars)
    except Exception:
        # No pre-trained embedding available: derive sizes from flags/loader.
        data_loader = TextLoader('./data', FLAGS.batch_size)
        emb_size = FLAGS.emb_size
        vocab_size = data_loader.vocab_size

    model = DialogueModel(batch_size=FLAGS.batch_size,
                          max_seq_length=data_loader.seq_length,
                          vocab_size=vocab_size,
                          pad_token_id=0,
                          unk_token_id=UNK_ID,
                          emb_size=emb_size,
                          memory_size=FLAGS.memory_size,
                          keep_prob=FLAGS.keep_prob,
                          learning_rate=FLAGS.learning_rate,
                          grad_clip=FLAGS.grad_clip,
                          temperature=FLAGS.temperature,
                          infer=False)
    summaries = tf.summary.merge_all()
    init = tf.global_variables_initializer()

    # save hyper-parameters
    cPickle.dump(FLAGS.__flags, open(FLAGS.logdir + "/hyperparams.pkl", 'wb'))

    checkpoint = FLAGS.checkpoint + '/model.ckpt'
    count = 0
    saver = tf.train.Saver()
    with tf.Session() as sess:
        summary_writer = tf.summary.FileWriter(FLAGS.logdir, sess.graph)
        sess.run(init)
        if len(glob(checkpoint + "*")) > 0:
            saver.restore(sess, checkpoint)
            print("Model restored!")
        else:
            # load embedding
            if emb is not None:
                sess.run([], {model.embedding: emb})
            print("Fresh variables!")
        current_step = 0
        count = 0
        for e in range(FLAGS.num_epochs):
            data_loader.reset_batch_pointer()
            # Recurrent state is reset at the start of every epoch.
            state = None
            # iterate by batch
            for _ in range(data_loader.num_batches):
                x, y, input_lengths, output_lengths = data_loader.next_batch()
                if (current_step + 1) % 10 != 0:
                    res = model.step(sess, x, y, input_lengths, output_lengths, state)
                else:
                    # Every 10th step also evaluate summaries for TensorBoard.
                    res = model.step(sess, x, y, input_lengths, output_lengths, state, summaries)
                    summary_writer.add_summary(res["summary_out"], current_step)
                loss = res["loss"]
                perplexity = np.exp(loss)
                count += 1
                print("{0}/{1}({2}), perplexity {3}".format(
                    current_step + 1, FLAGS.num_epochs * data_loader.num_batches,
                    e, perplexity))
                state = res["final_state"]
                if (current_step + 1) % 2000 == 0:
                    # Periodic checkpoint + summary flush.
                    count = 0
                    summary_writer.flush()
                    save_path = saver.save(sess, checkpoint)
                    print("Model saved in file:", save_path)
                # Read the authoritative step counter back from the graph.
                current_step = tf.train.global_step(sess, model.global_step)
        summary_writer.close()
        # Final checkpoint after training completes.
        save_path = saver.save(sess, checkpoint)
        print("Model saved in file:", save_path)
def main():
    """Train the adversarial text model, optionally resuming from a checkpoint.

    ``args.init_from`` may name either a checkpoint directory or a specific
    checkpoint file; config/vocab compatibility is validated before restoring.
    """
    args = parse_args()
    loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = loader.vocab_size
    print("vocab_size = {}".format(args.vocab_size))

    if args.init_from is not None:
        if os.path.isdir(args.init_from):
            # init from directory
            assert os.path.exists(args.init_from), \
                "{} is not a directory".format(args.init_from)
            parent_dir = args.init_from
        else:
            # init from file
            assert os.path.exists("{}.index".format(args.init_from)), \
                "{} is not a checkpoint".format(args.init_from)
            parent_dir = os.path.dirname(args.init_from)

        config_file = os.path.join(parent_dir, 'config.pkl')
        vocab_file = os.path.join(parent_dir, 'vocab.pkl')
        assert os.path.isfile(config_file), \
            "config.pkl does not exist in directory {}".format(parent_dir)
        assert os.path.isfile(vocab_file), \
            "vocab.pkl does not exist in directory {}".format(parent_dir)

        if os.path.isdir(args.init_from):
            checkpoint = tf.train.latest_checkpoint(parent_dir)
            # BUGFIX: the message referenced the unbound name 'init_from'
            # (NameError when the assert fires); use args.init_from.
            assert checkpoint, \
                "no checkpoint in directory {}".format(args.init_from)
        else:
            checkpoint = args.init_from

        with open(os.path.join(parent_dir, 'config.pkl'), 'rb') as f:
            saved_args = pickle.load(f)
        with open(os.path.join(parent_dir, 'vocab.pkl'), 'rb') as f:
            saved_vocab = pickle.load(f)
        assert saved_vocab == loader.vocab, \
            "vocab in data directory differs from save"

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    # Only write config/vocab if this save dir has none yet, so resumed runs
    # keep their original files.
    new_config_file = os.path.join(args.save_dir, 'config.pkl')
    new_vocab_file = os.path.join(args.save_dir, 'vocab.pkl')
    if not os.path.exists(new_config_file):
        with open(new_config_file, 'wb') as f:
            pickle.dump(args, f)
    if not os.path.exists(new_vocab_file):
        with open(new_vocab_file, 'wb') as f:
            pickle.dump(loader.vocab, f)

    model = Model(args)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        if args.init_from is not None:
            try:
                saver.restore(sess, checkpoint)
            except ValueError:
                print("{} is not a valid checkpoint".format(checkpoint))
            print("initializing from {}".format(checkpoint))
        for e in range(args.num_epochs):
            loader.reset_batch_pointer()
            for b in range(loader.num_batches):
                start = time.time()
                x, _, length = loader.next_batch()
                # Train critic
                # BUGFIX: xrange does not exist on Python 3 (this module uses
                # print()/pickle); range is equivalent here.
                for i in range(1):  # How many critic iterations per generator iteration.
                    disc_feed = {model.real_inputs_discrete: x}
                    disc_cost, _ = sess.run(
                        [model.disc_cost, model.disc_train_op], disc_feed)
                # Train generator
                gen_cost, _ = sess.run([model.gen_cost, model.gen_train_op])
                end = time.time()
                global_step = e * loader.num_batches + b
                if global_step % args.display_every == 0 and global_step != 0:
                    print("{}/{} (epoch {}), gen_cost = {:.3f}, disc_cost = {:.3f}, time/batch = {:.3f}" \
                        .format(b, loader.num_batches, e, gen_cost, disc_cost, end - start))
                if global_step % args.save_every == 0 and global_step != 0:
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=global_step)
                    print("model saved to {}".format(checkpoint_path))
def train(args):
    """Train the model with periodic validation and CSV loss logging.

    Every ``save_every`` batches: compute validation loss, write a checkpoint.
    After training, dump the full loss history to ``<save_dir>/log.csv``.
    """
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size
    # Persist configuration and vocabulary for later reloading.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.chars, data_loader.vocab), f)
    model = Model(args)
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        train_loss_iterations = {'iteration': [], 'epoch': [], 'train_loss': [], 'val_loss': []}
        #Epoch Loop
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            #Mini Batch Training
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                batch_idx = e * data_loader.num_batches + b
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(batch_idx, args.num_epochs * data_loader.num_batches, e, train_loss, end - start))
                train_loss_iterations['iteration'].append(batch_idx)
                train_loss_iterations['epoch'].append(e)
                train_loss_iterations['train_loss'].append(train_loss)
                #Check point saving
                if batch_idx % args.save_every == 0:
                    # evaluate the batchs in TF
                    state_val = sess.run(model.initial_state)
                    avg_val_loss = 0
                    # PERF: count the validation batches once instead of
                    # materializing the whole collection on every iteration.
                    num_val_batches = len(list(data_loader.val_batches))
                    for x_val, y_val in data_loader.val_batches:
                        feed_val = {model.input_data: x_val, model.targets: y_val, model.initial_state: state_val}
                        # BUGFIX: the original also ran model.train_op here,
                        # updating the weights on validation data. Evaluation
                        # must only fetch the cost and the recurrent state.
                        val_loss, state_val = sess.run([model.cost, model.final_state], feed_val)
                        avg_val_loss += val_loss / num_val_batches
                    print('val_loss: {:.3f}'.format(avg_val_loss))
                    train_loss_iterations['val_loss'].append(avg_val_loss)
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
                else:
                    # Keep columns aligned: no validation was run this step.
                    train_loss_iterations['val_loss'].append(None)
        #Save in Pandas
        pd.DataFrame(data=train_loss_iterations,
                     columns=train_loss_iterations.keys()).to_csv(os.path.join(args.save_dir, 'log.csv'))
def train(args):
    """Train the block model, transparently resuming any checkpoint in save_dir.

    Tracks a global epoch fraction and wall-clock seconds inside the graph so
    that interrupted runs resume mid-epoch with correct bookkeeping.
    """
    # Create the data_loader object, which loads up all of our batches, vocab dictionary, etc.
    # from utils.py (and creates them if they don't already exist).
    # These files go in the data directory.
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    load_model = False
    if not os.path.exists(args.save_dir):
        print("Creating directory %s" % args.save_dir)
        os.mkdir(args.save_dir)
    elif (os.path.exists(os.path.join(args.save_dir, 'config.pkl'))):
        # Trained model already exists
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
                saved_args = pickle.load(f)
            # The architecture must match the checkpoint, so the saved model
            # description overrides whatever was passed on the command line.
            args.block_size = saved_args.block_size
            args.num_blocks = saved_args.num_blocks
            args.num_layers = saved_args.num_layers
            args.model = saved_args.model
            print(
                "Found a previous checkpoint. Overwriting model description arguments to:"
            )
            print(
                " model: {}, block_size: {}, num_blocks: {}, num_layers: {}"
                .format(saved_args.model, saved_args.block_size,
                        saved_args.num_blocks, saved_args.num_layers))
            load_model = True

    # Save all arguments to config.pkl in the save directory -- NOT the data directory.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    # Save a tuple of the characters list and the vocab dictionary to chars_vocab.pkl in
    # the save directory -- NOT the data directory.
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.chars, data_loader.vocab), f)

    # Create the model!
    print("Building the model")
    model = Model(args)
    print("Total trainable parameters: {:,d}".format(
        model.trainable_parameter_count()))

    # Make tensorflow less verbose; filter out info (1+) and warnings (2+) but not errors (3).
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    config = tf.ConfigProto(log_device_placement=False)
    #config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(model.save_variables_list(), max_to_keep=3)
        if (load_model):
            print("Loading saved parameters")
            saver.restore(sess, ckpt.model_checkpoint_path)
        # These two counters live inside the graph so they survive restarts.
        global_epoch_fraction = sess.run(model.global_epoch_fraction)
        global_seconds_elapsed = sess.run(model.global_seconds_elapsed)
        if load_model:
            print(
                "Resuming from global epoch fraction {:.3f},"
                " total trained time: {}, learning rate: {}".format(
                    global_epoch_fraction,
                    datetime.timedelta(seconds=float(global_seconds_elapsed)),
                    sess.run(model.lr)))
        if (args.set_learning_rate > 0):
            sess.run(tf.assign(model.lr, args.set_learning_rate))
            print("Reset learning rate to {}".format(args.set_learning_rate))
        # Fast-forward the data loader to where the interrupted run stopped.
        data_loader.cue_batch_pointer_to_epoch_fraction(global_epoch_fraction)
        initial_batch_step = int(
            (global_epoch_fraction - int(global_epoch_fraction)) *
            data_loader.total_batch_count)
        epoch_range = (int(global_epoch_fraction),
                       args.num_epochs + int(global_epoch_fraction))
        writer = tf.summary.FileWriter(args.save_dir, graph=tf.get_default_graph())
        outputs = [
            model.cost, model.final_state, model.train_op, model.summary_op
        ]
        global_step = epoch_range[
            0] * data_loader.total_batch_count + initial_batch_step
        avg_loss = 0
        avg_steps = 0
        try:
            for e in range(*epoch_range):
                # e iterates through the training epochs.
                # Reset the model state, so it does not carry over from the end of the previous epoch.
                state = sess.run(model.zero_state)
                batch_range = (initial_batch_step, data_loader.total_batch_count)
                # Only the first (resumed) epoch starts mid-way through the data.
                initial_batch_step = 0
                for b in range(*batch_range):
                    global_step += 1
                    if global_step % args.decay_steps == 0:
                        # Set the model.lr element of the model to track
                        # the appropriately decayed learning rate.
                        current_learning_rate = sess.run(model.lr)
                        current_learning_rate *= args.decay_rate
                        sess.run(tf.assign(model.lr, current_learning_rate))
                        print("Decayed learning rate to {}".format(
                            current_learning_rate))
                    start = time.time()
                    # Pull the next batch inputs (x) and targets (y) from the data loader.
                    x, y = data_loader.next_batch()
                    # feed is a dictionary of variable references and respective values for initialization.
                    # Initialize the model's input data and target data from the batch,
                    # and initialize the model state to the final state from the previous batch, so that
                    # model state is accumulated and carried over between batches.
                    feed = {model.input_data: x, model.targets: y}
                    model.add_state_to_feed_dict(feed, state)
                    # Run the session! Specifically, tell TensorFlow to compute the graph to calculate
                    # the values of cost, final state, and the training op.
                    # Cost is used to monitor progress.
                    # Final state is used to carry over the state into the next batch.
                    # Training op is not used, but we want it to be calculated, since that calculation
                    # is what updates parameter states (i.e. that is where the training happens).
                    train_loss, state, _, summary = sess.run(outputs, feed)
                    elapsed = time.time() - start
                    global_seconds_elapsed += elapsed
                    writer.add_summary(summary, e * batch_range[1] + b + 1)
                    # Exponentially-weighted moving average of the loss; the
                    # window ramps up over the first 100 steps.
                    if avg_steps < 100:
                        avg_steps += 1
                    avg_loss = 1 / avg_steps * train_loss + (
                        1 - 1 / avg_steps) * avg_loss
                    print("{:,d} / {:,d} (epoch {:.3f} / {}), loss {:.3f} (avg {:.3f}), {:.3f}s" \
                        .format(b, batch_range[1], e + b / batch_range[1],
                                epoch_range[1], train_loss, avg_loss, elapsed))
                    # Every save_every batches, save the model to disk.
                    # By default, only the five most recent checkpoint files are kept.
                    if (e * batch_range[1] + b + 1) % args.save_every == 0 \
                            or (e == epoch_range[1] - 1 and b == batch_range[1] - 1):
                        save_model(sess, saver, model, args.save_dir, global_step,
                                   data_loader.total_batch_count, global_seconds_elapsed)
        except KeyboardInterrupt:
            # Introduce a line break after ^C is displayed so save message
            # is on its own line.
            print()
        finally:
            writer.flush()
            # NOTE(review): if an exception fires before the first batch, e/b
            # are unbound here and this raises NameError -- confirm acceptable.
            global_step = e * data_loader.total_batch_count + b
            save_model(sess, saver, model, args.save_dir, global_step,
                       data_loader.total_batch_count, global_seconds_elapsed)
def train2(args):
    """Train the ConstrainedModel, resuming from the latest checkpoint if any.

    Prints a rolling ETA estimate recomputed every 25 batches; checkpoints are
    written per-epoch with global_step = epoch + 1.
    """
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length, args.reprocess)
    args.vocab_size = data_loader.vocab_size
    totalTask = args.num_epochs * data_loader.num_batches
    lastCheckpoint = tf.train.latest_checkpoint(args.save_dir)
    if lastCheckpoint is None:
        startEpoch = 0
    else:
        print "Last checkpoint :", lastCheckpoint
        # Checkpoints are saved with global_step = e + 1 (see below), so the
        # numeric suffix after '-' is the epoch to resume from.
        startEpoch = int(lastCheckpoint.split("-")[-1])
    print "startEpoch = ", startEpoch
    # Persist configuration and vocabulary for later reloading.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'w') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'w') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)
    model = ConstrainedModel(args)
    # Rolling-ETA bookkeeping.
    etaCount = 0
    etaString = "-"
    etaStart = time.time()
    etaTime = 0
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        if startEpoch > 0:
            # load latest checkpoint
            print "Loading last checkpoint"
            saver.restore(sess, lastCheckpoint)
        for e in xrange(startEpoch, args.num_epochs):
            sess.run(tf.assign(model.lr, decayForEpoch(args, e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in xrange(data_loader.num_batches):
                start = time.time()
                x, y, con = data_loader.next_batch()
                # Feed inputs, targets, carried RNN state and the constraint data.
                feed = {model.input_data: x, model.targets: y,
                        model.initial_state: state, model.con_data: con}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                #time.sleep(0.01)
                #train_loss = 5
                end = time.time()
                taskNum = (e * data_loader.num_batches + b)
                etaCount += 1
                if (etaCount) % 25 == 0:
                    # Re-estimate ETA from the wall time of the last 25 batches.
                    duration = time.time() - etaStart
                    etaTime = (totalTask - (taskNum + 1)) / 25 * duration
                    m, s = divmod(etaTime, 60)
                    h, m = divmod(m, 60)
                    etaString = "%d:%02d:%02d" % (h, m, s)
                    etaStart = time.time()
                print "{}/{} (epoch {}), loss = {:.3f}, time/batch = {:.3f}, ETA: {} ({})" \
                    .format(taskNum, totalTask, e, train_loss, end - start,
                            time.ctime(time.time()+etaTime), etaString)
            if (e + 1) % args.save_every == 0 or e == args.num_epochs - 1:
                checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step = e + 1)
                print "model saved to {}".format(checkpoint_path)
def train(args):
    """Train a model and compare its per-symbol loss against a known entropy.

    The per-batch loss is converted from nats to bits; training stops early
    once the epoch's cumulative loss comes within 0.1 bits of the entropy
    lower bound read from the optional info file. Run details are appended to
    args.output_path (pickle) and args.output_path + '.json' (JSON).
    """
    data_loader = TextLoader(args.data_path, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size
    args.file_size = data_loader.file_size
    print("Vocab size: ", args.vocab_size)
    print("File size: ", args.file_size)

    args.lower_bound = 0  # If we know the entropy then we set it to this
    data_info = {}
    if args.info_path is not None:
        assert os.path.isfile(args.info_path), "Info file not found in the path: %s" % args.info_path
        # Open the info file
        with open(args.info_path, 'rb') as f:
            data_info = json.load(f)
        # Assuming we know entropy
        args.lower_bound = data_info['Entropy']
        print(data_info)

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(args.init_from), " %s must be a a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from, "config.pkl")), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from, "chars_vocab.pkl")), "chars_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(args)[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars == data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    # Persist configuration and vocabulary for later reloading.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    ##################################################
    # Get the model
    ##################################################
    model = Model(args)
    print("model Loaded")
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        writer = tf.summary.FileWriter(args.summary_dir, sess.graph)
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)

        ######################################################
        # Perform the training
        #####################################################
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()  # Need to check what this does
            state = sess.run(model.initial_state)  # What is this initial state
            cumul_loss = 0
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                # Feed each LSTM layer's (c, h) from the previous batch's state.
                for i, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[i].c
                    feed[h] = state[i].h
                summary, train_loss, state, _ = sess.run([model.merged_summaries, model.cost, model.final_state, model.train_op], feed)  # what is the training loss
                # Convert nats -> bits so the loss is comparable to the entropy bound.
                train_loss /= np.log(2)
                cumul_loss += train_loss
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e == args.num_epochs - 1 and b == data_loader.num_batches - 1):
                    # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
                if b % 10 == 0:
                    writer.add_summary(summary, e * data_loader.num_batches + b)
            cumul_loss /= data_loader.num_batches
            print("Epoch {}: Cumulative Loss for the epoch: {:.3f}".format(e, cumul_loss))
            if (abs(cumul_loss - args.lower_bound) < 0.1):
                print("Stopping Training as we get a good loss.. :) ... ")
                break

    ##############################################################
    # Append details to the output file
    ##############################################################
    args.epoch_stopped = e + 1
    args.last_epoch_loss = cumul_loss
    # NOTE(review): cPickle.dump into a file opened in text append mode ('a')
    # only works on Python 2 -- confirm intended interpreter.
    with open(args.output_path, 'a') as f:
        params = vars(args)
        params.update(data_info)
        #json.dump(params, f,indent=2)
        cPickle.dump(params, f)
        #f.write("\n ############################################# \n")
    with open(args.output_path + ".json", 'a') as f:
        params = vars(args)
        params.update(data_info)
        json.dump(params, f, indent=2)
        #cPickle.dump(params)
        f.write("\n ############################################# \n")
def cross_validation(args):
    """Run 10-fold cross-validation of the classifier model.

    Shuffles the full data tensor, splits it into 10 folds, and for each fold
    retrains the model from scratch on the other 9 folds, then measures
    classification accuracy on the held-out fold.  Prints per-fold and average
    accuracy.

    NOTE(review): this is Python 2 code (print statements, `map()` passed
    directly to np.concatenate, true integer division for n_chunks) — it will
    not run unmodified under Python 3.
    """
    # Build the loader once; its .tensor is swapped per fold below.
    data_loader = TextLoader(args.utils_dir, args.data_path, args.batch_size, args.seq_length, None, None)
    args.vocab_size = data_loader.vocab_size
    args.label_size = data_loader.label_size

    # Persist config / vocab / labels so sampling scripts can rebuild the model.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.chars, data_loader.vocab), f)
    with open(os.path.join(args.save_dir, 'labels.pkl'), 'wb') as f:
        pickle.dump(data_loader.labels, f)

    # Shuffle once, then split into 10 folds along the sample axis.
    data = data_loader.tensor.copy()
    np.random.shuffle(data)
    data_list = np.array_split(data, 10, axis=0)

    model = Model(args)
    accuracy_list = []

    with tf.Session() as sess:
        for n in range(10):
            # Re-initialize all variables so each fold trains from scratch.
            init = tf.initialize_all_variables()
            sess.run(init)
            saver = tf.train.Saver(tf.all_variables())

            # Fold n is the test set; the concatenation of the rest is training data.
            test_data = data_list[n].copy()
            train_data = np.concatenate(map(lambda i: data_list[i], [j for j in range(10) if j!=n]), axis=0)
            data_loader.tensor = train_data

            for e in range(args.num_epochs):
                # Exponentially decayed learning rate per epoch.
                sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
                data_loader.reset_batch_pointer()
                for b in range(data_loader.num_batches):
                    start = time.time()
                    # NOTE(review): the RNN state is re-read from the zero
                    # initial_state every batch, so state is NOT carried across
                    # batches here — confirm this is intended.
                    state = model.initial_state.eval()
                    x, y = data_loader.next_batch()
                    feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                    train_loss, state, _, accuracy = sess.run([model.cost, model.final_state, model.optimizer, model.accuracy], feed_dict=feed)
                    end = time.time()
                    print '{}/{} (epoch {}), train_loss = {:.3f}, accuracy = {:.3f}, time/batch = {:.3f}'\
                        .format(e * data_loader.num_batches + b + 1,
                                args.num_epochs * data_loader.num_batches,
                                e + 1, train_loss, accuracy, end - start)
                    # Checkpoint periodically and always on the very last batch.
                    if (e*data_loader.num_batches+b+1) % args.save_every == 0 \
                            or (e==args.num_epochs-1 and b==data_loader.num_batches-1):
                        checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step=e*data_loader.num_batches+b+1)
                        print 'model saved to {}'.format(checkpoint_path)

            # Evaluate on the held-out fold in batch-sized chunks
            # (Py2 integer division; the remainder gets one extra chunk).
            n_chunks = len(test_data) / args.batch_size
            if len(test_data) % args.batch_size:
                n_chunks += 1
            test_data_list = np.array_split(test_data, n_chunks, axis=0)
            correct_total = 0.0
            num_total = 0.0
            for m in range(n_chunks):
                start = time.time()
                # Last column is the label; the rest is the input sequence.
                x = test_data_list[m][:, :-1]
                y = test_data_list[m][:, -1]
                results = model.predict_class(sess, x)
                correct_num = np.sum(results==y)
                end = time.time()

                correct_total += correct_num
                num_total += len(x)

            accuracy_total = correct_total / num_total
            accuracy_list.append(accuracy_total)
            print 'total_num = {}, total_accuracy = {:.6f}'.format(int(num_total), accuracy_total)

    accuracy_average = np.average(accuracy_list)
    print 'The average accuracy of cross_validation is {}'.format(accuracy_average)
def train(args):
    """Train the character model, transparently resuming from a previous run.

    If `args.save_dir` already holds a checkpoint, the saved model-shape
    arguments (model, rnn_size, num_layers) override the command-line ones and
    training resumes from the stored global epoch fraction.  Training can be
    interrupted with Ctrl-C; a final checkpoint is always written.

    Fix over the original: the `finally:` block recomputed
    `global_step = e * total_batch_count + b`, which raises NameError (and
    masks the cleanup save) when an exception occurs before the loops bind
    `e`/`b`.  The running `global_step` counter — incremented once per batch
    and always bound — is used instead.
    """
    # Loads batches + vocab from the data directory (creating caches if absent).
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    load_model = False
    if not os.path.exists(args.save_dir):
        print("Creating directory %s" % args.save_dir)
        os.mkdir(args.save_dir)
    elif os.path.exists(os.path.join(args.save_dir, 'config.pkl')):
        # A trained model already exists in save_dir: adopt its architecture
        # arguments so the checkpoint can be restored into a matching graph.
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            with open(os.path.join(args.save_dir, 'config.pkl')) as f:
                saved_args = cPickle.load(f)
                args.rnn_size = saved_args.rnn_size
                args.num_layers = saved_args.num_layers
                args.model = saved_args.model
                print("Found a previous checkpoint. Overwriting model description arguments to:")
                print(" model: {}, rnn_size: {}, num_layers: {}".format(
                    saved_args.model, saved_args.rnn_size, saved_args.num_layers))
                load_model = True

    # Persist the (possibly overridden) arguments and the vocab into save_dir
    # -- NOT the data directory -- so sampling can rebuild the same model.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'w') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'w') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    print("Building the model")
    model = Model(args)

    config = tf.ConfigProto(log_device_placement=False)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(model.save_variables_list())
        if load_model:
            print("Loading saved parameters")
            saver.restore(sess, ckpt.model_checkpoint_path)
        # Training progress is stored inside the model so resuming works.
        global_epoch_fraction = sess.run(model.global_epoch_fraction)
        global_seconds_elapsed = sess.run(model.global_seconds_elapsed)
        if load_model:
            print("Resuming from global epoch fraction {:.3f},"
                  " total trained time: {}, learning rate: {}".format(
                      global_epoch_fraction, global_seconds_elapsed, sess.run(model.lr)))
        data_loader.cue_batch_pointer_to_epoch_fraction(global_epoch_fraction)
        # Offset into the (possibly partially-trained) current epoch.
        initial_batch_step = int((global_epoch_fraction
                                  - int(global_epoch_fraction)) * data_loader.total_batch_count)
        epoch_range = (int(global_epoch_fraction),
                       args.num_epochs + int(global_epoch_fraction))
        writer = tf.train.SummaryWriter(args.save_dir, graph=tf.get_default_graph())
        outputs = [model.cost, model.final_state, model.train_op, model.summary_op]
        is_lstm = args.model == 'lstm'
        # Always-bound running counter; see docstring for why finally uses it.
        global_step = epoch_range[0] * data_loader.total_batch_count + initial_batch_step
        try:
            for e in xrange(*epoch_range):
                # Fresh state each epoch: no carry-over from the previous epoch's end.
                state = sess.run(model.initial_state)
                batch_range = (initial_batch_step, data_loader.total_batch_count)
                initial_batch_step = 0  # only the first (resumed) epoch starts mid-way
                for b in xrange(*batch_range):
                    global_step += 1
                    if global_step % args.decay_steps == 0:
                        # Stepwise learning-rate decay, tracked in model.lr.
                        current_learning_rate = sess.run(model.lr)
                        current_learning_rate *= args.decay_rate
                        sess.run(tf.assign(model.lr, current_learning_rate))
                        print("Decayed learning rate to {}".format(current_learning_rate))
                    start = time.time()
                    x, y = data_loader.next_batch()
                    # Feed inputs/targets and carry the recurrent state across
                    # batches (LSTM states are (c, h) tuples; other cells are flat).
                    feed = {model.input_data: x, model.targets: y}
                    if is_lstm:
                        for i, (c, h) in enumerate(model.initial_state):
                            feed[c] = state[i].c
                            feed[h] = state[i].h
                    else:
                        for i, c in enumerate(model.initial_state):
                            feed[c] = state[i]
                    # train_op does the parameter update; cost monitors progress;
                    # final_state is carried into the next batch.
                    train_loss, state, _, summary = sess.run(outputs, feed)
                    elapsed = time.time() - start
                    global_seconds_elapsed += elapsed
                    writer.add_summary(summary, e * batch_range[1] + b + 1)
                    print("{}/{} (epoch {}/{}), loss = {:.3f}, time/batch = {:.3f}s"
                          .format(b, batch_range[1], e, epoch_range[1], train_loss, elapsed))
                    # Periodic checkpoint, plus one at the very last batch.
                    if (e * batch_range[1] + b + 1) % args.save_every == 0 \
                            or (e == epoch_range[1] - 1 and b == batch_range[1] - 1):
                        save_model(sess, saver, model, args.save_dir, global_step,
                                   data_loader.total_batch_count, global_seconds_elapsed)
        except KeyboardInterrupt:
            # Line break after ^C so the save message sits on its own line.
            print()
        finally:
            writer.flush()
            # Use the always-bound running counter: the original recomputed it
            # from e/b, which are unbound if we never entered the loops.
            save_model(sess, saver, model, args.save_dir, global_step,
                       data_loader.total_batch_count, global_seconds_elapsed)
def train(args):
    """Train the classifier model, optionally continuing from a saved checkpoint.

    When --continue_training is passed (as the string 'True'/'true'), the saved
    config, vocab and label mappings are asserted to match the current run
    before the checkpoint is restored.  Config/vocab/labels are then re-dumped
    so downstream scripts can rebuild the model.
    """
    # argparse delivers the flag as a string; normalize it to a real bool.
    if args.continue_training in ['True', 'true']:
        args.continue_training = True
    else:
        args.continue_training = False

    data_loader = TextLoader(True, args.utils_dir, args.data_path, args.batch_size, args.seq_length, None, None)
    args.vocab_size = data_loader.vocab_size
    args.label_size = data_loader.label_size

    if args.continue_training:
        # All artifacts of the previous run must exist and agree with this run.
        assert os.path.isfile(os.path.join(args.save_dir, 'config.pkl')), 'config.pkl file does not exist in path %s' % args.save_dir
        assert os.path.isfile(os.path.join(args.utils_dir, 'chars_vocab.pkl')), 'chars_vocab.pkl file does not exist in path %s' % args.utils_dir
        assert os.path.isfile(os.path.join(args.utils_dir, 'labels.pkl')), 'labels.pkl file does not exist in path %s' % args.utils_dir
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        assert ckpt, 'No checkpoint found'
        assert ckpt.model_checkpoint_path, 'No model path found in checkpoint'

        # Architecture-defining arguments may not change across a resume.
        with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
            saved_model_args = pickle.load(f)
        need_be_same = ['model', 'rnn_size', 'num_layers', 'seq_length']
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme]==vars(args)[checkme], 'command line argument and saved model disagree on %s' % checkme
        with open(os.path.join(args.utils_dir, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = pickle.load(f)
        with open(os.path.join(args.utils_dir, 'labels.pkl'), 'rb') as f:
            saved_labels = pickle.load(f)
        assert saved_chars==data_loader.chars, 'data and loaded model disagree on character set'
        assert saved_vocab==data_loader.vocab, 'data and loaded model disagree on dictionary mappings'
        assert saved_labels==data_loader.labels, 'data and loaded model disagree on label dictionary mappings'

    # Re-dump config/vocab/labels for this run.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    with open(os.path.join(args.utils_dir, 'chars_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.chars, data_loader.vocab), f)
    with open(os.path.join(args.utils_dir, 'labels.pkl'), 'wb') as f:
        pickle.dump(data_loader.labels, f)

    model = Model(args)

    with tf.Session() as sess:
        init = tf.initialize_all_variables()
        sess.run(init)
        saver = tf.train.Saver(tf.all_variables())
        if args.continue_training:
            saver.restore(sess, ckpt.model_checkpoint_path)

        for e in range(args.num_epochs):
            # Exponential learning-rate decay per epoch.
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                # NOTE(review): final_state is fetched into `state` but never
                # fed back in, so the RNN state does not carry across batches
                # here — confirm whether that is intentional.
                train_loss, state, _, accuracy = sess.run([model.cost, model.final_state, model.optimizer, model.accuracy], feed_dict=feed)
                end = time.time()
                print '{}/{} (epoch {}), train_loss = {:.3f}, accuracy = {:.3f}, time/batch = {:.3f}'\
                    .format(e * data_loader.num_batches + b + 1,
                            args.num_epochs * data_loader.num_batches,
                            e + 1, train_loss, accuracy, end - start)
                # Checkpoint periodically and always on the very last batch.
                if (e*data_loader.num_batches+b+1) % args.save_every == 0 \
                        or (e==args.num_epochs-1 and b==data_loader.num_batches-1):
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=e*data_loader.num_batches+b+1)
                    print 'model saved to {}'.format(checkpoint_path)
def train(args):
    """Train the joint event/paragraph model over all data files in data_dir.

    Supports resuming via --init_from (with strict config/vocab compatibility
    checks), logs summaries for TensorBoard, and checkpoints periodically plus
    once at the very last batch.

    Fix over the original: the batch loop called sess.run twice — once to fetch
    the probability tensors and once more (with model.train_op again) to fetch
    the summaries — so every batch performed TWO gradient updates and the
    reported loss came from the second.  The two calls are merged into a single
    run fetching everything.  Also fixes the debug print that showed res_prob1
    twice instead of res_prob2.
    """
    # All non-cache files in data_dir are treated as input corpora.
    onlyfiles = [f for f in listdir(args.data_dir)
                 if isfile(join(args.data_dir, f))
                 and (not ("pkl" in f) and not ("npy" in f))]
    for f in onlyfiles:
        print(f)

    data_loader = TextLoader(args.data_dir, onlyfiles, args.batch_size,
                             args.seq_length, args.cid_num)
    args.event_vocab_size = data_loader.event_vocab_size
    args.para_vocab_size = data_loader.para_vocab_size

    # Check compatibility if training is continued from a previously saved model.
    if args.init_from is not None:
        assert os.path.isdir(
            args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "event_words_vocab.pkl")
        ), "words_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "para_words_vocab.pkl")
        ), "words_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # Architecture-defining arguments must match the saved model exactly.
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # Both vocabularies (event and paragraph) must match as well.
        with open(os.path.join(args.init_from, 'event_words_vocab.pkl'), 'rb') as f:
            event_saved_vocab, event_saved_words, event_saved_vocab_rev, = cPickle.load(f)
        with open(os.path.join(args.init_from, 'para_words_vocab.pkl'), 'rb') as f:
            para_saved_vocab, para_saved_words, para_saved_vocab_rev, = cPickle.load(f)
        assert event_saved_words == data_loader.event_words, "Data and loaded model disagree on word set!"
        assert event_saved_vocab == data_loader.event_vocab, "Data and loaded model disagree on dictionary mappings!"
        assert event_saved_vocab_rev == data_loader.event_vocab_rev, "Data and loaded model disagree on dictionary mappings!"
        assert para_saved_words == data_loader.para_words, "Data and loaded model disagree on word set!"
        assert para_saved_vocab == data_loader.para_vocab, "Data and loaded model disagree on dictionary mappings!"
        assert para_saved_vocab_rev == data_loader.para_vocab_rev, "Data and loaded model disagree on dictionary mappings!"

    # Persist config and both vocabularies for sampling / resuming.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'event_words_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.event_vocab, data_loader.event_words,
                      data_loader.event_vocab_rev), f)
    with open(os.path.join(args.save_dir, 'para_words_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.para_vocab, data_loader.para_words,
                      data_loader.para_vocab_rev), f)

    model = Model(args)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_mem)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        # Instrument for TensorBoard: one run directory per launch time.
        summaries = tf.summary.merge_all()
        writer = tf.summary.FileWriter(
            os.path.join(args.log_dir, time.strftime("%Y-%m-%d-%H-%M-%S")))
        writer.add_graph(sess.graph)

        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        # Restore model when resuming.
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)

        for e in range(args.num_epochs):
            # Exponential learning-rate decay per epoch.
            sess.run(tf.assign(model.lr,
                               args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            for b in range(data_loader.num_batches):
                start = time.time()
                x_e, y_e, x_p1, y_p1, x_p2, y_p2 = data_loader.next_batch()
                feed = {
                    model.event_input_data: x_e,
                    model.para1_input_data: x_p1,
                    model.para2_input_data: x_p2,
                    model.targets: y_e,
                    model.targets_para1: y_p1,
                    model.targets_para2: y_p2
                }
                # Carry the LSTM (c, h) state across batches.
                for i, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[i].c
                    feed[h] = state[i].h
                # Single combined run: ONE gradient update per batch, fetching
                # summaries, loss, state and the probability tensors together
                # (the original ran the graph twice, training twice per batch).
                summ, train_loss, state, _, res_prob, res_prob1, res_prob2 = sess.run(
                    [
                        summaries, model.cost, model.final_state, model.train_op,
                        model.probs, model.probs1, model.probs2
                    ], feed)
                print(res_prob, res_prob1, res_prob2)
                print(" ")
                writer.add_summary(summ, e * data_loader.num_batches + b)
                end = time.time()
                print(
                    "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches, e,
                            train_loss, end - start))
                # Periodic checkpoint, plus one for the very last batch.
                if (e * data_loader.num_batches + b) % args.save_every == 0\
                        or (e == args.num_epochs-1 and b == data_loader.num_batches-1):
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
def setUp(self):
    """Build a fresh TextLoader over the fixture corpus before each test."""
    fixture_dir = "tests/test_data"
    self.data_loader = TextLoader(fixture_dir, batch_size=2, seq_length=5)
def train(args):
    """Train the word-level model with mid-epoch resume support.

    Epoch and batch pointers are stored as variables inside the model, so a
    run restored via --init_from resumes at the exact epoch AND batch where it
    stopped.  Summaries are written for TensorBoard; checkpoints are saved
    periodically and at the very last batch.
    """
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length, args.input_encoding)
    args.vocab_size = data_loader.vocab_size

    # Check compatibility if training is continued from previously saved model.
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "words_vocab.pkl")
        ), "words_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # Architecture-defining arguments must match the saved model exactly.
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # The saved vocabulary must match the one built from the current data.
        with open(os.path.join(args.init_from, 'words_vocab.pkl'), 'rb') as f:
            saved_words, saved_vocab = cPickle.load(f)
        assert saved_words == data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    # Persist config and vocab so sampling can rebuild the same model.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.words, data_loader.vocab), f)

    model = Model(args)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(args.log_dir)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_mem)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        train_writer.add_graph(sess.graph)
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)

        # Start at the epoch stored in the model (0 for a fresh run).
        for e in range(model.epoch_pointer.eval(), args.num_epochs):
            # Exponential learning-rate decay per epoch.
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            speed = 0
            if args.init_from is None:
                # Record the current epoch in the model for future resumes.
                assign_op = model.epoch_pointer.assign(e)
                sess.run(assign_op)
            if args.init_from is not None:
                # First (resumed) epoch: jump to the stored batch, then clear
                # the flag so subsequent epochs start from batch 0.
                data_loader.pointer = model.batch_pointer.eval()
                args.init_from = None
            for b in range(data_loader.pointer, data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                # batch_time feeds the previous batch duration (summary metric);
                # inc_batch_pointer_op advances the in-model batch pointer.
                feed = {
                    model.input_data: x,
                    model.targets: y,
                    model.initial_state: state,
                    model.batch_time: speed
                }
                summary, train_loss, state, _, _ = sess.run([
                    merged, model.cost, model.final_state, model.train_op,
                    model.inc_batch_pointer_op
                ], feed)
                train_writer.add_summary(summary,
                                         e * data_loader.num_batches + b)
                speed = time.time() - start
                # NOTE(review): progress print throttled by % batch_size, not a
                # dedicated print-frequency argument — confirm intended.
                if (e * data_loader.num_batches + b) % args.batch_size == 0:
                    print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                        .format(e * data_loader.num_batches + b,
                                args.num_epochs * data_loader.num_batches,
                                e, train_loss, speed))
                # Periodic checkpoint, plus one for the very last batch.
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e==args.num_epochs-1 and b == data_loader.num_batches-1):
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
        train_writer.close()
def train(args):
    """Train the character model, persisting checkpoints plus an iteration
    counter and per-interval loss history under ../rnn_2/.

    All artifacts (config.pkl, chars_vocab.pkl, iterations, losses-<n>,
    model.ckpt) live in the hard-coded relative directory ../rnn_2 rather than
    args.save_dir; pickles use protocol=2 (Python-2 compatible).
    Resuming via --init_from restores both the weights and the iteration count.
    """
    data_loader = TextLoader(args.batch_size)
    args.vocab_size = data_loader.vocab_size

    # Check compatibility if training is continued from previously saved model.
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a a path" % args.init_from
        assert os.path.isfile(
            os.path.join(
                args.init_from, r".." + os.path.sep + "rnn_2" + os.path.sep +
                "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(
                args.init_from, r".." + os.path.sep + "rnn_2" + os.path.sep +
                "chars_vocab.pkl")
        ), "chars_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"
        # The iterations file carries the persistent global iteration counter.
        assert os.path.isfile(
            os.path.join(
                args.init_from, r".." + os.path.sep + "rnn_2" + os.path.sep +
                "iterations")
        ), "iterations file does not exist in path %s " % args.init_from

        # Architecture-defining arguments must match the saved model exactly.
        with open(
                os.path.join(
                    args.init_from, r".." + os.path.sep + "rnn_2" +
                    os.path.sep + "config.pkl"), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # The saved vocabulary must match the one built from the current data.
        with open(
                os.path.join(
                    args.init_from, r".." + os.path.sep + "rnn_2" +
                    os.path.sep + "chars_vocab.pkl"), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars == data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    # Persist config and vocab (protocol=2 for Python 2 compatibility).
    with open(
            os.path.join(r".." + os.path.sep + "rnn_2" + os.path.sep +
                         "config.pkl"), 'wb') as f:
        cPickle.dump(args, f, protocol=2)
    with open(
            os.path.join(r".." + os.path.sep + "rnn_2" + os.path.sep +
                         "chars_vocab.pkl"), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f, protocol=2)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        iterations = 0
        # restore model and number of iterations
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
            with open(
                    os.path.join(
                        args.save_dir, r".." + os.path.sep + "rnn_2" +
                        os.path.sep + "iterations"), 'rb') as f:
                iterations = cPickle.load(f)
        losses = []
        for e in range(args.num_epochs):
            # Exponential learning-rate decay per epoch.
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            data_loader.reset_batch_pointer()
            for b in range(data_loader.num_batches):
                iterations += 1
                start = time.time()
                x, y = data_loader.next_batch()
                # NOTE(review): no initial_state in the feed, so the RNN state
                # is not carried across batches here — confirm intended.
                feed = {model.input_data: x, model.targets: y}
                train_loss, _, _ = sess.run(
                    [model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                # Single-line progress display, rewritten in place with '\r'.
                sys.stdout.write('\r')
                info = "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start)
                sys.stdout.write(info)
                sys.stdout.flush()
                losses.append(train_loss)
                # Periodic checkpoint, plus one for the very last batch; the
                # accumulated losses since the previous save are dumped to a
                # losses-<iterations> file and the buffer reset.
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e == args.num_epochs - 1 and b == data_loader.num_batches - 1):
                    checkpoint_path = os.path.join(r".." + os.path.sep +
                                                   "rnn_2" + os.path.sep +
                                                   "model.ckpt")
                    saver.save(sess, checkpoint_path, global_step=iterations)
                    with open(
                            os.path.join(r".." + os.path.sep + "rnn_2" +
                                         os.path.sep + "iterations"),
                            'wb') as f:
                        cPickle.dump(iterations, f, protocol=2)
                    with open(
                            os.path.join(r".." + os.path.sep + "rnn_2" +
                                         os.path.sep + "losses-" +
                                         str(iterations)), 'wb') as f:
                        cPickle.dump(losses, f, protocol=2)
                    losses = []
                    sys.stdout.write('\n')
                    print("model saved to {}".format(checkpoint_path))
    sys.stdout.write('\n')
def train(args):
    """Train the (optionally attention-based) language model.

    Features driven by args: key-word attention (with an optional coverage
    term for 'bahdanau_coverage'), several RNN state-initialization schemes
    ('zero'/'prev'/'average'/'random'), an optional validation set with
    best-model tracking, adaptive learning-rate decay, early stopping, and
    resume via --init_from.  Epoch-level metrics are appended to
    training_log.txt / validation_log.txt in args.log_dir.
    """
    # Key words are only needed when attention or average-state init is on.
    provide_key_words = args.use_attention or args.state_initialization == 'average'
    # Coverage attention is incompatible with 'prev' state init; fall back.
    if args.use_attention and args.state_initialization == 'prev' and args.attention_type == 'bahdanau_coverage':
        args.state_initialization = 'random'

    data_loader = TextLoader(args.load_preprocessed, 'training', args.data_dir,
                             args.batch_size, args.seq_length, args.vocab_size,
                             args.unk_max_number, args.unk_max_count, None,
                             args.use_bpe, args.bpe_size, args.bpe_model_path,
                             args.pretrained_embeddings, provide_key_words,
                             args.key_word_count_multiplier, args.pos_tags,
                             args.input_encoding)
    args.vocab_size = data_loader.vocab_size
    args.words_vocab_file = data_loader.words_vocab_file
    args.bpe_model_path = data_loader.bpe_model_path
    if args.pretrained_embeddings is not None:
        args.processed_embeddings = os.path.join(data_loader.embedding_dir,
                                                 'embedding_matrix.pkl')

    # Optional validation loader; reuses the training vocab and BPE model.
    if args.validation_data_dir is not None:
        val_data_loader = TextLoader(
            args.load_preprocessed, 'validation', args.validation_data_dir,
            args.batch_size, args.seq_length, args.vocab_size,
            args.unk_max_number, args.unk_max_count, data_loader.vocab,
            args.use_bpe, args.bpe_size, data_loader.bpe_model_path,
            args.pretrained_embeddings, provide_key_words,
            args.key_word_count_multiplier, args.pos_tags,
            args.input_encoding)
        validation_log = open(os.path.join(args.log_dir, 'validation_log.txt'), 'a')

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # Architecture-defining arguments must match the saved model exactly.
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = [
            "rnn_size", "embedding_size", "num_layers", "dropout_prob",
            "batch_size", "seq_length", "attention_type", "use_attention",
            "dont_train_embeddings"
        ]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme
        assert os.path.isfile(
            saved_model_args.words_vocab_file
        ), "words_vocab.pkl.pkl file does not exist in path %s" % saved_model_args.words_vocab_file

        # The saved vocabulary must match the one built from the current data.
        with open(saved_model_args.words_vocab_file, 'rb') as f:
            saved_words, saved_vocab = cPickle.load(f)
        assert saved_words == data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    # Persist config and vocab for sampling / resuming.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(data_loader.words_vocab_file, 'wb') as f:
        cPickle.dump((data_loader.words, data_loader.vocab), f)

    model = Model(args)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(args.log_dir)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_mem)
    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
    training_log = open(os.path.join(args.log_dir, 'training_log.txt'), 'a')
    best_val_error = None
    start_epoch = 0
    learning_rate = args.learning_rate

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        train_writer.add_graph(sess.graph)
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        zero_state = sess.run(model.initial_state)
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
            # Resume training bookkeeping stored inside the model's variables.
            learning_rate = model.lr.eval()
            best_val_error = model.best_val_error.eval() or None
            best_val_epoch = model.best_val_epoch.eval()
            start_epoch = model.epoch_pointer.eval() + 1
        else:
            sess.run(tf.assign(model.lr, learning_rate))

        for e in range(start_epoch, args.num_epochs):
            epoch_start = time.time()
            # decrease learning rate after every epoch if adaptive_learning_rate is not used
            if (args.validation_data_dir is None
                    or args.adaptive_learning_rate <= 0) and e > 0:
                learning_rate *= args.decay_rate
                sess.run(tf.assign(model.lr, learning_rate))
            data_loader.reset_batch_pointer()
            state = zero_state
            epoch_error = 0
            epoch_coverage_loss = 0
            # as every epoch is started, save its number in the model
            sess.run(tf.assign(model.epoch_pointer, e))
            for b in range(data_loader.pointer, data_loader.num_batches):
                x, y, target_weights, target_sequence_length, key_words, key_words_count, key_words_weights = data_loader.next_batch(
                )
                # 'zero' init resets state every batch; otherwise the final
                # state of the previous batch is carried over.
                if args.state_initialization == 'zero':
                    state = zero_state
                if key_words is not None:
                    feed = {
                        model.input_data: x,
                        model.targets: y,
                        model.target_weights: target_weights,
                        model.target_sequence_length: target_sequence_length,
                        model.initial_state: state,
                        model.attention_key_words: key_words,
                        model.attention_states_count: key_words_count,
                        model.attention_states_weights: key_words_weights
                    }
                else:
                    feed = {
                        model.input_data: x,
                        model.targets: y,
                        model.target_weights: target_weights,
                        model.target_sequence_length: target_sequence_length,
                        model.initial_state: state
                    }
                summary, train_loss, state, _ = sess.run(
                    [merged, model.cost, model.final_state, model.train_op],
                    feed)
                # if model trained has bahdanau_coverage attention type, collect coverage_loss as well
                if args.use_attention and args.attention_type == 'bahdanau_coverage':
                    epoch_coverage_loss += np.sum(state.coverage_loss)
                # accumulate the train_loss
                epoch_error += train_loss
                if (e * data_loader.num_batches + b) % args.batch_size == 0:
                    train_writer.add_summary(summary,
                                             e * data_loader.num_batches + b)
            epoch_speed = time.time() - epoch_start
            # Epoch-level reporting (stdout + training log file).
            if args.use_attention and args.attention_type == 'bahdanau_coverage':
                print(
                    "epoch\t{}\tepoch_loss\t{:.3f}\tepoch_coverage_loss\t{:.3f}\tepoch_time\t{:.3f}\tlearning_rate\t{:.3f}\n"
                    .format(
                        e, epoch_error / data_loader.num_batches,
                        epoch_coverage_loss / args.batch_size /
                        data_loader.num_batches, epoch_speed, learning_rate))
                training_log.write(
                    "epoch\t{}\tepoch_loss\t{:.3f}\tepoch_coverage_loss\t{:.3f}\tepoch_time\t{:.3f}\tlearning_rate\t{:.3f}\n"
                    .format(
                        e, epoch_error / data_loader.num_batches,
                        epoch_coverage_loss / args.batch_size /
                        data_loader.num_batches, epoch_speed, learning_rate))
            else:
                print(
                    "epoch\t{}\tepoch_loss\t{:.3f}\tepoch_time\t{:.3f}\tlearning_rate\t{:.3f}\n"
                    .format(e, epoch_error / data_loader.num_batches,
                            epoch_speed, learning_rate))
                training_log.write(
                    "epoch\t{}\tepoch_loss\t{:.3f}\tepoch_time\t{:.3f}\tlearning_rate\t{:.3f}\n"
                    .format(e, epoch_error / data_loader.num_batches,
                            epoch_speed, learning_rate))

            if e % args.save_every == 0 or e == args.num_epochs - 1:  # save for the last result
                # validate every saved model
                if args.validation_data_dir is not None:
                    val_start = time.time()
                    val_data_loader.reset_batch_pointer()
                    val_error = 0
                    val_coverage_loss = 0
                    val_state = zero_state
                    for val_b in range(val_data_loader.pointer,
                                       val_data_loader.num_batches):
                        if args.state_initialization == 'zero':
                            val_state = zero_state
                        val_x, val_y, val_target_weights, val_target_sequence_length, val_key_words, val_key_words_count, val_key_words_weights = val_data_loader.next_batch(
                        )
                        if val_key_words is not None:
                            val_feed = {
                                model.input_data: val_x,
                                model.targets: val_y,
                                model.target_weights: val_target_weights,
                                model.target_sequence_length:
                                val_target_sequence_length,
                                model.initial_state: val_state,
                                model.attention_key_words: val_key_words,
                                model.attention_states_count:
                                val_key_words_count,
                                model.attention_states_weights:
                                val_key_words_weights
                            }
                        else:
                            val_feed = {
                                model.input_data: val_x,
                                model.targets: val_y,
                                model.target_weights: val_target_weights,
                                model.target_sequence_length:
                                val_target_sequence_length,
                                model.initial_state: val_state
                            }
                        # Evaluation only: no train_op in the fetches.
                        val_train_loss, val_state = sess.run(
                            [model.cost, model.final_state], val_feed)
                        val_error += val_train_loss
                        # if model trained has bahdanau_coverage attention type, collect coverage_loss as well
                        if args.use_attention and args.attention_type == 'bahdanau_coverage':
                            val_coverage_loss += np.sum(
                                val_state.coverage_loss)
                    mean_val_error = val_error / val_data_loader.num_batches
                    val_speed = time.time() - val_start
                    if args.use_attention and args.attention_type == 'bahdanau_coverage':
                        print(
                            "epoch\t{}\tvalidation_loss\t{:.3f}\tval_coverage_loss\t{:.3f}\tvalidation_time\t{:.3f}\n"
                            .format(
                                e, mean_val_error,
                                val_coverage_loss / args.batch_size /
                                val_data_loader.num_batches, val_speed))
                        validation_log.write(
                            "epoch\t{}\tvalidation_loss\t{:.3f}\tval_coverage_loss\t{:.3f}\tvalidation_time\t{:.3f}\n"
                            .format(
                                e, mean_val_error,
                                val_coverage_loss / args.batch_size /
                                val_data_loader.num_batches, val_speed))
                    else:
                        print(
                            "epoch\t{}\tvalidation_loss\t{:.3f}\tvalidation_time\t{:.3f}\n"
                            .format(e, mean_val_error, val_speed))
                        validation_log.write(
                            "epoch\t{}\tvalidation_loss\t{:.3f}\tvalidation_time\t{:.3f}\n"
                            .format(e, mean_val_error, val_speed))

                    # save information about best validation error and epoch in model
                    if best_val_error is None or best_val_error > mean_val_error:
                        print('======= NEW BEST EPOCH =======')
                        best_val_error = mean_val_error
                        best_val_epoch = e
                        sess.run(
                            tf.assign(model.best_val_error, best_val_error))
                        sess.run(
                            tf.assign(model.best_val_epoch, best_val_epoch))
                    # if adaptive learning rate is used and enough epochs have passed without improvement then decrease learning rate
                    elif e - best_val_epoch >= args.adaptive_learning_rate and args.adaptive_learning_rate > 0:
                        learning_rate *= args.decay_rate
                        sess.run(tf.assign(model.lr, learning_rate))
                    # Save every evaluated model, or only new best epochs.
                    if args.save_all or best_val_epoch == e:
                        saver.save(sess, checkpoint_path, global_step=e)
                        print("model saved to {}".format(checkpoint_path))
                    # Early stopping after too many non-improving validations.
                    if e - best_val_epoch >= args.max_worse_validations and args.max_worse_validations > 0:
                        print(
                            "finishing early as {} evaluated models did not lower the validation loss"
                            .format(args.max_worse_validations))
                        break
                else:
                    # No validation set: save unconditionally on the schedule.
                    saver.save(sess, checkpoint_path, global_step=e)
                    print("model saved to {}".format(checkpoint_path))

    training_log.close()
    if args.validation_data_dir is not None:
        validation_log.close()
        val_data_loader.close()
    train_writer.close()
    data_loader.close()
def train(args):
    """Train a BasicLSTM word-sense model.

    Loads data via TextLoader, optionally verifies compatibility with a
    previously saved model (``args.init_from``), persists config/vocab to
    ``args.save_dir``, then runs the epoch/batch training loop with per-epoch
    dev evaluation, best-dev checkpointing, early stopping, and a final test
    pass.

    Args:
        args: parsed command-line namespace; read fields include data_dir,
            sense_file, batch_size, seq_length, data_set_size, init_from,
            save_dir, log_dir, learning_rate, grad_clip, keep_prob,
            num_epochs, stop_count. ``args.vocab_size`` / ``args.verb_size``
            are written back onto it.
    """
    # Data Preparation
    # ====================================
    data_loader = TextLoader(args.data_dir, args.sense_file, args.batch_size,
                             args.seq_length, args.data_set_size)
    args.vocab_size = data_loader.vocab_size
    args.verb_size = len(data_loader.verbs)
    print(args.verb_size)
    print("Number of sentences: {}".format(data_loader.num_data))
    print("Vocabulary size: {}".format(args.vocab_size))

    # Check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "words_vocab.pkl")
        ), "words_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = pickle.load(f)
        need_be_same = ["rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'words_vocab.pkl'), 'rb') as f:
            saved_words, saved_vocab = pickle.load(f)
        assert saved_words == data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # Persist config and vocabulary so inference can rebuild the model.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.words, data_loader.vocab), f)
    """
    embedding_matrix = get_vocab_embedding(args.save_dir, data_loader.words, args.embedding_file)
    print("Embedding matrix shape:",embedding_matrix.shape)
    """

    # Training
    # ====================================
    with tf.Graph().as_default():
        with tf.Session() as sess:
            #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
            #sess.run()
            model = BasicLSTM(args)

            # Define training procedure: Adam with global-norm gradient clipping.
            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer = tf.train.AdamOptimizer(args.learning_rate)
            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(
                tf.gradients(model.cost, tvars), args.grad_clip)
            train_op = optimizer.apply_gradients(
                zip(grads, tvars), global_step=global_step)

            # Keep track of gradient values and sparsity
            grad_summaries = []
            for g, v in zip(grads, tvars):
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)

            # Summary for loss
            loss_summary = tf.summary.scalar("loss", model.cost)

            # Train summaries
            merged = tf.summary.merge_all()
            if not os.path.exists(args.log_dir):
                os.makedirs(args.log_dir)
            train_writer = tf.summary.FileWriter(args.log_dir, sess.graph)

            saver = tf.train.Saver(tf.global_variables())

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Restore model
            if args.init_from is not None:
                saver.restore(sess, ckpt.model_checkpoint_path)

            # Start training
            print("Start training")

            # create test and dev sets.
            # NOTE(review): both feeds pass the *training* keep_prob rather
            # than 1.0, so dropout stays active during dev/test — confirm
            # this is intended.
            data_loader.reset_batch_pointer()
            x_batch_dev, y_batch_dev, unk_count_dev, n_sent_dev, _ = data_loader.next_batch_test(
            )
            feed_dict_dev = {
                model.x: x_batch_dev,
                model.y: y_batch_dev,
                model.keep_prob: args.keep_prob
            }
            data_loader.reset_batch_pointer()
            x_batch_test, y_batch_test, unk_count_test, n_sent_test, _ = data_loader.next_batch_test(
                set_to_choose=1)
            feed_dict_test = {
                model.x: x_batch_test,
                model.y: y_batch_test,
                model.keep_prob: args.keep_prob
            }
            data_loader.reset_batch_pointer()

            steps = 0           # epochs evaluated without a new best dev accuracy
            prev_equal = 0.     # best dev accuracy seen so far
            for epoch in range(args.num_epochs):
                data_loader.reset_batch_pointer()
                state = sess.run(model.initial_state)
                for i in range(data_loader.num_batches):
                    start = time.time()
                    x_batch, y_batch, unk_count, n_sent, _ = data_loader.next_batch(
                    )
                    feed_dict = {
                        model.x: x_batch,
                        model.y: y_batch,
                        model.keep_prob: args.keep_prob
                    }
                    _, step, summary, loss, equal = sess.run([
                        train_op, global_step, merged, model.cost, model.equal
                    ], feed_dict)
                    print(
                        "training step {}, epoch {}, batch {}/{}, loss: {:.4f}, accuracy: {:.4f}, avg unk count: {}, avg sent: {}, time/batch: {:.3f}"
                        .format(step, epoch, i, data_loader.num_batches, loss,
                                np.mean(equal),
                                int(unk_count / args.batch_size), n_sent,
                                time.time() - start))

                # Start dev evaluation (once per epoch, always on the same
                # pre-built dev feed).
                print("Start dev")
                data_loader.reset_batch_pointer()
                accur = []
                for i in range(data_loader.num_batches_test):
                    start = time.time()
                    step, summary, loss, equal = sess.run(
                        [global_step, merged, model.cost, model.equal],
                        feed_dict_dev)
                    # NOTE(review): this log prints unk_count_test/n_sent_test
                    # (the *test* split counters), not the dev split's
                    # unk_count_dev/n_sent_dev — likely a copy-paste slip.
                    print(
                        "dev step {}, epoch {}, batch {}/{}, loss: {:.4f}, accuracy: {:.4f}, avg unk count: {}, avg sent: {}, time/batch: {:.3f}"
                        .format(step, epoch, i, data_loader.num_batches_test,
                                loss, np.mean(equal),
                                int(unk_count_test / args.batch_size),
                                n_sent_test,
                                time.time() - start))
                    accur.append(np.mean(equal))
                eval_acc = np.mean(accur)
                if eval_acc > prev_equal:
                    # New best dev accuracy: checkpoint the model.
                    prev_equal = eval_acc
                    train_writer.add_summary(summary, step)
                    current_step = tf.train.global_step(sess, global_step)
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    path = saver.save(sess, checkpoint_path,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}".format(path))
                elif steps > args.stop_count:
                    # Early stopping after too many epochs without improvement.
                    print("early stopping")
                    break
                else:
                    steps += 1

            # Final test pass on the held-out test feed.
            print("Start test")
            data_loader.reset_batch_pointer()
            accur = []
            for i in range(data_loader.num_batches_test):
                start = time.time()
                step, summary, loss, equal = sess.run(
                    [global_step, merged, model.cost, model.equal],
                    feed_dict_test)
                print(
                    "test step {}, epoch {}, batch {}/{}, loss: {:.4f}, accuracy: {:.4f}, avg unk count: {}, avg sent: {}, time/batch: {:.3f}"
                    .format(step, epoch, i, data_loader.num_batches_test,
                            loss, np.mean(equal),
                            int(unk_count_test / args.batch_size),
                            n_sent_test,
                            time.time() - start))
                accur.append(np.mean(equal))
            print('avg test: {:.4f}'.format(np.mean(accur)))
            train_writer.close()
def infer(args):
    """Run word-sense inference with a restored (meta-graph) model.

    Restores a saved graph, extracts context-layer embeddings for the
    training batches, averages them per sense to build sense prototype
    vectors, then compares test-context vectors against those prototypes
    with cosine similarity.

    NOTE(review): this function is clearly exploratory/unfinished — it calls
    ``exit()`` mid-loop, and the code after the second ``exit()`` is dead.
    See inline notes for the individual issues.
    """
    start = time.time()

    # Load testing data
    # ====================================
    with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
        saved_args = pickle.load(f)
    print('restored args:\n', json.dumps(vars(saved_args), indent=4, separators=(',',':')))
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'rb') as f:
        _, vocab = pickle.load(f)
    data_loader = TextLoader(args.test_file, args.sense_file, args.batch_size,
                             args.seq_length, args.data_set_size, shuffle=True)
    # NOTE(review): these two pickle files are opened without a context
    # manager, from hard-coded relative paths, so the handles leak and the
    # cwd must contain them.
    sense_idx = pickle.load(open('wsd_senses_idx.p','rb'))
    #words to sense dict
    words_sense = pickle.load(open('verbs_sense.p','rb'))

    # Predict
    # ===================================
    #checkpoint = tf.train.latest_checkpoint(args.save_dir)
    with tf.Graph().as_default():
        with tf.Session() as sess:
            start = time.time()
            # NOTE(review): hard-coded checkpoint paths ('./save2' meta vs
            # './save/' weights) instead of args.save_dir — verify these
            # actually point at the same trained model.
            saver = tf.train.import_meta_graph('./save2/model.ckpt-65.meta')
            saver.restore(sess,tf.train.latest_checkpoint('./save/'))
            graph = tf.get_default_graph()
            graph_x = graph.get_tensor_by_name("x:0")
            graph_y = graph.get_tensor_by_name("y:0")
            graph_context_layer = graph.get_tensor_by_name("cont_layer:0")
            #graph_softmax_loss = graph.get_operation_by_name("softmax_loss")
            '''
            model = BasicLSTM(saved_args, True)
            saver = tf.train.Saver()
            saver.restore(sess, checkpoint)
            '''
            data_loader.reset_batch_pointer()
            x_batch_test, y_batch_test, unk_count_test, n_sent_test, senss_test = data_loader.next_batch_test(collect_sense=True)
            feed_dict_test = {graph_x: x_batch_test, graph_y: y_batch_test}
            data_loader.reset_batch_pointer()

            # Collect context vectors / labels / senses for every training
            # example whose label is not the -1 sentinel.
            xs = []
            ys = []
            senses = []
            data_loader.reset_batch_pointer()
            for i in range(data_loader.num_batches):
                x_batch, y_batch, unk_count, n_sent, senss = data_loader.next_batch(collect_sense=True, shuffle=False)
                feed_dict = {graph_x: x_batch, graph_y: y_batch}
                wordVecs = sess.run(graph_context_layer, feed_dict)
                n_sents = len(y_batch)
                for j in range(n_sents):
                    if y_batch[j] != -1:
                        xs.append(wordVecs[j])
                        ys.append(y_batch[j])
                        senses.append(senss[j])
            #print(xs[0].shape)
            n_words = np.max(ys) + 1   # NOTE(review): unused below
            n_examples = len(ys)
            # Group context vectors by sense, then collapse each group to its
            # mean — the sense "prototype" vector.
            sense_vects = {}
            for i in range(n_examples):
                if senses[i] in sense_vects:
                    sense_vects[senses[i]].append(xs[i])
                else:
                    sense_vects[senses[i]] = [xs[i]]
            sense_keys = sense_vects.keys()
            sense_train_counts = {}
            for key in sense_keys:
                sense_train_counts[key] = len(sense_vects[key][:])
                sense_vects[key] = np.mean(sense_vects[key], axis=0)

            # Same collection for the test batch.
            # NOTE(review): this loop reruns the *same* feed_dict_test
            # num_batches times — it never advances to a new test batch.
            xs_test = []
            ys_test = []
            senses_test= []
            for i in range(data_loader.num_batches):
                wordVecs = sess.run(graph_context_layer, feed_dict_test)
                n_sents = len(y_batch_test)
                for j in range(n_sents):
                    if y_batch_test[j] != -1:
                        xs_test.append(wordVecs[j])
                        ys_test.append(y_batch_test[j])
                        senses_test.append(senss_test[j])
            n_tests = len(ys_test)
            corr = 0   # NOTE(review): never incremented — accuracy tally unfinished
            for i in range(n_tests):
                ambig_word = data_loader.verbs_idx[ys_test[i]]
                ambig_word = data_loader.words[ambig_word]
                correct_sense = senses_test[i]
                if ambig_word in words_sense:
                    max_cos = 0   # NOTE(review): unused — argmax over senses never implemented
                    for sense in words_sense[ambig_word]:
                        if sense in sense_idx:
                            sen = sense_idx[sense]
                            if sen in sense_vects:
                                # NOTE(review): prints the first similarity it
                                # finds and kills the process.
                                print(cosine_similarity(sense_vects[sen], xs_test[i]))
                                exit()
            # NOTE(review): graph_context_layer is a single tensor, so this
            # 2-way unpack raises at runtime if ever reached.
            loss, contextVecs = sess.run(graph_context_layer, feed_dict)
            print(contextVecs.shape)
            exit()
            # NOTE(review): dead code — and `out_path` is never defined
            # (would be a NameError).
            print("Saved prediction to {}".format(out_path))
    print("Total run time: {}s".format(time.time() - start))
def train(args):
    """Train a Model on fixed-size train/test/val splits from get_training_data.

    Replaces the TextLoader batch stream with manual slicing of the
    pre-loaded ``train_input`` matrix; TextLoader is still constructed for
    its chars/vocab, which are pickled alongside the config. Supports
    resuming from ``args.init_from``.
    """
    display_step = 100
    num_train = 20000;
    # Three disjoint slices of the corpus: offsets 0 / 50000 / 75000.
    train_input, train_output, train_length, max_length = get_training_data(args, 'train', num_train, 0)
    test_input, test_output, test_length, max_length = get_training_data(args, 'test', 25000, 50000)
    val_input, val_output, val_length, max_length = get_training_data(args, 'val', 25000, 75000)
    #for i in range(2):
    #    print('i: ' + str(i) + ' => ' + str(train_input[i,:]))
    train_input = train_input.astype(int)
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = 50000 #data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(args.init_from)," %s must be a a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"config.pkl")),"config.pkl file does not exist in path %s"%args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"chars_vocab.pkl")),"chars_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt,"No checkpoint found"
        assert ckpt.model_checkpoint_path,"No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl')) as f:
            saved_model_args = cPickle.load(f)
        need_be_same=["model","rnn_size","num_layers","seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme]==vars(args)[checkme],"Command line argument and saved model disagree on '%s' "%checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl')) as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars==data_loader.chars, "Data and loaded model disagreee on character set!"
        assert saved_vocab==data_loader.vocab, "Data and loaded model disagreee on dictionary mappings!"

    # Persist config and vocab for later sampling/inference.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)
    print("num_layers: ", args.num_layers)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(args.num_epochs):
            # Exponentially decayed learning rate per epoch.
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            step = 0
            ptr = 0   # row pointer into train_input
            print('train_input: ', train_input.shape)
            while step < num_train/args.batch_size:
                b = step
                #for b in range(data_loader.num_batches):
                step += 1
                start = time.time()
                # inputs batch.
                # NOTE(review): the column slice uses args.batch_size, not
                # args.seq_length — if rows are seq_length+1 tokens this
                # truncates/widens the window incorrectly; confirm intent.
                x = np.squeeze(train_input[ptr:ptr+args.batch_size, :args.batch_size])
                # output batch (inputs shifted right by one token)
                y = np.squeeze(train_input[ptr:ptr+args.batch_size, 1:args.batch_size+1])
                # NOTE(review): advancing by batch_size+1 rows skips one row
                # per step — presumably should be += args.batch_size; verify.
                ptr += args.batch_size+1
                #x, y = data_loader.next_batch()
                #print('x: ', x.shape)
                #print('y: ', y.shape)
                #print('x: ', x[1])
                #print('y: ', y)
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                tt, calc_res, reg_cost, train_loss, state, _ = sess.run([model.target_vector, model.logits, model.reg_cost, model.cost, model.final_state, model.train_op], feed)
                # Debug dumps of the target vector and raw logits.
                print('out len: ', len(tt))
                print('target: ', tt)
                print('calc_res: ', calc_res)
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}, reg_cost = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start, reg_cost))
                if (e * data_loader.num_batches + b) % args.save_every == 0\
                        or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
                if step % display_step == 0:
                    print('x: ', x[1])
def train(args):
    """Train a Model with resumable epoch-fraction bookkeeping.

    If ``args.save_dir`` already holds a checkpoint, the saved model-shape
    arguments override the command line and training resumes from the stored
    global epoch fraction. Saves on a step schedule, on Ctrl-C, and always
    once more in the ``finally`` block.
    """
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size
    load_model = False
    if not os.path.exists(args.save_dir):
        print("Creating directory %s" % args.save_dir)
        os.mkdir(args.save_dir)
    elif (os.path.exists(os.path.join(args.save_dir, 'config.pkl'))):
        # Trained model already exists: adopt its architecture arguments so
        # the restored weights fit the rebuilt graph.
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
                saved_args = pickle.load(f)
            args.block_size = saved_args.block_size
            args.num_blocks = saved_args.num_blocks
            args.num_layers = saved_args.num_layers
            args.model = saved_args.model
            print("Found a previous checkpoint. Overwriting model description arguments to:")
            print(" model: {}, block_size: {}, num_blocks: {}, num_layers: {}".format(
                saved_args.model, saved_args.block_size, saved_args.num_blocks,
                saved_args.num_layers))
            load_model = True

    # Persist (possibly overridden) config and vocab.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.chars, data_loader.vocab), f)

    print("Building the model")
    model = Model(args)
    print("Total trainable parameters: {:,d}".format(model.trainable_parameter_count()))

    # Silence TF's C++ INFO logs.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    config = tf.ConfigProto(log_device_placement=False)
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(model.save_variables_list(), max_to_keep=3)
        if (load_model):
            print("Loading saved parameters")
            saver.restore(sess, ckpt.model_checkpoint_path)
        # Training-progress counters are stored *in the graph* so they
        # survive checkpoint save/restore.
        global_epoch_fraction = sess.run(model.global_epoch_fraction)
        global_seconds_elapsed = sess.run(model.global_seconds_elapsed)
        if load_model:
            print("Resuming from global epoch fraction {:.3f},"
                  " total trained time: {}, learning rate: {}".format(
                  global_epoch_fraction,
                  datetime.timedelta(seconds=float(global_seconds_elapsed)),
                  sess.run(model.lr)))
        if (args.set_learning_rate > 0):
            sess.run(tf.assign(model.lr, args.set_learning_rate))
            print("Reset learning rate to {}".format(args.set_learning_rate))
        # Fast-forward the loader to where the last run stopped mid-epoch.
        data_loader.cue_batch_pointer_to_epoch_fraction(global_epoch_fraction)
        initial_batch_step = int((global_epoch_fraction
                - int(global_epoch_fraction)) * data_loader.total_batch_count)
        epoch_range = (int(global_epoch_fraction),
                args.num_epochs + int(global_epoch_fraction))
        writer = tf.summary.FileWriter(args.save_dir, graph=tf.get_default_graph())
        outputs = [model.cost, model.final_state, model.train_op, model.summary_op]
        global_step = epoch_range[0] * data_loader.total_batch_count + initial_batch_step
        avg_loss = 0
        avg_steps = 0
        try:
            for e in range(*epoch_range):
                state = sess.run(model.zero_state)
                # First (resumed) epoch may start mid-way; later epochs start at 0.
                batch_range = (initial_batch_step, data_loader.total_batch_count)
                initial_batch_step = 0
                for b in range(*batch_range):
                    global_step += 1
                    # Step-scheduled multiplicative learning-rate decay.
                    if global_step % args.decay_steps == 0:
                        current_learning_rate = sess.run(model.lr)
                        current_learning_rate *= args.decay_rate
                        sess.run(tf.assign(model.lr, current_learning_rate))
                        print("Decayed learning rate to {}".format(current_learning_rate))
                    start = time.time()
                    x, y = data_loader.next_batch()
                    feed = {model.input_data: x, model.targets: y}
                    model.add_state_to_feed_dict(feed, state)
                    train_loss, state, _, summary = sess.run(outputs, feed)
                    elapsed = time.time() - start
                    global_seconds_elapsed += elapsed
                    writer.add_summary(summary, e * batch_range[1] + b + 1)
                    # Running average over a window that grows up to 100 steps.
                    if avg_steps < 100: avg_steps += 1
                    avg_loss = 1 / avg_steps * train_loss + (1 - 1 / avg_steps) * avg_loss
                    print("{:,d} / {:,d} (epoch {:.3f} / {}), loss {:.3f} (avg {:.3f}), {:.3f}s" \
                        .format(b, batch_range[1], e + b / batch_range[1],
                                epoch_range[1], train_loss, avg_loss, elapsed))
                    if (e * batch_range[1] + b + 1) % args.save_every == 0 \
                            or (e == epoch_range[1] - 1 and b == batch_range[1] - 1):
                        save_model(sess, saver, model, args.save_dir, global_step,
                                   data_loader.total_batch_count, global_seconds_elapsed)
        except KeyboardInterrupt:
            # Ctrl-C: fall through to the finally-save below.
            print()
        finally:
            writer.flush()
            # NOTE(review): if interrupted before the first batch, `e`/`b`
            # are unbound here and this raises NameError — confirm acceptable.
            global_step = e * data_loader.total_batch_count + b
            save_model(sess, saver, model, args.save_dir, global_step,
                       data_loader.total_batch_count, global_seconds_elapsed)
def train(args):
    """Train a Model and keep only the best few checkpoints by validation loss.

    Python 2 code (``xrange``/``cPickle``). Periodically evaluates the
    validation split and maintains a small JSON ledger
    (``save_dir/val_loss.json``) mapping validation loss -> run metadata;
    worse checkpoints are deleted from disk when a better one arrives.
    """
    print(args)
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length,
                             args.training_data_ratio)
    args.vocab_size = data_loader.vocab_size

    # Persist config and vocab for later sampling.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    #sess = tf.InteractiveSession()
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter('/tmp', sess.graph)
        step = 0
        for e in range(args.num_epochs):
            # Exponentially decayed learning rate per epoch.
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            #print("model learning rate is {}".format(model.lr.eval()))
            data_loader.reset_batch_pointer('train')
            state = model.initial_state.eval()
            for b in xrange(data_loader.ntrain):
                start = time.time()
                x, y = data_loader.next_batch('train')
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                step = e * data_loader.ntrain + b
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(step, args.num_epochs * data_loader.ntrain, e,
                            train_loss, end - start))
                if step % args.write_summary_every == 0:
                    # training loss summary
                    summary_str = sess.run(summary_op, feed_dict=feed)
                    summary_writer.add_summary(summary_str, step)
                if step % args.save_every == 0 or (step + 1) == (args.num_epochs * data_loader.ntrain):
                    # eval validation loss (mean cost over the validation split)
                    data_loader.reset_batch_pointer('validation')
                    validation_state = model.initial_state.eval()
                    val_losses = 0
                    for n in xrange(data_loader.nvalidation):
                        x, y = data_loader.next_batch('validation')
                        val_feed = {model.input_data: x, model.targets: y, model.initial_state: validation_state}
                        validation_loss, validation_state = sess.run([model.cost, model.final_state], val_feed)
                        val_losses += validation_loss
                    validation_loss = val_losses / data_loader.nvalidation
                    print("validation loss is {}".format(validation_loss))
                    # write top 5 validation loss to a json file.
                    # NOTE(review): the `len(losses) > 3` branch below keeps at
                    # most 4 entries, not 5 as this comment claims — confirm
                    # which was intended.
                    args_dict = vars(args)
                    args_dict['step'] = step
                    val_loss_file = args.save_dir + '/val_loss.json'
                    loss_json = ''
                    save_new_checkpoint = False
                    time_int = int(time.time())
                    args_dict['checkpoint_path'] = os.path.join(args.save_dir, 'model.ckpt-'+str(time_int))
                    if os.path.exists(val_loss_file):
                        with open(val_loss_file, "r") as text_file:
                            text = text_file.read()
                            if text == '':
                                loss_json = {validation_loss: args_dict}
                                save_new_checkpoint = True
                            else:
                                # NOTE(review): keys loaded from JSON are
                                # strings, but new entries are inserted with
                                # float keys — the in-memory dict mixes key
                                # types until the next dump/reload round-trip.
                                loss_json = json.loads(text)
                                losses = loss_json.keys()
                                if len(losses) > 3:
                                    # Worst (largest) loss first.
                                    losses.sort(key=lambda x: float(x), reverse=True)
                                    loss = losses[0]
                                    if validation_loss < float(loss):
                                        # Evict the worst checkpoint from disk
                                        # before recording the new one.
                                        to_be_remove_ckpt_file_path = loss_json[loss]['checkpoint_path']
                                        to_be_remove_ckpt_meta_file_path = to_be_remove_ckpt_file_path + '.meta'
                                        print("removed checkpoint {}".format(to_be_remove_ckpt_file_path))
                                        if os.path.exists(to_be_remove_ckpt_file_path):
                                            os.remove(to_be_remove_ckpt_file_path)
                                        if os.path.exists(to_be_remove_ckpt_meta_file_path):
                                            os.remove(to_be_remove_ckpt_meta_file_path)
                                        del(loss_json[loss])
                                        loss_json[validation_loss] = args_dict
                                        save_new_checkpoint = True
                                else:
                                    loss_json[validation_loss] = args_dict
                                    save_new_checkpoint = True
                    else:
                        loss_json = {validation_loss: args_dict}
                        save_new_checkpoint = True
                    if save_new_checkpoint:
                        checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step = time_int)
                        print("model saved to {}".format(checkpoint_path + '-' + str(time_int)))
                        with open(val_loss_file, "w") as text_file:
                            json.dump(loss_json, text_file)
def main(_):
    """Entry point: train a CharRNN (or export its embeddings).

    Builds three weight-sharing model instances in the same variable scope —
    a batched training model, a batch-1 sampling model, and a validation
    model — then either exports the embedding matrix (``FLAGS.export``) or
    runs the train/validate/sample/similarity-log loop with best-perplexity
    checkpointing and early stopping.
    """
    pp.pprint(FLAGS.__flags)

    if not os.path.exists(FLAGS.checkpoint_dir):
        print(" [*] Creating checkpoint directory...")
        os.makedirs(FLAGS.checkpoint_dir)

    data_loader = TextLoader(os.path.join(FLAGS.data_dir, FLAGS.dataset_name),
                             FLAGS.batch_size, FLAGS.seq_length)
    vocab_size = data_loader.vocab_size
    valid_size = 50     # number of chars sampled for the similarity log
    valid_window = 100

    # Three views of one set of weights (reuse=True shares variables).
    with tf.variable_scope('model'):
        train_model = CharRNN(vocab_size, FLAGS.batch_size, FLAGS.rnn_size,
                              FLAGS.layer_depth, FLAGS.num_units, FLAGS.rnn_type,
                              FLAGS.seq_length, FLAGS.keep_prob, FLAGS.grad_clip)
    with tf.variable_scope('model', reuse=True):
        # batch 1 / length 1: used for character-by-character sampling
        simple_model = CharRNN(vocab_size, 1, FLAGS.rnn_size,
                               FLAGS.layer_depth, FLAGS.num_units, FLAGS.rnn_type,
                               1, FLAGS.keep_prob, FLAGS.grad_clip)
    with tf.variable_scope('model', reuse=True):
        valid_model = CharRNN(vocab_size, FLAGS.batch_size, FLAGS.rnn_size,
                              FLAGS.layer_depth, FLAGS.num_units, FLAGS.rnn_type,
                              FLAGS.seq_length, FLAGS.keep_prob, FLAGS.grad_clip)

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        train_model.load(sess, FLAGS.checkpoint_dir, FLAGS.dataset_name)
        best_val_pp = float('inf')
        best_val_epoch = 0
        valid_loss = 0
        valid_perplexity = 0
        start = time.time()

        if FLAGS.export:
            # Export-only mode: dump the learned embedding matrix and exit.
            print("Eval...")
            final_embeddings = train_model.embedding.eval(sess)
            emb_file = os.path.join(FLAGS.data_dir, FLAGS.dataset_name, 'emb.npy')
            print("Embedding shape: {}".format(final_embeddings.shape))
            np.save(emb_file, final_embeddings)
        else: # Train
            current_step = 0
            similarity, valid_examples, _ = compute_similarity(train_model, valid_size, valid_window, 6)

            # save hyper-parameters
            cPickle.dump(FLAGS.__flags, open(FLAGS.log_dir + "/hyperparams.pkl", 'wb'))

            # run it!
            for e in range(FLAGS.num_epochs):
                data_loader.reset_batch_pointer()

                # decay learning rate.
                # NOTE(review): this assigns the *constant* FLAGS.learning_rate
                # every epoch — no decay factor is applied despite the comment.
                sess.run(tf.assign(train_model.lr, FLAGS.learning_rate))

                # iterate by batch
                for b in range(data_loader.num_batches):
                    x, y = data_loader.next_batch()
                    res, time_batch = run_epochs(sess, x, y, train_model)
                    train_loss = res["loss"]
                    train_perplexity = np.exp(train_loss)
                    iterate = e * data_loader.num_batches + b

                    # print log (valid_loss/valid_perplexity are from the
                    # previous epoch's validation pass)
                    print("{}/{} (epoch {}) loss = {:.2f}({:.2f}) perplexity(train/valid) = {:.2f}({:.2f}) time/batch = {:.2f} chars/sec = {:.2f}k"\
                        .format(e * data_loader.num_batches + b,
                                FLAGS.num_epochs * data_loader.num_batches,
                                e, train_loss, valid_loss, train_perplexity,
                                valid_perplexity, time_batch,
                                (FLAGS.batch_size * FLAGS.seq_length) / time_batch / 1000))

                current_step = tf.train.global_step(sess, train_model.global_step)

                # validate (mean loss over the held-out batches)
                valid_loss = 0
                for vb in range(data_loader.num_valid_batches):
                    res, valid_time_batch = run_epochs(sess, data_loader.x_valid[vb],
                                                       data_loader.y_valid[vb],
                                                       valid_model, False)
                    valid_loss += res["loss"]
                valid_loss = valid_loss / data_loader.num_valid_batches
                valid_perplexity = np.exp(valid_loss)
                print("### valid_perplexity = {:.2f}, time/batch = {:.2f}".format(valid_perplexity, valid_time_batch))

                log_str = ""

                # Generate sample continuations from four fixed Chinese prompts.
                smp1 = simple_model.sample(sess, data_loader.chars, data_loader.vocab, UNK_ID, 5, u"我喜歡做")
                smp2 = simple_model.sample(sess, data_loader.chars, data_loader.vocab, UNK_ID, 5, u"他吃飯時會用")
                smp3 = simple_model.sample(sess, data_loader.chars, data_loader.vocab, UNK_ID, 5, u"人類總要重複同樣的")
                smp4 = simple_model.sample(sess, data_loader.chars, data_loader.vocab, UNK_ID, 5, u"天色暗了,好像快要")
                log_str = log_str + smp1 + "\n"
                log_str = log_str + smp2 + "\n"
                log_str = log_str + smp3 + "\n"
                log_str = log_str + smp4 + "\n"

                # Write a similarity log
                # Note that this is expensive (~20% slowdown if computed every 500 steps)
                sim = similarity.eval()
                for i in range(valid_size):
                    valid_word = data_loader.chars[valid_examples[i]]
                    top_k = 8 # number of nearest neighbors
                    # argsort of negated similarities = descending; skip
                    # index 0 (the word itself).
                    nearest = (-sim[i, :]).argsort()[1:top_k+1]
                    log_str = log_str + "Nearest to %s:" % valid_word
                    for k in range(top_k):
                        close_word = data_loader.chars[nearest[k]]
                        log_str = "%s %s," % (log_str, close_word)
                    log_str = log_str + "\n"
                print(log_str)

                # Write to log (overwritten each epoch)
                text_file = codecs.open(FLAGS.log_dir + "/similarity.txt", "w", "utf-8")
                text_file.write(log_str)
                text_file.close()

                if valid_perplexity < best_val_pp:
                    best_val_pp = valid_perplexity
                    best_val_epoch = iterate
                    # save best model
                    train_model.save(sess, FLAGS.checkpoint_dir, FLAGS.dataset_name)
                    print("model saved to {}".format(FLAGS.checkpoint_dir))

                # early_stopping (measured in batch iterations, not epochs)
                if iterate - best_val_epoch > FLAGS.early_stopping:
                    print('Total time: {}'.format(time.time() - start))
                    break
def train(args):
    """Train a char-RNN Model and export the final checkpoint to JavaScript.

    Derives the model name from the data directory, checkpoints into
    ``checkpoints/<model_name>`` (deleting older checkpoints for the same
    model before each save), writes TensorBoard summaries, and finally dumps
    the last checkpoint via ``dump_checkpoints`` for in-browser use.
    """
    model_name = args.data_dir.split("/")[-1]

    # make a dir to store checkpoints
    args.save_dir = os.path.join('checkpoints', model_name)
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(args.init_from)," %s must be a a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"config.pkl")),"config.pkl file does not exist in path %s"%args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"chars_vocab.pkl")),"chars_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme]==vars(args)[checkme],"Command line argument and saved model disagree on '%s' "%checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars==data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab==data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)
    # Persist config and vocab alongside the checkpoints.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        # instrument for tensorboard
        summaries = tf.summary.merge_all()
        writer = tf.summary.FileWriter(
            os.path.join(args.log_dir, time.strftime("%Y-%m-%d-%H-%M-%S")))
        writer.add_graph(sess.graph)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(args.num_epochs):
            # Exponentially decayed learning rate per epoch.
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                # Thread the recurrent state (c, h per layer) through batches.
                for i, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[i].c
                    feed[h] = state[i].h

                # instrument for tensorboard
                summ, train_loss, state, _ = sess.run([summaries, model.cost, model.final_state, model.train_op], feed)
                writer.add_summary(summ, e * data_loader.num_batches + b)

                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                      .format(e * data_loader.num_batches + b,
                              args.num_epochs * data_loader.num_batches,
                              e, train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0\
                        or (e == args.num_epochs-1 and b == data_loader.num_batches-1):
                    # remove previous checkpoints.
                    # NOTE(review): deletion happens *before* the new save, so
                    # an interrupted save leaves no checkpoint at all.
                    current_checkpoints = [f for f in os.listdir(args.save_dir) if os.path.isfile(os.path.join(args.save_dir, f))]
                    for f in current_checkpoints:
                        if model_name in f:
                            os.remove(os.path.join(args.save_dir, f))
                    # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, model_name)
                    saver.save(sess, checkpoint_path, global_step=e * data_loader.num_batches + b)
                    final_model = '{}-{}'.format(model_name, e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))

    # get the vocab
    model_vocab = getModelVocab(model_name)

    # dump the checkpoints to javascript.
    # NOTE(review): `final_model` is only bound if at least one save happened;
    # the last-batch save condition above guarantees that when num_epochs and
    # num_batches are both >= 1 — verify for degenerate configs.
    dump_checkpoints(model_vocab, model_name, final_model)
def train(args):
    """Train the character-level Model defined by ``args``.

    Optionally resumes from a previous run (``args.init_from``), after
    verifying that the saved configuration and vocabulary match the current
    data. The config and vocabulary are pickled into ``args.save_dir`` and a
    checkpoint is written every ``args.save_every`` batches plus once at the
    very end of training.
    """
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    resuming = args.init_from is not None
    if resuming:
        # Sanity-check the resume directory before touching the graph.
        assert os.path.isdir(
            args.init_from), " %s must be a a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "chars_vocab.pkl")
        ), "chars_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # The saved model's architecture must agree with the command line
        # (config pickles are binary, hence 'rb').
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            previous_args = pickle.load(f)
        for field in ("model", "rnn_size", "num_layers", "seq_length"):
            assert vars(previous_args)[field] == vars(args)[
                field], "Command line argument and saved model disagree on '%s' " % field

        # The saved vocabulary must agree with the freshly loaded data.
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            previous_chars, previous_vocab = pickle.load(f)
        assert previous_chars == data_loader.chars, "Data and loaded model disagree on character set!"
        assert previous_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    # Persist config + vocabulary so sampling/inference can rebuild the model.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    batches_per_epoch = data_loader.num_batches
    total_batches = args.num_epochs * batches_per_epoch

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        if resuming:
            # Load the previously trained weights into the new graph.
            saver.restore(sess, ckpt.model_checkpoint_path)

        for epoch in range(args.num_epochs):
            # Exponential per-epoch learning-rate decay.
            decayed_lr = args.learning_rate * (args.decay_rate**epoch)
            sess.run(tf.assign(model.lr, decayed_lr))
            data_loader.reset_batch_pointer()
            # Recurrent state is threaded batch-to-batch within an epoch.
            state = model.initial_state.eval()

            for batch in range(batches_per_epoch):
                tic = time.time()
                inputs, targets = data_loader.next_batch()
                feed = {
                    model.input_data: inputs,
                    model.targets: targets,
                    model.initial_state: state
                }
                train_loss, state, _ = sess.run(
                    [model.cost, model.final_state, model.train_op], feed)
                toc = time.time()

                global_batch = epoch * batches_per_epoch + batch
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(global_batch, total_batches, epoch, train_loss,
                            toc - tic))

                is_last_batch = (epoch == args.num_epochs - 1
                                 and batch == batches_per_epoch - 1)
                # Checkpoint on schedule, and unconditionally at the very end.
                if global_batch % args.save_every == 0 or is_last_batch:
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=global_batch)
                    print("model saved to {}".format(checkpoint_path))
def train(args):
    """Train the model on ``args.data_dir``, periodically validating.

    Workflow: optionally resume from ``args.init_from`` (after checking
    config/vocab compatibility), pickle config+vocab into ``args.save_dir``,
    then run the epoch/batch training loop with TensorBoard summaries,
    periodic checkpointing, optional state resets, and validation passes
    after every checkpoint save.
    """
    print("training on \'"+args.data_dir+"\'")
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        print("RELOADING FROM CHECKPOING")
        # check if all necessary files exist
        assert os.path.isdir(args.init_from)," %s must be a a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"config.pkl")),"config.pkl file does not exist in path %s"%args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"chars_vocab.pkl")),"chars_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt,"No checkpoint found"
        assert ckpt.model_checkpoint_path,"No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl')) as f:
            saved_model_args = cPickle.load(f)
        need_be_same=["model","rnn_size","num_layers","seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme]==vars(args)[checkme],"Command line argument and saved model disagree on '%s' "%checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl')) as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars==data_loader.chars, "Data and loaded model disagreee on character set!"
        assert saved_vocab==data_loader.vocab, "Data and loaded model disagreee on dictionary mappings!"

    # persist config and vocab so sampling / resuming can rebuild the model
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    print("====================================")
    printargs(args)
    print("====================================")

    model = Model(args)

    def validateonce(expectationdropout=True, TrueIfVal_FalseIfTrain=True):
        # One full pass over num_batches_te batches, returning per-symbol
        # log2-losses, ground-truth ids, prediction entropies, the raw
        # probability tensors, and the average seconds per batch.
        # NOTE(review): this closure reads `sess`, which is only bound later
        # inside train's `with tf.Session() as sess:` block — it must not be
        # called before that point.
        # TrueIfVal_FalseIfTrain selects validation vs. training batches;
        # the training pointer is backed up and restored so evaluating on
        # training data does not disturb the training epoch's progress.
        data_loader.reset_batch_pointers()
        model.resetweights(expectationdropout=expectationdropout)
        state = model.resetstate()
        start = time.time()
        losses = []
        backupptrtr = data_loader.pointer_tr
        entrps = None
        truths = None
        allprobs = None
        for b in range(data_loader.num_batches_te):
            if TrueIfVal_FalseIfTrain:
                x, y = data_loader.next_batch_te()
            else:
                x, y = data_loader.next_batch_tr()
            # shapes of x and y are (batchsize, seqlength); each element is an integer from 0 to (vocabsize-1)
            feed = {model.input_data: x, model.targets: y, model.initial_state: state}
            feed = model.extrafeed(feed)
            state, probs, entropies = sess.run([model.final_state, model.probs, model.pred_entropy], feed)
            theseprobs = np.reshape(probs, (1, args.batch_size, args.seq_length, args.vocab_size))
            thesey = np.reshape(y, (args.batch_size, args.seq_length))
            # tryconcat accumulates across batches (handles the None first call)
            allprobs = tryconcat(allprobs, theseprobs, axis=2)
            truths = tryconcat(truths, thesey, axis=1)
            y = y.flatten()
            for ii in range(y.size):
                # per-symbol cross-entropy in bits
                losses.append(-np.log2(probs[ii,y[ii]]))
            thesentropies = np.reshape(entropies,(1,args.batch_size,args.seq_length))
            entrps = tryconcat(entrps, thesentropies, axis=2)
        data_loader.pointer_tr = backupptrtr
        end = time.time()
        testtimeperbatch = (end-start) / float(data_loader.num_batches_te)
        return (np.array(losses), truths, entrps, allprobs, testtimeperbatch)

    # for tensorboard
    valsumplh_cost = tf.placeholder(tf.float32, (1,), name="validation_summary_placeholder_cost")
    valsumplh_pent = tf.placeholder(tf.float32, (1,), name="validation_summary_placeholder_prediction_entropy")
    # reduce_sum fixes tensorflow scalar handling being weird (vector of size 1)
    valsumscs_cost = tf.scalar_summary('cost_val', tf.reduce_sum(valsumplh_cost))
    valsumscs_pent = tf.scalar_summary('prediction_entropy_val', tf.reduce_sum(valsumplh_pent))
    sumwriter = tf.train.SummaryWriter(args.save_dir, graph=tf.get_default_graph())

    befstarttime = time.time()
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        print("====================================")
        # List all variables, flag the trainable ones, and estimate the
        # trainable parameter footprint (4 bytes per float32 element).
        allvars = tf.all_variables()
        trainablevars = tf.trainable_variables()
        trainableMB = 0
        for tvar in allvars:
            #print(type(tvar))
            #print(tvar.name+" -- "+str(tvar.dtype)+" -- "+str(tvar.get_shape()))
            if tvar in trainablevars:
                print("@@@ "+tvar.name+" -- "+str(tvar.get_shape()))
                trainableMB += 4*tvar.get_shape().num_elements()
            else:
                print(tvar.name+" -- "+str(tvar.get_shape()))
        print(" ")
        print("trainable megabytes: "+str(float(trainableMB)/1e6))
        print("====================================")
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(args.num_epochs):
            # train model
            newlr = args.learning_rate * (args.decay_rate ** e)
            sess.run(tf.assign(model.lr, newlr))
            data_loader.reset_batch_pointers()
            model.resetweights()
            state = model.resetstate()
            for b in range(data_loader.num_batches_tr):
                model.resetweights()
                # reset weights at every gradient descent iteration,
                # but don't necessarily reset the state
                dovalidate = False
                #if b == (data_loader.num_batches_tr - 1):
                #    dovalidate = True
                x, y = data_loader.next_batch_tr()
                # shapes of x and y are (batchsize, seqlength); each element is an integer from 0 to (vocabsize-1)
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                feed = model.extrafeed(feed)
                start = time.time()
                train_loss, state, _, summary = sess.run([model.cost, model.final_state, model.train_op, model.tbsummary], feed)
                end = time.time()
                bidx = e * data_loader.num_batches_tr + b
                sumwriter.add_summary(summary, bidx)
                epstr = "{}/{} (epoch {})".format(bidx, args.num_epochs * data_loader.num_batches_tr, e+1)
                if bidx % 100 == 0:
                    print(epstr + ", train_loss = {:.3f}, time/batch = {:.3f}, lr = {:.3f}".format(train_loss, end - start, newlr))
                if bidx % args.save_every == 0\
                        or (e==args.num_epochs-1 and b == data_loader.num_batches_tr-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = bidx)
                    print(epstr+", model saved to {}".format(checkpoint_path))
                    dovalidate = True
                if b > 0 and b % args.reset_every == 0:
                    state = model.resetstate()
                    #print(epstr+", reset state in the midst of a training epoch, at batch "+str(b+1)+"/"+str(data_loader.num_batches_tr))
                # validate model?
                if dovalidate:
                    valstr = ""
                    befvaltime = time.time()
                    # NOTE(review): this branch is deliberately disabled via
                    # `False and ...`; it contains the Monte-Carlo-dropout
                    # validation path (averaging over niters stochastic passes).
                    if False and args.dropout > 1e-3:
                        testlosses = None
                        ytruths = None
                        meanpredentrops = None
                        meanprobdistrs = None
                        testtimeperbatch = 0.0
                        if e > 95:
                            niters = 29
                        else:
                            niters = 5
                        for kk in range(niters):
                            theselosses, thesetruths, theseentrops, theseprobs, thistimeperbatch = validateonce(expectationdropout=False)
                            testlosses = tryconcat(testlosses, theselosses, axis=0)
                            if meanprobdistrs is None:
                                meanprobdistrs = theseprobs
                                meanpredentrops = theseentrops
                            else:
                                meanprobdistrs += theseprobs
                                meanpredentrops += theseentrops
                            if ytruths is None:
                                ytruths = thesetruths
                            testtimeperbatch += (thistimeperbatch / float(niters))
                            #print("kk == "+str(kk+1)+"/"+str(niters))
                        meanprobdistrs /= float(niters)
                        meanpredentrops /= float(niters)
                        entropvar = entropyvariance(args, meanprobdistrs, meanpredentrops, plotfig=1)
                        testloss = np.mean(testlosses)
                        testlossstd = np.std(testlosses)
                        rendertext('blue', args.save_dir, 'z_'+str(bidx)+'_JSdiv', ytruths, entropvar)
                        rendertext('blue', args.save_dir, 'z_'+str(bidx)+'_entrop', ytruths, np.reshape(meanpredentrops,(meanpredentrops.shape[1],meanpredentrops.shape[2])))
                        valpredentropy = np.mean(meanpredentrops)
                        valpredentrstd = np.std( meanpredentrops)
                        suffix = ", estimated from "+str(niters)+" MC samples"
                    else:
                        # Expectation-dropout validation: one pass over the
                        # training batches, one over the validation batches.
                        theselosses, _, theseentrops, _, testtimeperbatch = validateonce(expectationdropout=True, TrueIfVal_FalseIfTrain=False)
                        valstr += ", exp. tr. loss "+str(np.mean(theselosses))+", pred-ent "+str(np.mean(theseentrops))+" ("+str(testtimeperbatch)+" spb)"
                        theselosses, _, theseentrops, _, testtimeperbatch = validateonce(expectationdropout=True, TrueIfVal_FalseIfTrain=True)
                        testloss = np.mean(theselosses)
                        testlossstd = np.std(theselosses)
                        valpredentropy = np.mean(theseentrops)
                        valpredentrstd = np.std( theseentrops)
                        suffix = ", MC expectation"
                    valstr += ", val loss "+str(testloss)+" w/std "+str(testlossstd)+", pred-ent "+str(valpredentropy)+" w/std "+str(valpredentrstd)+" ("+str(testtimeperbatch)+" spb)"+suffix
                    # push validation scalars to tensorboard at the epoch boundary step
                    valsummary1 = sess.run([valsumscs_cost,], {valsumplh_cost:np.array(testloss).reshape((1,))})[0]
                    valsummary2 = sess.run([valsumscs_pent,], {valsumplh_pent:np.array(valpredentropy).reshape((1,))})[0]
                    sumwriter.add_summary(valsummary1, (e+1)*data_loader.num_batches_tr)
                    sumwriter.add_summary(valsummary2, (e+1)*data_loader.num_batches_tr)
                    aftvaltime = time.time()
                    print(epstr+valstr)
                    print("validation time: "+str(aftvaltime-befvaltime)+" sec")
print(__doc__) # 학습에 필요한 설정값들을 지정합니다. data_dir = '_rnn_data' #data_dir = 'data/linux' batch_size = 50 # Training : 50, Sampling : 1 seq_length = 50 # Training : 50, Sampling : 1 hidden_size = 128 # 히든 레이어의 노드 개수 learning_rate = 0.002 num_epochs = 2 num_hidden_layers = 2 grad_clip = 5 # Gradient Clipping에 사용할 임계값 # TextLoader를 이용해서 데이터를 불러옵니다. data_loader = TextLoader(data_dir, batch_size, seq_length) # 학습데이터에 포함된 모든 단어들을 나타내는 변수인 chars와 chars에 id를 부여해 dict 형태로 만든 vocab을 선언합니다. chars = data_loader.chars vocab = data_loader.vocab vocab_size = data_loader.vocab_size # 전체 단어개수 # 인풋데이터와 타겟데이터, 배치 사이즈를 입력받기 위한 플레이스홀더를 설정합니다. input_data = tf.placeholder(tf.int32, shape=[None, None ]) # input_data : [batch_size, seq_length]) target_data = tf.placeholder( tf.int32, shape=[None, None]) # target_data : [batch_size, seq_length]) state_batch_size = tf.placeholder(tf.int32, shape=[]) # Training : 50, Sampling : 1 # RNN의 마지막 히든레이어의 출력을 소프트맥스 출력값으로 변환해주기 위한 변수들을 선언합니다.
def train(args):
    """Train the word-level RNN language model with resumable progress.

    The model stores an epoch pointer and batch pointer as variables in the
    checkpoint, so a run resumed via ``args.init_from`` continues from the
    exact epoch/batch where it stopped. Config and vocabulary are pickled
    into ``args.save_dir``; summaries go to ``args.log_dir``.
    """
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length, args.input_encoding)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(args.init_from)," %s must be a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"config.pkl")),"config.pkl file does not exist in path %s"%args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"words_vocab.pkl")),"words_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt,"No checkpoint found"
        assert ckpt.model_checkpoint_path,"No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same=["model","rnn_size","num_layers","seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme]==vars(args)[checkme],"Command line argument and saved model disagree on '%s' "%checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'words_vocab.pkl'), 'rb') as f:
            saved_words, saved_vocab = cPickle.load(f)
        assert saved_words==data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab==data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    # persist config and vocab so sampling / resuming can rebuild the model
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.words, data_loader.vocab), f)

    model = Model(args)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(args.log_dir)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_mem)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        train_writer.add_graph(sess.graph)
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        # resume from the epoch recorded in the checkpoint (0 on a fresh run)
        for e in range(model.epoch_pointer.eval(), args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            speed = 0
            if args.init_from is None:
                assign_op = model.epoch_pointer.assign(e)
                sess.run(assign_op)
            if args.init_from is not None:
                # first resumed epoch: jump to the saved batch position,
                # then clear init_from so later epochs start at batch 0
                data_loader.pointer = model.batch_pointer.eval()
                args.init_from = None
            for b in range(data_loader.pointer, data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y, model.initial_state: state,
                        model.batch_time: speed}
                summary, train_loss, state, _, _ = sess.run(
                    [merged, model.cost, model.final_state, model.train_op,
                     model.inc_batch_pointer_op], feed)
                train_writer.add_summary(summary, e * data_loader.num_batches + b)
                speed = time.time() - start
                if (e * data_loader.num_batches + b) % args.batch_size == 0:
                    print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                        .format(e * data_loader.num_batches + b,
                                args.num_epochs * data_loader.num_batches,
                                e, train_loss, speed))
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path,
                               global_step = e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
        train_writer.close()
def test(test_args):
    """Evaluate a trained (sub)word language model on a test file.

    Loads the saved training configuration (JSON or pickle), selects the
    model class matching the saved unit/composition settings, restores the
    latest checkpoint from the save directory, and reports test perplexity.
    """
    start = time.time()

    # The config was stored either as JSON or as a pickle.
    use_json = test_args.json == "true"
    config_extension = "json" if use_json else "pkl"
    config_path = os.path.join(test_args.save_dir, 'config.%s' % (config_extension))
    if use_json:
        with open(config_path, 'r') as f:
            args = Bunch(json.load(f))
    else:
        with open(config_path, 'rb') as f:
            args = pickle.load(f)
    args.save_dir = test_args.save_dir

    data_loader = TextLoader(args, train=False)
    test_data = data_loader.read_dataset(test_args.test_file)

    print(args.save_dir)
    print("Unit: " + args.unit)
    print("Composition: " + args.composition)

    args.word_vocab_size = data_loader.word_vocab_size
    subword_based = args.unit != "word"
    if subword_based:
        args.subword_vocab_size = data_loader.subword_vocab_size

    # Statistics of words
    print("Word vocab size: " + str(data_loader.word_vocab_size))
    # Statistics of sub units
    if subword_based:
        print("Subword vocab size: " + str(data_loader.subword_vocab_size))

    # Bi-LSTM composition needs the maximum number of sub-units per word.
    if args.composition == "bi-lstm":
        if args.unit == "char":
            args.bilstm_num_steps = data_loader.max_word_len
            print("Max word length:", data_loader.max_word_len)
        elif args.unit == "char-ngram":
            args.bilstm_num_steps = data_loader.max_ngram_per_word
            print("Max ngrams per word:", data_loader.max_ngram_per_word)
        elif args.unit in ("morpheme", "oracle"):
            args.bilstm_num_steps = data_loader.max_morph_per_word
            print("Max morphemes per word", data_loader.max_morph_per_word)

    # Choose the model class from the unit/composition combination.
    if args.unit == "word":
        lm_model = WordModel
    elif args.composition == "addition":
        lm_model = AdditiveModel
    elif args.composition == "bi-lstm":
        lm_model = BiLSTMModel
    else:
        sys.exit("Unknown unit or composition.")

    print("Begin testing...")
    with tf.Graph().as_default(), tf.Session() as sess:
        with tf.variable_scope("model"):
            mtest = lm_model(args, is_training=False, is_testing=True)
        # save only the last model
        saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)
        tf.initialize_all_variables().run()
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        test_perplexity = run_epoch(sess, mtest, test_data, data_loader, tf.no_op())
        print("Test Perplexity: %.3f" % test_perplexity)
        print("Test time: %.0f\n" % (time.time() - start))
        print("\n")
def train(args):
    """Train the char-RNN with TensorBoard logging; optionally resume.

    When ``args.init_from`` is given, the saved config and vocabulary are
    validated for compatibility and weights are restored from the latest
    checkpoint. Config and vocab are pickled into ``args.save_dir`` (created
    if missing); summaries are written under a timestamped ``args.log_dir``
    subdirectory.
    """
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "chars_vocab.pkl")
        ), "chars_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.latest_checkpoint(args.init_from)
        assert ckpt, "No checkpoint found"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars == data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        # instrument for tensorboard
        summaries = tf.summary.merge_all()
        writer = tf.summary.FileWriter(
            os.path.join(args.log_dir, time.strftime("%Y-%m-%d-%H-%M-%S")))
        writer.add_graph(sess.graph)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt)
        for e in range(args.num_epochs):
            # exponential learning-rate decay per epoch
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                # LSTM state is a (c, h) pair per layer; feed each tensor
                for i, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[i].c
                    feed[h] = state[i].h

                # instrument for tensorboard
                summ, train_loss, state, _ = sess.run(
                    [summaries, model.cost, model.final_state, model.train_op], feed)
                writer.add_summary(summ, e * data_loader.num_batches + b)

                end = time.time()
                print(
                    "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches, e,
                            train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0\
                        or (e == args.num_epochs-1 and b == data_loader.num_batches-1):
                    # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
def main():
    """Assign a probability to every password in a test file with a trained char-RNN.

    Two memo tables keep the work incremental:
      * ``lut`` maps a string prefix to the probability of that whole prefix;
      * ``next_char_probs`` maps a prefix to the network's next-character
        distribution, so the NN is queried at most once per distinct prefix.
    Both tables and partial results are checkpointed every ``args.save_every``
    lines so a long run can be resumed via ``--init_from``.
    """
    args = parse_args()
    with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
        saved_args = pickle.load(f)
    loader = TextLoader(saved_args.data_dir, saved_args.batch_size,
                        saved_args.seq_length, isTraining=False)
    saved_args.batch_size = 1  # Set batch size to 1 when sampling
    model = Model(saved_args, training=False)

    with open(args.test_file, 'r') as f:
        testset = f.readlines()
    testset_len = len(testset)

    lut = {}
    next_char_probs = {}
    vocab = loader.vocab
    charset = vocab.keys()
    results = []
    results_len = 0

    # Load first character probabilities (seed the lut with 1-char prefixes);
    # dict.has_key() was removed in Python 3, so membership uses `in`.
    first_char_probs = loader.first_char_probs
    for c in charset:
        if c in first_char_probs:
            if vocab[c] == 0:
                continue
            else:
                lut[c] = first_char_probs[c]

    total_start = time.time()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        begin_index = 0
        if args.init_from is not None:
            # resume: reload memo tables and the partial results written at
            # line number `init_from`
            lut_file = os.path.join(args.save_dir, 'lut.pkl')
            with open(lut_file, 'rb') as f:
                lut = pickle.load(f)
            next_char_probs_file = os.path.join(args.save_dir,
                                                'next_char_probs.pkl')
            with open(next_char_probs_file, 'rb') as f:
                next_char_probs = pickle.load(f)
            print("lut initialized from {}".format(lut_file))
            print("next_char_probs initialized from {}".format(
                next_char_probs_file))
            partial_results_file = args.output_file + "-" + str(args.init_from)
            with open(partial_results_file, 'r') as f:
                results = f.readlines()
            assert len(results) == args.init_from, "Unexpected error!"
            results_len = len(results)
            begin_index = args.init_from

        start = time.time()
        for testline in testset[begin_index:]:
            result_prob = 0.0
            # Find probability for existing prefix: longest memoized prefix wins
            for k in range(1, len(testline)):
                if testline[:-k] not in lut:
                    continue
                current_prefix = testline[:-k]
                result_prob = lut[current_prefix]
                # Find probability for the rest of the string
                for m in range(k):
                    # Try to get next possible characters' probabilities from dict
                    if current_prefix in next_char_probs:
                        next_char_prob = next_char_probs[current_prefix]
                    # Otherwise get next possible characters' probabilities by NN
                    else:
                        length = len(current_prefix)
                        # list(...) is required: map() is a lazy iterator in
                        # Python 3 and np.array(map(...)) yields a 0-d array
                        line = np.array(list(map(vocab.get, current_prefix)))
                        line = np.pad(line,
                                      (0, saved_args.seq_length - len(line)),
                                      'constant')
                        feed = {
                            model.input_data: [line],
                            model.sequence_lengths: [length]
                        }
                        probs = sess.run([model.probs], feed)
                        probs = np.reshape(probs, (-1, saved_args.vocab_size))
                        next_char_prob = probs[length - 1]
                        # Add next possible characters' probabilities to dict
                        next_char_probs[current_prefix] = next_char_prob
                    next_char = testline[-k + m]
                    current_prefix += next_char
                    result_prob *= next_char_prob[vocab[next_char]]
                    # Add new string to lut
                    lut[current_prefix] = result_prob
                break
            results.append(str(result_prob) + '\n')
            results_len += 1
            if results_len % args.display_every == 0:
                end = time.time()
                print("Progress: {}/{}; time taken = {}".format(
                    results_len, testset_len, end - start))
                start = time.time()
            if results_len % args.save_every == 0:
                # checkpoint memo tables and partial results for resumability
                lut_file = os.path.join(args.save_dir, 'lut.pkl')
                with open(lut_file, 'wb') as f:
                    pickle.dump(lut, f)
                next_char_probs_file = os.path.join(args.save_dir,
                                                    'next_char_probs.pkl')
                with open(next_char_probs_file, 'wb') as f:
                    pickle.dump(next_char_probs, f)
                print("lut saved to {}".format(lut_file))
                print(
                    "next_char_probs saved to {}".format(next_char_probs_file))
                partial_results_file = args.output_file + "-" + str(
                    len(results))
                with open(partial_results_file, 'w') as f:
                    f.writelines(results)
                end = time.time()
                print("Written partial results to {}; time taken = {}".format(
                    partial_results_file, end - start))
                start = time.time()
            if args.early_exit is not None and results_len >= args.early_exit:
                break

    with open(args.output_file, 'w') as f:
        f.writelines(results)
    total_end = time.time()
    print(
        "Finished assigning probabilities to {} passwords; total time taken = {}"
        .format(len(results), total_end - total_start))