def main(unused_args):
  config = get_config(FLAGS.model_size)
  eval_config = get_config(FLAGS.model_size)
  saved_model_path = FLAGS.model_path
  weights_dir = FLAGS.weights_dir
  verbose = FLAGS.verbose
  debug = FLAGS.debug
  if weights_dir is not None:
    if not os.path.exists(weights_dir):
      os.mkdir(weights_dir)
  if not debug:
    raw_data = reader.ptb_raw_data(FLAGS.data_path, "ptb.train.txt", "ptb.valid.txt", "ptb.test.txt")
  else:
    raw_data = reader.ptb_raw_data(FLAGS.data_path, "emma.txt", "emma.val.txt", "emma.test.txt")
  # load up PTB data
  train_data, val_data, test_data, vocab, word_to_id = raw_data

  with tf.Graph().as_default(), tf.Session() as session:
    initialiser = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
    with tf.variable_scope('model', reuse=None, initializer=initialiser):
      m = ACTModel(config, is_training=True)
      # if we have a saved/pre-trained model, load it.
      if saved_model_path is not None:
        saveload.main(saved_model_path, session)
    with tf.variable_scope("model", reuse=True):
      m_val = ACTModel(config, is_training=False)
      m_test = ACTModel(eval_config, is_training=False)

    tf.initialize_all_variables().run()
    print("starting training")
    for i in range(config.max_max_epoch):
      lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
      session.run(tf.assign(m.lr, config.learning_rate * lr_decay))
      train_loss = run_epoch(session, m, train_data, m.train_op, verbose=True)
      valid_loss = run_epoch(session, m_val, val_data, tf.no_op())
      if verbose:
        print("Epoch: {} Learning rate: {}".format(i + 1, session.run(m.lr)))
        print("Epoch: {} Train Loss: {}".format(i + 1, train_loss))
        print("Epoch: %d Valid Loss: %.3f" % (i + 1, valid_loss))
      # save weights in a pickled dictionary format
      if weights_dir is not None:
        date = "{:%m.%d.%H.%M}".format(datetime.now())
        saveload.main(weights_dir + "/Epoch_{:02}Train_{:0.3f}Val_{:0.3f}date{}.pkl"
                      .format(i + 1, train_loss, valid_loss, date), session)

    test_loss = run_epoch(session, m_test, test_data, tf.no_op())
    if verbose:
      print("Test Perplexity: %.3f" % test_loss)
def main(_):
  if not FLAGS.data_path:
    raise ValueError("Must set --data_path to PTB data directory")
  train_data, valid_data, test_data, _ = reader.ptb_raw_data(FLAGS.data_path)
  config = get_config()
  eval_config = get_config()
  eval_config.batch_size = 1
  eval_config.num_steps = 1

  with tf.Graph().as_default(), tf.Session() as session:
    initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
    with tf.variable_scope("model", reuse=None, initializer=initializer):
      m = PTBModel(is_training=True, config=config)
    with tf.variable_scope("model", reuse=True, initializer=initializer):
      mvalid = PTBModel(is_training=False, config=config)

    tf.initialize_all_variables().run()
    for i in range(config.max_max_epoch):
      lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
      m.assign_lr(session, config.learning_rate * lr_decay)
      print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m._lr)))
      train_perplexity = run_epoch(session, m, train_data, m._train_op)
      print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
      valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op())
      print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
def InterpBeadErrorEval(w1, b1, w2, b2, tt):
  raw_data = reader.ptb_raw_data(data_path)
  train_data, _, test_data, _ = raw_data
  t = tt / 20.
  thiserror = 0
  weights, biases = model_interpolate(w1, b1, w2, b2, t)
  interp_model = PTBModel(config=config, w=weights, b=biases)

  with interp_model.g.as_default():
    with tf.name_scope("Test"):
      test_input = PTBInput(config=config, data=test_data, name="TestInput")
      with tf.variable_scope("Interp", reuse=False):
        inputs_for_testing = tf.nn.embedding_lookup(interp_model.weights['e'], test_input.input_data)
        inputs_for_testing = [tf.squeeze(input_step, [1])
                              for input_step in tf.split(1, test_input.num_steps, inputs_for_testing)]
        pred_test = interp_model.predict(inputs_for_testing)
        loss_test = tf.nn.seq2seq.sequence_loss_by_example(
            [pred_test],
            [tf.reshape(test_input.targets, [-1])],
            [tf.ones([test_input.batch_size * test_input.num_steps], dtype=data_type())])
        cost_test = tf.reduce_sum(loss_test) / test_input.batch_size

    init = tf.initialize_all_variables()
    sv = tf.train.Supervisor()
    with sv.managed_session() as session:
      session.run(init)
      tv = tf.trainable_variables()
      test_perplexity = run_epoch(session, interp_model, test_input, cost_test, verbose=True)
      print "Test perplexity: " + str(test_perplexity)
def main(_):
  ##### Configure these based on current situation. #####
  preload_model = False   # Shall we preload preloaded_epoch or train it from scratch?
  preloaded_epoch = 0     # The epoch to load (if required). Counting from 0.
  #######################################################
  if preload_model:
    load_model_file = "model{}.ckpt".format(preloaded_epoch)
    preloaded_epoch += 1
  else:
    preloaded_epoch = 0
  if not FLAGS.data_path:
    raise ValueError("Must set --data_path to PTB data directory")
  raw_data = reader.ptb_raw_data(FLAGS.data_path)
  train_data, valid_data, test_data, _ = raw_data
  config = get_config()
  eval_config = get_config()
  eval_config.batch_size = 1
  eval_config.num_steps = 1

  with tf.Graph().as_default(), tf.Session() as session:
    initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
    with tf.variable_scope("model", reuse=None, initializer=initializer):
      m = PTBModel(is_training=True, config=config)
    with tf.variable_scope("model", reuse=True, initializer=initializer):
      mvalid = PTBModel(is_training=False, config=config)
      mtest = PTBModel(is_training=False, config=eval_config)

    tf.initialize_all_variables().run()
    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()
    if preload_model:
      saver.restore(session, load_model_file)

    for i in range(preloaded_epoch, config.max_max_epoch):
      # Some simple learning rate scheduling. :-)
      if i > 3:
        config.learning_rate = 0.1
      lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
      m.assign_lr(session, config.learning_rate * lr_decay)
      print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
      train_perplexity = run_epoch(session, m, train_data, m.train_op, verbose=True)
      print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
      valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op())
      print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
      # Save the variables to disk.
      save_path = saver.save(session, "model{}.ckpt".format(i))
      print("Model saved in file: %s" % save_path)

    test_perplexity = run_epoch(session, mtest, test_data, tf.no_op())
    print("Test Perplexity: %.3f" % test_perplexity)
def testPtbRawData(self):
  tmpdir = tf.test.get_temp_dir()
  for suffix in "train", "valid", "test":
    filename = os.path.join(tmpdir, "ptb.%s.txt" % suffix)
    with tf.gfile.GFile(filename, "w") as fh:
      fh.write(self._string_data)
  # Smoke test
  output = reader.ptb_raw_data(tmpdir)
  self.assertEqual(len(output), 4)
def main(_):
  if not FLAGS.data_path:
    raise ValueError("Must set --data_path to PTB data directory")
  raw_data = reader.ptb_raw_data(FLAGS.data_path)
  train_data, valid_data, test_data, _ = raw_data
  config = get_config()
  eval_config = get_config()
  eval_config.batch_size = 1
  eval_config.num_steps = 1

  with tf.Graph().as_default():
    initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
    with tf.name_scope("Train"):
      train_input = PTBInput(config=config, data=train_data, name="TrainInput")
      with tf.variable_scope("Model", reuse=None, initializer=initializer):
        m = PTBModel(is_training=True, config=config, input_=train_input)
      tf.summary.scalar("Training Loss", m.cost)
      tf.summary.scalar("Learning Rate", m.lr)
    with tf.name_scope("Valid"):
      valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
      tf.summary.scalar("Validation Loss", mvalid.cost)
    with tf.name_scope("Test"):
      test_input = PTBInput(config=eval_config, data=test_data, name="TestInput")
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mtest = PTBModel(is_training=False, config=eval_config, input_=test_input)

    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
    with sv.managed_session() as session:
      for i in range(config.max_max_epoch):
        lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
        m.assign_lr(session, config.learning_rate * lr_decay)
        print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
        train_perplexity = run_epoch(session, m, eval_op=m.train_op, verbose=True)
        print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
        valid_perplexity = run_epoch(session, mvalid)
        print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
      test_perplexity = run_epoch(session, mtest)
      print("Test Perplexity: %.3f" % test_perplexity)
      if FLAGS.save_path:
        print("Saving model to %s." % FLAGS.save_path)
        try:
          sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)
        except:
          print("Error on sv.saver.save\nsession=%s\nsave_path=%s\nglobal_step=%s"
                % (session, FLAGS.save_path, sv.global_step))
def testPtbRawData(self):
  tmpdir = 'texts/simple-examples/data'
  for suffix in "train", "valid", "test":
    filename = os.path.join(tmpdir, "ptb.%s.txt" % suffix)
    with gfile.GFile(filename, "w") as fh:
      fh.write(self._string_data)
  # Smoke test
  output = reader.ptb_raw_data(tmpdir)
  print output
  self.assertEqual(len(output), 4)
def main():
  # --data_path=/tmp/simple-examples/data/ --model small
  data_path = '/home/hact/Downloads/simple-examples/data/'
  model_option = 'small'
  if not data_path:
    raise ValueError("Must set --data_path to PTB data directory")
  out_dir = 'models'
  checkpoint_dir = os.path.join(out_dir, "checkpoints")
  raw_data = reader.ptb_raw_data(data_path)
  train_data, valid_data, test_data, vocabulary = raw_data
  config = get_config(model_option)
  eval_config = get_config(model_option)
  eval_config.batch_size = 1
  eval_config.num_steps = 1

  with tf.Graph().as_default(), tf.Session() as session:
    initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
    with tf.variable_scope("model", reuse=None, initializer=initializer):
      m = PTBModel(is_training=True, config=config)
    with tf.variable_scope("model", reuse=True, initializer=initializer):
      mvalid = PTBModel(is_training=False, config=config)
      mtest = PTBModel(is_training=False, config=eval_config)

    # tf.initialize_all_variables().run()
    saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
      model_checkpoint_path_arr = ckpt.model_checkpoint_path.split("/")
      abs_model_checkpoint_path = checkpoint_dir + '/' + model_checkpoint_path_arr[-1]
      saver.restore(session, abs_model_checkpoint_path)

    # for i in range(config.max_max_epoch):
    #   lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
    #   m.assign_lr(session, config.learning_rate * lr_decay)
    #
    #   print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
    #   train_perplexity = run_epoch(session, m, train_data, m.train_op,
    #                                verbose=True, vocabulary=vocabulary)
    #   print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
    #   valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op(), vocabulary=vocabulary)
    #   print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
    #
    #   path = saver.save(session, checkpoint_prefix, global_step=i)

    test_perplexity = run_epoch(session, mtest, test_data, tf.no_op(), vocabulary=vocabulary)
    print("Test Perplexity: %.3f" % test_perplexity)
def main(_):
  if not FLAGS.data_path:
    raise ValueError("Must set --data_path to PTB data directory")
  raw_data = reader.ptb_raw_data(FLAGS.data_path)
  train_data, valid_data, test_data, _ = raw_data
  # test:  list (82430)
  # train: list (929589)
  # valid: list (73760)
  config = SmallConfig()
  eval_config = SmallConfig()
  eval_config.batch_size = 1
  eval_config.num_steps = 1

  initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
  with tf.name_scope("Train"):
    train_input = PTBInput(config=config, data=train_data, name="TrainInput")
    with tf.variable_scope("Model", reuse=None, initializer=initializer):
      m = PTBModel(is_training=True, config=config, input_=train_input)
    tf.summary.scalar("Training Loss", m.cost)
    tf.summary.scalar("Learning Rate", m.lr)
  with tf.name_scope("Valid"):
    valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
    with tf.variable_scope("Model", reuse=True, initializer=initializer):
      mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
    tf.summary.scalar("Validation Loss", mvalid.cost)
  with tf.name_scope("Test"):
    test_input = PTBInput(config=eval_config, data=test_data, name="TestInput")
    with tf.variable_scope("Model", reuse=True, initializer=initializer):
      mtest = PTBModel(is_training=False, config=eval_config, input_=test_input)

  with tf.Session() as session:
    for i in range(config.max_max_epoch):
      # Learning rate decay starts after config.max_epoch.
      lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
      m.assign_lr(session, config.learning_rate * lr_decay)
      print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
      train_perplexity = run_epoch(session, m, eval_op=m.train_op, verbose=True)
      print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
      valid_perplexity = run_epoch(session, mvalid)
      print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
    test_perplexity = run_epoch(session, mtest)
    print("Test Perplexity: %.3f" % test_perplexity)
def testPtbRawData(self):
  print('testPtbRawData')
  #tmpdir = tf.test.get_temp_dir()
  tmpdir = '../../data/ptb-simple-examples/data/'
  #print(tmpdir)
  for suffix in "train", "valid", "test":
    filename = os.path.join(tmpdir, "ptb.%s.txt" % suffix)
    with tf.gfile.GFile(filename, "w") as fh:
      fh.write(self._string_data)
  # Smoke test
  #output = reader.ptb_raw_data(tmpdir)
  train_data, valid_data, test_data, vocabulary = reader.ptb_raw_data(tmpdir)
  print(len(train_data), train_data[0], len(valid_data), vocabulary)
  #print(output[0].shape)
  #self.assertEqual(len(output), 4)
  return train_data
def main(unused_args):
  if not FLAGS.data_path:
    raise ValueError("Must set --data_path to PTB data directory")
  raw_data = reader.ptb_raw_data(FLAGS.data_path)
  train_data, valid_data, test_data, _ = raw_data
  config = get_config()
  eval_config = get_config()
  eval_config.batch_size = 1
  eval_config.num_steps = 1

  with tf.Graph().as_default(), tf.Session() as session:
    initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
    with tf.variable_scope("model", reuse=None, initializer=initializer):
      m = PTBModel(is_training=True, config=config)
    with tf.variable_scope("model", reuse=True, initializer=initializer):
      mvalid = PTBModel(is_training=False, config=config)
      mtest = PTBModel(is_training=False, config=eval_config)

    tf.initialize_all_variables().run()
    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()

    for i in range(config.max_max_epoch):
      lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
      m.assign_lr(session, config.learning_rate * lr_decay)
      print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
      train_perplexity = run_epoch(session, m, train_data, m.train_op, verbose=True)
      print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
      valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op())
      print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
      # Save the variables to disk.
      save_path = saver.save(session, "./SAVE/" + str(i) + FLAGS.save_model)

    test_perplexity = run_epoch(session, mtest, test_data, tf.no_op())
    print("Test Perplexity: %.3f" % test_perplexity)
def main(_):
  raw_data = reader.ptb_raw_data(FLAGS.data_path)
  train_data, valid_data, test_data, vocab_len = raw_data

  def get_config():
    """Returns the model config required according to FLAGS.model."""
    if FLAGS.model == "small":
      return SmallConfig()
    elif FLAGS.model == "medium":
      return MediumConfig()
    elif FLAGS.model == "large":
      return LargeConfig()
    elif FLAGS.model == "test":
      return TestConfig()
    else:
      raise ValueError("Invalid model: %s" % FLAGS.model)

  config = get_config()
  config.vocab_size = vocab_len
  eval_config = get_config()
  eval_config.batch_size = 1
  eval_config.num_steps = 1

  # Train and eval input functions
  train_input = PTBInput(params=config, data=train_data, name="TrainInput")
  config.epoch_size = train_input.epoch_size
  valid_input = PTBInput(params=config, data=valid_data, name="ValidInput")
  test_input = PTBInput(params=eval_config, data=test_data, name="TestInput")

  model_function = model_fn
  sess_config = tf.estimator.RunConfig(log_step_count_steps=500)
  ptb_word_lm = tf.estimator.Estimator(
      model_fn=model_function,
      config=sess_config,
      model_dir=FLAGS.save_path,
      params=config)

  for _ in range(config.max_max_epoch):
    ptb_word_lm.train(input_fn=train_input.input_data)
    ptb_word_lm.evaluate(input_fn=valid_input.input_data)

  ptb_word_lm.evaluate(input_fn=test_input.input_data)
def train(self):
  raw_data = reader.ptb_raw_data("/home/kevin/Documents/Datasets/simple-examples/data")
  train_data, valid_data, test_data, vocabsize = raw_data
  print vocabsize
  saver = tf.train.Saver(max_to_keep=2)

  for epoch in xrange(10000):
    total_genloss = 0
    total_latentloss = 0
    steps = 0
    for step, x in enumerate(reader.ptb_iterator(test_data, self.batchsize, self.sentence_length)):
      x2 = np.copy(x)
      c = np.zeros((self.batchsize, 1), dtype=np.int32)
      c.fill(10001)
      x = np.hstack((x[:, 1:], c))
      # x: input
      # x2: desired output
      gen_loss, _ = self.sess.run([self.generation_loss, self.update],
                                  feed_dict={self.sentences_in: x, self.sentences_in_decoded: x2})
      gl = np.mean(gen_loss) / self.sentence_length
      total_genloss += gl
      steps = steps + 1
    print "epoch %d genloss %f perplexity %f" % (epoch, total_genloss / steps, np.exp(total_genloss / steps))

    total_validloss = 0
    validsteps = 0
    for step, x in enumerate(reader.ptb_iterator(valid_data, self.batchsize, self.sentence_length)):
      x2 = np.copy(x)
      c = np.zeros((self.batchsize, 1), dtype=np.int32)
      c.fill(10001)
      x = np.hstack((x[:, 1:], c))
      # x: input
      # x2: desired output
      gen_loss, _ = self.sess.run([self.generation_loss, self.update],
                                  feed_dict={self.sentences_in: x, self.sentences_in_decoded: x2})
      gl = np.mean(gen_loss) / self.sentence_length
      total_validloss += gl
      validsteps = validsteps + 1
    print "valid %d genloss %f perplexity %f" % (epoch, total_validloss / validsteps, np.exp(total_validloss / validsteps))
def main(_):
  if not FLAGS.data_path:
    raise ValueError("Must set --data_path to PTB data directory")
  raw_data = reader.ptb_raw_data(FLAGS.data_path)
  train_data, valid_data, test_data, _ = raw_data
  config = get_config()
  eval_config = get_config()
  eval_config.batch_size = 1
  eval_config.num_steps = 1
  size = config.hidden_size
  vocab_size = config.vocab_size

  ############################
  print("loading data")
  train_input = PTBInput(config=config, data=train_data, name="TrainInput")
  #initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
  initializer = tf.random_uniform_initializer(0.5, 0.5)
  with tf.variable_scope("Model", reuse=None, initializer=initializer):
    mention_model = BNRModel(is_training=True, config=config, input_=train_input)
    # state = mention_model._initial_state
    batch_size = config.batch_size
    num_steps = config.num_steps
    output = mention_model.output
    state = mention_model.state
    softmax_w = tf.get_variable("softmax_w", [size, vocab_size], dtype=data_type())
    softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())
    logits = tf.matmul(output, softmax_w) + softmax_b
    loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
        [logits],
        [tf.reshape(mention_model._input.targets, [-1])],
        [tf.ones([batch_size * num_steps], dtype=data_type())])
    mention_model._cost = cost = tf.reduce_sum(loss) / batch_size
    mention_model._final_state = state
    mention_model.softmax_w = softmax_w
    mention_model.softmax_b = softmax_b

    ###################################################################################
    ######################## Learning : painful version ##############################
    mention_model._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), config.max_grad_norm)
    optimizer = tf.train.GradientDescentOptimizer(mention_model._lr)
    mention_model._train_op = optimizer.apply_gradients(
        zip(grads, tvars),
        global_step=tf.contrib.framework.get_or_create_global_step())
    mention_model._new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
    mention_model._lr_update = tf.assign(mention_model._lr, mention_model._new_lr)
    ####################################################################################
    ####################################################################################
    optimizer1 = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(cost)

  print("For output")

  ########### Run epoch ##############
  #######################################
  #with tf.Graph().as_default():
  sv = tf.train.Supervisor(logdir=FLAGS.save_path)
  print("Starting session ...")
  with sv.managed_session() as session:
    start_time = time.time()
    i = 0
    lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
    # mention_model.assign_lr(session, config.learning_rate * lr_decay)
    costs = 0.0
    iters = 0
    verbose = True
    state = session.run(mention_model.initial_state)
    fetches = {
        #"cost": model.cost,
        "cost": cost,
        "final_state": mention_model.final_state,
    }
    eval_op = mention_model._train_op
    fetches["eval_op"] = eval_op
    print("beginning steps ...")
    for step in range(mention_model.input.epoch_size):
      feed_dict = {}
      for i, (c, h) in enumerate(mention_model.initial_state):
        feed_dict[c] = state[i].c
        feed_dict[h] = state[i].h
      print(state[0][0][0][:4])
      vals = session.run(fetches, feed_dict)
      ############################################
      pdb.set_trace()
      #session.run(optimizer1, feed_dict)
      ############################################
      cost = vals["cost"]
      state = vals["final_state"]
      costs += cost
      iters += mention_model.input.num_steps
      print("For parameters")
      if verbose and step % (mention_model.input.epoch_size // 10) == 10:
        print("%.3f perplexity: %.3f speed: %.0f wps" %
              (step * 1.0 / mention_model.input.epoch_size, np.exp(costs / iters),
               iters * mention_model.input.batch_size / (time.time() - start_time)))
    print(np.exp(costs / iters))
    pdb.set_trace()

  ####################################### Initial code working ###########################################
  ########################################################################################################
  with tf.Graph().as_default():
    initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
    with tf.name_scope("Train"):
      train_input = PTBInput(config=config, data=train_data, name="TrainInput")
      with tf.variable_scope("Model", reuse=None, initializer=initializer):
        #m = PTBModel(is_training=True, config=config, input_=train_input)
        m = BNRModel(is_training=True, config=config, input_=train_input)
      tf.summary.scalar("Training Loss", m.cost)
      tf.summary.scalar("Learning Rate", m.lr)
    with tf.name_scope("Valid"):
      valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
        #mvalid = BNRModel(is_training=False, config=config, input_=valid_input)
      tf.summary.scalar("Validation Loss", mvalid.cost)
    with tf.name_scope("Test"):
      test_input = PTBInput(config=eval_config, data=test_data, name="TestInput")
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mtest = PTBModel(is_training=False, config=eval_config, input_=test_input)
        #mtest = BNRModel(is_training=False, config=eval_config, input_=test_input)

    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
    with sv.managed_session() as session:
      for i in range(config.max_max_epoch):
        lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
        m.assign_lr(session, config.learning_rate * lr_decay)
        print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
        train_perplexity = run_epoch(session, m, eval_op=m.train_op, verbose=True)
        #train_perplexity = run_epoch(session, m, loss, eval_op=m.train_op, verbose=True)
        print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
        valid_perplexity = run_epoch(session, mvalid)
        #valid_perplexity = run_epoch(session, mvalid, loss)
        print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
      #test_perplexity = run_epoch(session, mtest)
      test_perplexity = run_epoch(session, mtest, loss)
      print("Test Perplexity: %.3f" % test_perplexity)
      if FLAGS.save_path:
        print("Saving model to %s." % FLAGS.save_path)
        sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)
def main(_):
  # Raw data
  train_data, valid_data, test_data, _ = reader.ptb_raw_data(DATA_PATH)

  # Compute the number of training steps in one epoch
  train_data_len = len(train_data)                              # size of the dataset
  train_batch_len = train_data_len // TRAIN_BATCH_SIZE          # number of batches
  train_epoch_size = (train_batch_len - 1) // TRAIN_NUM_STEP    # training steps in this epoch

  valid_data_len = len(valid_data)
  valid_batch_len = valid_data_len // EVAL_BATCH_SIZE
  valid_epoch_size = (valid_batch_len - 1) // EVAL_NUM_STEP

  test_data_len = len(test_data)
  test_batch_len = test_data_len // EVAL_BATCH_SIZE
  test_epoch_size = (test_batch_len - 1) // EVAL_NUM_STEP

  # Define the initializer
  initializer = tf.random_uniform_initializer(-0.05, 0.05)

  # Define the model used for training
  with tf.variable_scope('language_model', reuse=None, initializer=initializer):
    train_model = PTBModel(True, TRAIN_BATCH_SIZE, TRAIN_NUM_STEP)

  # Define the model used for evaluation
  with tf.variable_scope('language_model', reuse=True, initializer=initializer):
    eval_model = PTBModel(False, EVAL_BATCH_SIZE, EVAL_NUM_STEP)

  # Build the data queues; this must happen before the queue-runner threads are started
  train_queue = reader.ptb_producer(train_data, train_model.batch_size, train_model.num_steps)
  valid_queue = reader.ptb_producer(valid_data, eval_model.batch_size, eval_model.num_steps)
  test_queue = reader.ptb_producer(test_data, eval_model.batch_size, eval_model.num_steps)

  with tf.Session() as sess:
    tf.global_variables_initializer().run()

    # Start the threads so that ptb_producer() can use tf.train.range_input_producer()
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # Train the model on the training data
    for i in range(NUM_EPOCH):
      print('In iteration: %d' % (i + 1))
      run_epoch(sess, train_model, train_queue, train_model.train_op, True, train_epoch_size)  # train the model
      # Evaluate the model on the validation data
      valid_perplexity = run_epoch(sess, eval_model, valid_queue, tf.no_op(), False, valid_epoch_size)
      print('Epoch: %d Validation Perplexity: %.3f' % (i + 1, valid_perplexity))

    # Evaluate the model on the test data
    test_perplexity = run_epoch(sess, eval_model, test_queue, tf.no_op(), False, test_epoch_size)
    print('Test Perplexity: %.3f' % test_perplexity)

    # Stop all threads
    coord.request_stop()
    coord.join(threads)
def main(_):
  if not FLAGS.data_path:
    raise ValueError("Must set --data_path to PTB data directory")

  ps_hosts = FLAGS.ps_hosts.split(",")
  worker_hosts = FLAGS.worker_hosts.split(",")
  cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
  server = tf.train.Server(cluster, job_name=FLAGS.job_name, task_index=FLAGS.task_index)

  if FLAGS.job_name == "ps":
    server.join()
  elif FLAGS.job_name == "worker":
    with tf.device(tf.train.replica_device_setter(
        worker_device="/job:worker/task:%d" % FLAGS.task_index, cluster=cluster)):
      raw_data = reader.ptb_raw_data(FLAGS.data_path)
      train_data, valid_data, test_data, _ = raw_data
      config = get_config()
      eval_config = get_config()
      eval_config.batch_size = 1
      eval_config.num_steps = 1

      with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
        with tf.name_scope("Train"):
          train_input = PTBInput(config=config, data=train_data, name="TrainInput")
          with tf.variable_scope("Model", reuse=None, initializer=initializer):
            m = PTBModel(is_training=True, config=config, input_=train_input)
          tf.scalar_summary("Training Loss", m.cost)
          tf.scalar_summary("Learning Rate", m.lr)
        with tf.name_scope("Valid"):
          valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
          with tf.variable_scope("Model", reuse=True, initializer=initializer):
            mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
          tf.scalar_summary("Validation Loss", mvalid.cost)
        with tf.name_scope("Test"):
          test_input = PTBInput(config=eval_config, data=test_data, name="TestInput")
          with tf.variable_scope("Model", reuse=True, initializer=initializer):
            mtest = PTBModel(is_training=False, config=eval_config, input_=test_input)

        #saver = tf.train.Saver()
        #summary_op = tf.merge_all_summaries()
        sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
                                 logdir=FLAGS.save_path,
                                 #summary_op=summary_op,
                                 #saver=saver,
                                 save_summaries_secs=10)
        #sv = tf.train.Supervisor(logdir=FLAGS.save_path)
        with sv.managed_session(server.target) as session:
          for i in range(config.max_max_epoch):
            lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)
            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_perplexity = run_epoch(session, m, eval_op=m.train_op, verbose=True)
            print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
            valid_perplexity = run_epoch(session, mvalid)
            print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
          test_perplexity = run_epoch(session, mtest)
          print("Test Perplexity: %.3f" % test_perplexity)
          #sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)
          #sv.stop()
def main(_):
  if not FLAGS.data_path:
    raise ValueError("Must set --data_path to PTB data directory")
  FLAGS.save_path = FLAGS.save_path.strip('/') + '-' + FLAGS.model + '/'
  raw_data = reader.ptb_raw_data(FLAGS.data_path)
  train_data, valid_data, test_data, _ = raw_data
  config = get_config()
  eval_config = get_config()
  eval_config.batch_size = 1
  eval_config.num_steps = 1

  with tf.Graph().as_default():
    initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
    if FLAGS.test is None:
      with tf.name_scope("Train"):
        train_input = PTBInput(config=config, data=train_data, name="TrainInput")
        with tf.variable_scope("Model", reuse=None, initializer=initializer):
          m = PTBModel(is_training=True, config=config, input_=train_input)
        tf.scalar_summary("Training Loss", m.cost)
        tf.scalar_summary("Learning Rate", m.lr)
      with tf.name_scope("Valid"):
        valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
        with tf.variable_scope("Model", reuse=True, initializer=initializer):
          mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
        tf.scalar_summary("Validation Loss", mvalid.cost)
      with tf.name_scope("Test"):
        test_input = PTBInput(config=config, data=test_data, name="TestInput")
        with tf.variable_scope("Model", reuse=True, initializer=initializer):
          mtest = PTBModel(is_training=False, config=eval_config, input_=test_input)
    else:
      with tf.name_scope("Test"):
        test_input = PTBInput(config=config, data=test_data, name="TestInput")
        with tf.variable_scope("Model", reuse=None, initializer=initializer):
          mtest = PTBModel(is_training=False, config=eval_config, input_=test_input)

    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
    with sv.managed_session() as session:
      if FLAGS.test is not None:
        print("Restoring model from " + FLAGS.test)
        mtest.saver.restore(session, FLAGS.test)
      else:
        for i in range(config.max_max_epoch):
          lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
          m.assign_lr(session, config.learning_rate * lr_decay)
          print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
          train_perplexity = run_epoch(session, m, eval_op=m.train_op, verbose=True)
          print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
          valid_perplexity = run_epoch(session, mvalid)
          print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))

      if FLAGS.test is None:
        test_perplexity = run_epoch(session, mtest)
      else:
        test_perplexity, top1, top5, top10 = run_epoch(session, mtest)
        print("Top 1 Acc: %.3f" % top1)
        print("Top 5 Acc: %.3f" % top5)
        print("Top 10 Acc: %.3f" % top10)
      print("Test Perplexity: %.3f" % test_perplexity)

      if FLAGS.test is None and FLAGS.save_path:
        print("Saving model to %s." % FLAGS.save_path)
        sv.saver.save(session, FLAGS.save_path + 'save')
def main(_):
  if not FLAGS.data_path:
    raise ValueError("Must set --data_path to PTB data directory")
  gpus = [x.name for x in device_lib.list_local_devices() if x.device_type == "GPU"]
  if FLAGS.num_gpus > len(gpus):
    raise ValueError("Your machine has only %d gpus "
                     "which is less than the requested --num_gpus=%d."
                     % (len(gpus), FLAGS.num_gpus))

  raw_data = reader.ptb_raw_data(FLAGS.data_path)
  #train_data, valid_data, test_data, _ = raw_data
  train_data, test_data, _ = raw_data
  config = get_config()
  eval_config = get_config()
  eval_config.batch_size = 1
  eval_config.num_steps = 1

  with tf.Graph().as_default():
    # If we are testing an existing model ...
    if FLAGS.load_path:
      # NOTE: there are two ways to restore an existing model: rebuilding the graph from scratch and then
      # calling saver.restore for those objects, or importing the old metagraph, calling saver.restore,
      # and then fetching the ops/tensors via methods like get_tensor_by_name.
      # What follows is the first method.
      with tf.name_scope("Train"):
        train_input = PTBInput(config=config, data=train_data, name="TrainInput")
        with tf.variable_scope("Model", reuse=None):
          m = PTBModel(is_training=True, config=config, input_=train_input, name="Train")
        tf.summary.scalar("Training Loss", m.cost)
        tf.summary.scalar("Learning Rate", m.lr)
      with tf.name_scope("Test"):
        test_input = PTBInput(config=eval_config, data=test_data, name="TestInput")  #, iter=0)
        with tf.variable_scope("Model", reuse=True):
          mtest = PTBModel(is_training=False, config=eval_config, input_=test_input, name="Test")

      session = tf.InteractiveSession()
      saver = tf.train.Saver()  #tf.train.import_meta_graph(FLAGS.load_path + ".meta")
      saver.restore(session, FLAGS.load_path)
      #mtest.import_ops()
      print("Model restored from %s." % FLAGS.load_path)

      of = open("HPL2.out", 'w')
      run_epoch(session, mtest, input=test_data[0], ep_size=len(test_data[0]) - 1, of=of)
      #run_epoch(session, mtest, input=test_input)  #, ep_size=len(test_data[0]), )
      iter = 1
      for i in range(len(test_data) - 1):
        run_epoch(session, mtest, input=test_data[iter], ep_size=len(test_data[iter]) - 1, of=of)
        #run_epoch(session, mtest, input=test_input)  #test_data[iter], ep_size=len(test_data[iter]))
        iter += 1
      of.close()
      quit()

    # If we are training a model ....
    initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
    with tf.name_scope("Train"):
      train_input = PTBInput(config=config, data=train_data, name="TrainInput")
      with tf.variable_scope("Model", reuse=None, initializer=initializer):
        m = PTBModel(is_training=True, config=config, input_=train_input)
      tf.summary.scalar("Training Loss", m.cost)
      tf.summary.scalar("Learning Rate", m.lr)
    #with tf.name_scope("Valid"):
    #  valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
    #  with tf.variable_scope("Model", reuse=True, initializer=initializer):
    #    mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
    #  tf.summary.scalar("Validation Loss", mvalid.cost)
    with tf.name_scope("Test"):
      test_input = PTBInput(config=eval_config, data=test_data, name="TestInput")
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mtest = PTBModel(is_training=False, config=eval_config, input_=test_input)

    models = {"Train": m, "Test": mtest}  #, "Valid": mvalid, "Test": mtest}
    for name, model in models.items():
      model.export_ops(name)
    metagraph = tf.train.export_meta_graph()
    if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
      raise ValueError("num_gpus > 1 is not supported for TensorFlow versions below 1.1.0")
    soft_placement = False
    if FLAGS.num_gpus > 1:
      soft_placement = True
      util.auto_parallel(metagraph, m)

  with tf.Graph().as_default():
    tf.train.import_meta_graph(metagraph)
    for model in models.values():
      model.import_ops()
    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
    config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
    with sv.managed_session(config=config_proto) as session:
      if not FLAGS.load_path:
        sv.saver.restore(session, FLAGS.save_path + "-13450")
        for i in range(config.max_max_epoch):
          lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
          m.assign_lr(session, config.learning_rate * lr_decay)
          print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
          train_perplexity = run_epoch(session, m, eval_op=m.train_op, verbose=True)
          print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
          #valid_perplexity = run_epoch(session, mvalid)
          #print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
          #test_perplexity = run_epoch(session, mtest)
          if FLAGS.save_path:
            print("Saving model to %s." % FLAGS.save_path)
            print("SAVED TO: %s." % sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step))
            sv.saver.export_meta_graph(FLAGS.save_path + ".meta")
      else:
        test_perplexity = run_epoch(session, mtest)
def train(dim_word=100,  # word vector dimensionality
          dim=1000,  # the number of GRU units
          encoder='gru', max_epochs=5000,
          finish_after=10000000,  # finish after this many updates
          dispFreq=100,
          decay_c=0.,  # L2 weight decay penalty
          lrate=0.01,
          n_words=100000,  # vocabulary size
          maxlen=100,  # maximum length of the description
          batch_size=16, valid_batch_size=16, max_grad_norm=5,
          nlayers=1, data_path=None, use_dropout=False,
          platoon=False, name=""):

  # Model options
  model_options = locals().copy()

  print 'Loading data'
  raw_data = reader.ptb_raw_data(data_path)
  train_data, valid_data, test_data, _ = raw_data

  pprint.pprint(model_options)

  print 'Building model'
  params = init_params(model_options)
  # create shared variables for parameters
  tparams = init_tparams(params)

  if platoon:
    print "PLATOON: Init ...",
    from platoon.channel import Worker
    from platoon.param_sync import ASGD
    worker = Worker(control_port=5567)
    print "DONE"

    print "PLATOON: Initializing shared params ...",
    worker.init_shared_params(tparams.values(), param_sync_rule=ASGD())
    print "DONE"
    worker.send_req({"type": name})

  # build the symbolic computational graph
  trng, use_noise, x, opt_ret, cost, ups = build_model(tparams, model_options)
  inps = [x]

  # before any regularizer
  print 'Building f_log_probs...',
  f_log_probs = theano.function(inps, cost, updates=ups)
  print 'Done'

  # before any regularizer - will be used to compute ppl
  print 'Building f_cost...',
  cost_sum = cost.sum()
  f_cost = theano.function(inps, cost_sum, updates=ups)
  print 'Done'

  cost = cost.mean()

  # apply L2 regularization on weights
  if decay_c > 0.:
    decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
    weight_decay = 0.
    for kk, vv in tparams.iteritems():
      weight_decay += (vv ** 2).sum()
    weight_decay *= decay_c
    cost += weight_decay

  print 'Computing gradient...',
  grads = tensor.grad(cost, wrt=itemlist(tparams))
  print 'Done'

  # compile the optimizer, the actual computational graph is compiled here
  lr = tensor.scalar(name='lr')
  print 'Building optimizers...',
  f_grad_shared, f_update = sgd(lr, tparams, grads, inps, cost, max_grad_norm)
  print 'Done'

  print 'Optimization'

  history_errs = []
  history_ppls = []
  wpss = []
  best_p = None

  # Training loop
  uidx = 0
  estop = False
  bad_counter = 0
  try:
    for eidx in xrange(max_epochs):
      n_samples = 0
      tlen = 0
      start_time = time.time()

      for x, y in reader.ptb_iterator(train_data, batch_size, maxlen):
        if platoon:
          #print "PLATOON: Copying data from master ...",
          worker.copy_to_local()
          #print "DONE"
        n_samples += len(x)
        uidx += 1
        use_noise.set_value(1.)
        tlen += (x.shape[0] * x.shape[1])

        # pad batch and create mask
        if x is None:
          print 'Minibatch with zero sample under length ', maxlen
          uidx -= 1
          continue

        ud_start = time.time()
        # compute cost, grads and copy grads to shared variables
        cost = f_grad_shared(x)
        # do the update on parameters
        f_update(lrate)
        ud = time.time() - ud_start

        if platoon:
          #print "PLATOON: Syncing with master ...",
          worker.sync_params(synchronous=True)
          #print "DONE"

        # check for bad numbers
        if numpy.isnan(cost) or numpy.isinf(cost):
          print 'NaN detected'
          return 1.

        # verbose
        if numpy.mod(uidx, dispFreq) == 0:
          print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

        # finish after this many updates
        if uidx >= finish_after:
          print 'Finishing after %d iterations!' % uidx
          estop = True
          break

      current_time = time.time()
      wps = int(tlen // (current_time - start_time))
      print "Current wps", wps
      wpss.append(wps)

      print 'Seen %d samples' % n_samples
      if platoon:
        print "PLATOON: Sending wps to controller ...",
        worker.send_req({'wps': wps, 'epoch': eidx})
        print "DONE"

    print "Avg wps, ", numpy.mean(wpss)
    print "Std avgs,", numpy.std(wpss)
    use_noise.set_value(0.)
  finally:
    if platoon:
      print "PLATOON: Closing worker ...",
      worker.send_req('done')
      worker.close()
      print "DONE"
  return 0
def main(_):
  if not FLAGS.data_path:
    raise ValueError("Must set --data_path to PTB data directory")
  gpus = [x.name for x in device_lib.list_local_devices() if x.device_type == "GPU"]
  if FLAGS.num_gpus > len(gpus):
    raise ValueError("Your machine has only %d gpus "
                     "which is less than the requested --num_gpus=%d."
                     % (len(gpus), FLAGS.num_gpus))

  raw_data = reader.ptb_raw_data(FLAGS.data_path)
  train_data, valid_data, test_data, _, dict_id_word = raw_data
  dict_word_id = dict(zip(dict_id_word.values(), dict_id_word.keys()))
  eos_id = dict_word_id['<eos>']

  config = get_config()
  eval_config = get_config()
  eval_config.batch_size = 1
  eval_config.num_steps = np.shape(test_data)[0] - 1

  saver = None
  filename = None
  if FLAGS.save_path:
    filename = FLAGS.save_path + '/lmodel.ckpt'

  with tf.Graph().as_default():
    initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
    with tf.name_scope("Train"):
      train_input = PTBInput(config=config, data=train_data, name="TrainInput")
      with tf.variable_scope("Model", reuse=None, initializer=initializer):
        m = PTBModel(is_training=True, config=config, input_=train_input)
      tf.summary.scalar("Training Loss", m.cost)
      tf.summary.scalar("Learning Rate", m.lr)
    with tf.name_scope("Valid"):
      valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
      tf.summary.scalar("Validation Loss", mvalid.cost)
    with tf.name_scope("Test"):
      test_input = PTBInput(config=eval_config, data=test_data, name="TestInput")
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mtest = PTBModel(is_training=False, config=eval_config, input_=test_input)
      tf.summary.scalar("Test Loss", mtest.cost)

    models = {"Train": m, "Valid": mvalid, "Test": mtest}
    for name, model in models.items():
      model.export_ops(name)
    metagraph = tf.train.export_meta_graph()
    if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
      raise ValueError("num_gpus > 1 is not supported for TensorFlow versions below 1.1.0")
    soft_placement = False
    if FLAGS.num_gpus > 1:
      soft_placement = True
      util.auto_parallel(metagraph, m)

    if FLAGS.save_path:
      saver = tf.train.Saver()
      try:
        saver.restore(tf.Session(), filename)
      except Exception as e:
        pass

  with tf.Graph().as_default():
    tf.train.import_meta_graph(metagraph)
    for model in models.values():
      model.import_ops()
    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
    config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
    with sv.managed_session(config=config_proto) as session:
      predicted_word_output = word_predict(session, mtest,
                                           predict_op=mtest._output,
                                           log_output=True,
                                           dict_ids=dict_id_word)
      if FLAGS.save_path:
        print("Saving model to %s." % filename)
        save_path = saver.save(session, filename)
############## Load the DATA #############################################
###########################################################################
"""
We will load the data into RAM, but we will wait until we build the graph
to transform it into TensorFlow elements and divide it into batches with XXXX
"""
data_to_use = "aritificial"  # ptb aritificial

if (data_to_use == "ptb"):
  model_select = "small"  # test small
  data_path = "../data"
  # Read the words from 3 documents and convert them to ids with a vocabulary
  raw_data = reader.ptb_raw_data(data_path)
  """
  Raw data contains 3 lists of word ids plus the vocabulary:
      - [0]: List of ids of the words for train
      - [1]: List of ids of the words for validation
      - [2]: List of ids of the words for test
      - [3]: Number of words in the vocabulary.
  """
  train_data, valid_data, test_data, word_to_id, _ = raw_data
  # Create dictionary from ids to words.
  id_to_word = np.array(list(word_to_id.keys()))
  print(["Most common words: ", id_to_word[0:5]])

# Create the objects with the hyperparameters that will be fed to the network
train_config = Bconf.get_config(model_select, mixing_pi, prior_log_sigma1, prior_log_sigma2)
def main(_):
  #"========================================================================================================"
  if not FLAGS.data_path:
    raise ValueError("Must set --data_path to PTB data directory")
  gpus = [x.name for x in device_lib.list_local_devices() if x.device_type == "GPU"]
  if FLAGS.num_gpus > len(gpus):
    raise ValueError("Your machine has only %d gpus "
                     "which is less than the requested --num_gpus=%d."
                     % (len(gpus), FLAGS.num_gpus))

  raw_data = reader.ptb_raw_data(FLAGS.data_path)
  train_data, valid_data, test_data, _ = raw_data
  config = get_config()
  eval_config = get_config()
  eval_config.batch_size = 1
  eval_config.num_steps = 1

  with tf.Graph().as_default():
    initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
    with tf.name_scope("Train"):
      train_input = PTBInput(config=config, data=train_data, name="TrainInput")
      with tf.variable_scope("Model", reuse=None, initializer=initializer):
        m = PTBModel(is_training=True, config=config, input_=train_input)
      tf.summary.scalar("Training Loss", m.cost)
      tf.summary.scalar("Learning Rate", m.lr)
    with tf.name_scope("Valid"):
      valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
      tf.summary.scalar("Validation Loss", mvalid.cost)
    with tf.name_scope("Test"):
      test_input = PTBInput(config=eval_config, data=test_data, name="TestInput")
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mtest = PTBModel(is_training=False, config=eval_config, input_=test_input)

    models = {"Train": m, "Valid": mvalid, "Test": mtest}
    for name, model in models.items():
      model.export_ops(name)
    metagraph = tf.train.export_meta_graph()
    if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
      raise ValueError("num_gpus > 1 is not supported for TensorFlow versions below 1.1.0")
    soft_placement = False
    if FLAGS.num_gpus > 1:
      soft_placement = True
      util.auto_parallel(metagraph, m)

  save_path = './data/model/mode/model.ckpt'
  with tf.Graph().as_default():
    tf.train.import_meta_graph(metagraph)
    for model in models.values():
      model.import_ops()
    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
    config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
    with sv.managed_session(config=config_proto) as session:
      for i in range(config.max_max_epoch):
        lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
        m.assign_lr(session, config.learning_rate * lr_decay)
        print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
        train_perplexity = run_epoch(session, m, eval_op=m.train_op, verbose=True)
        print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
        valid_perplexity = run_epoch(session, mvalid)
        print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
      test_perplexity = run_epoch(session, mtest)
      print("Test Perplexity: %.3f" % test_perplexity)
      if FLAGS.save_path:
        print("Saving model to %s." % FLAGS.save_path)
        sv.saver.save(session, save_path)

      # sess = session
      # model_path = './data/model/'
      # path = './data/model/'
      # dir_list = os.listdir(path)
      # if len(dir_list) == 0:
      #   version = 1
      # else:
      #   last_version = len(dir_list)
      #   version = last_version + 1
      # path = path + "{}".format(str(version))
      # prediction_signature = (
      #     tf.saved_model.signature_def_utils.build_signature_def(
      #         inputs={'input_images': tf.saved_model.utils.build_tensor_info(train_input.input_data)},
      #         outputs={'output': tf.saved_model.utils.build_tensor_info(m.logits)},
      #         method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME))
      # builder = tf.saved_model.builder.SavedModelBuilder(path)
      # builder.add_meta_graph_and_variables(
      #     sess, [tf.saved_model.tag_constants.SERVING],
      #     signature_def_map={'generate_images': prediction_signature},
      #     legacy_init_op=tf.group(tf.tables_initializer(), name='legacy_init_op'))
      # builder.save(as_text=False)

  with tf.Session(graph=tf.Graph()) as sess:
    saver = tf.train.import_meta_graph("./data/model/model.ckpt-0.meta")
    saver.restore(sess, "./data/model/model.ckpt-0")
    print("Model restore")
    path = './data/model/'
    dir_list = os.listdir(path)
    if len(dir_list) == 0:
      version = 1
    else:
      last_version = len(dir_list)
      version = last_version + 1
    path = path + "{}".format(str(version))
    prediction_signature = (
        tf.saved_model.signature_def_utils.build_signature_def(
            inputs={'input_images': tf.saved_model.utils.build_tensor_info(train_input.input_data)},
            outputs={'output': tf.saved_model.utils.build_tensor_info(m.logits)},
            method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME))
    builder = tf.saved_model.builder.SavedModelBuilder(path)
    builder.add_meta_graph_and_variables(
        sess, [tf.saved_model.tag_constants.SERVING],
        signature_def_map={'generate_images': prediction_signature},
        legacy_init_op=tf.group(tf.tables_initializer(), name='legacy_init_op'))
    builder.save(as_text=False)
def main(_):
  if FLAGS.config is None:
    raise ValueError("Please specify a configuration file.")
  else:
    config = configuration.get_config(FLAGS.config)

  if os.path.isfile('{0}.final'.format(config['name'])):
    raise StandardError("{0}.final already exists. If you want to re-train the model, "
                        "remove the model file and its checkpoints.".format(config['name']))

  fout = file(config['log'], 'w')
  sys.stdout = writer(sys.stdout, fout)

  print('configuration:')
  for par, value in config.iteritems():
    print('{0}\t{1}'.format(par, value))

  eval_config = config.copy()    # same parameters for evaluation, except for:
  eval_config['batch_size'] = 1  # batch_size
  eval_config['num_steps'] = 1   # and number of steps

  # all_data = tuple (train_data, valid_data, test_data), id_to_word = mapping from ids to words,
  # total_length = total length of all padded sentences in case the data is processed per sentence
  all_data, id_to_word, total_length = reader.ptb_raw_data(config)
  train_data = all_data[0]
  valid_data = all_data[1]
  test_data = all_data[2]

  # if processing per sentence
  if 'per_sentence' in config:
    # set num_steps = total length of each (padded) sentence
    config['num_steps'] = total_length
    # vocab is expanded with <bos> and padding symbol @
    config['vocab_size'] = len(id_to_word)
    eval_config['vocab_size'] = len(id_to_word)

  with tf.Graph().as_default():
    # always use the same seed for random initialization (to better compare models)
    tf.set_random_seed(1)
    initializer = tf.random_uniform_initializer(-config['init_scale'], config['init_scale'])

    with tf.name_scope("Train"):
      train_input = inputLM(config=config, data=train_data, name="TrainInput")
      with tf.variable_scope("Model", reuse=None, initializer=initializer):
        m = LM(is_training=True, config=config, input_=train_input)
      tf.scalar_summary("Training Loss", m.cost)
      tf.scalar_summary("Learning Rate", m.lr)
    with tf.name_scope("Valid"):
      valid_input = inputLM(config=config, data=valid_data, name="ValidInput")
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mvalid = LM(is_training=False, config=config, input_=valid_input)
      tf.scalar_summary("Validation Loss", mvalid.cost)
    with tf.name_scope("Test"):
      test_input = inputLM(config=eval_config, data=test_data, name="TestInput")
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mtest = LM(is_training=False, config=eval_config, input_=test_input)

    # sv = training helper that checkpoints models and computes summaries
    sv = tf.train.Supervisor(logdir=config['save_path'])
    # managed_session launches the checkpoint and summary services
    with sv.managed_session() as session:
      if 'early_stop' in config:
        debug('early stopping\n')
        if DEBUG and not isinstance(config['early_stop'], int):
          raise AssertionError('early_stop in config file should be an integer '
                               '(the number of validation ppls you compare with).')
        else:
          val_ppls = []

      # training loop
      for i in range(config['max_max_epoch']):
        # calculate exponential decay
        lr_decay = config['lr_decay'] ** max(i + 1 - config['max_epoch'], 0.0)
        # assign new learning rate to session + run the session
        m.assign_lr(session, config['learning_rate'] * lr_decay)

        print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
        train_perplexity = run_epoch(session, m, id_to_word, eval_op=m.train_op, verbose=True)
        print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
        valid_perplexity = run_epoch(session, mvalid, id_to_word)
        print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))

        if 'early_stop' in config:
          num_times_no_improv = 0
          debug('current list of validation ppls of previous epochs: {0}\n'.format(val_ppls))
          if i > config['early_stop'] - 1:
            debug('epoch {0}: check whether validation ppl has improved\n'.format(i + 1))
            if DEBUG and (len(val_ppls) != config['early_stop']):
              raise AssertionError('Length of list of validation ppls should be equal to the early stopping value.')
            for previous_ppl in val_ppls:
              if valid_perplexity >= previous_ppl:
                debug('current validation ppl ({0}) is higher than previous validation ppl ({1})\n'
                      .format(valid_perplexity, previous_ppl))
                num_times_no_improv += 1
              else:
                debug('current validation ppl ({0}) is lower than previous validation ppl ({1})\n'
                      .format(valid_perplexity, previous_ppl))
            val_ppls.pop(0)
          else:
            debug('epoch {0}: do NOT check whether validation ppl has improved\n'.format(i + 1))
          val_ppls.append(valid_perplexity)
          debug('new list of validation ppls of previous epochs: {0}\n'.format(val_ppls))

          if num_times_no_improv == config['early_stop']:
            best_model = 0
            best_ppl = val_ppls[0]
            # find previous model with best validation ppl
            for idx, previous_ppl in enumerate(val_ppls[1:]):
              if previous_ppl < best_ppl:
                best_ppl = previous_ppl
                best_model = idx
            # filename of the best model
            file_best_model = '{0}.{1}'.format(config['name'], i - (config['early_stop'] - best_model))
            name_best_model = '{0}.final'.format(config['name'])
            debug('model with best validation ppl: epoch {0} (ppl {1})'.format(best_model, best_ppl))
            # set best model to 'final model'
            os.system('ln -s {0} {1}'.format(file_best_model, name_best_model))
            break
          else:
            if 'save_path' in config:
              print('Saving model to {0}.{1}'.format(config['name'], i + 1))
              #sv.saver.save(session, '{0}best_valid_ppl_{1}'.format(config['save_path'], i), global_step=sv.global_step)
              sv.saver.save(session, '{0}.{1}'.format(config['name'], i + 1))

      test_perplexity = run_epoch(session, mtest, id_to_word)
      print("Test Perplexity: %.3f" % test_perplexity)

      # no early stopping: just take model of last epoch as final model
      if not 'early_stop' in config:
        print('No early stopping, saving final model to {0}.final'.format(config['name']))
        #sv.saver.save(session, '{0}.final'.format(config['name']), global_step=sv.global_step)
        sv.saver.save(session, '{0}.final'.format(config['name']))
def train():
  print('data_path: %s' % FLAGS.data_path)
  raw_data = reader.ptb_raw_data(FLAGS.data_path)
  train_data, valid_data, valid_nbest_data, vocab = raw_data
  train_data = chop(train_data, vocab['<eos>'])

  config = MediumConfig()
  if FLAGS.init_scale: config.init_scale = FLAGS.init_scale
  if FLAGS.learning_rate: config.learning_rate = FLAGS.learning_rate
  if FLAGS.max_grad_norm: config.max_grad_norm = FLAGS.max_grad_norm
  if FLAGS.num_layers: config.num_layers = FLAGS.num_layers
  if FLAGS.num_steps: config.num_steps = FLAGS.num_steps
  if FLAGS.hidden_size: config.hidden_size = FLAGS.hidden_size
  if FLAGS.max_epoch: config.max_epoch = FLAGS.max_epoch
  if FLAGS.max_max_epoch: config.max_max_epoch = FLAGS.max_max_epoch
  if FLAGS.keep_prob: config.keep_prob = FLAGS.keep_prob
  if FLAGS.lr_decay: config.lr_decay = FLAGS.lr_decay
  if FLAGS.batch_size: config.batch_size = FLAGS.batch_size
  config.vocab_size = len(vocab)

  print('init_scale: %.2f' % config.init_scale)
  print('learning_rate: %.2f' % config.learning_rate)
  print('max_grad_norm: %.2f' % config.max_grad_norm)
  print('num_layers: %d' % config.num_layers)
  print('num_steps: %d' % config.num_steps)
  print('hidden_size: %d' % config.hidden_size)
  print('max_epoch: %d' % config.max_epoch)
  print('max_max_epoch: %d' % config.max_max_epoch)
  print('keep_prob: %.2f' % config.keep_prob)
  print('lr_decay: %.2f' % config.lr_decay)
  print('batch_size: %d' % config.batch_size)
  print('vocab_size: %d' % config.vocab_size)
  sys.stdout.flush()

  eval_config = MediumConfig()
  eval_config.init_scale = config.init_scale
  eval_config.learning_rate = config.learning_rate
  eval_config.max_grad_norm = config.max_grad_norm
  eval_config.num_layers = config.num_layers
  eval_config.num_steps = config.num_steps
  eval_config.hidden_size = config.hidden_size
  eval_config.max_epoch = config.max_epoch
  eval_config.max_max_epoch = config.max_max_epoch
  eval_config.keep_prob = config.keep_prob
  eval_config.lr_decay = config.lr_decay
  eval_config.batch_size = 200
  eval_config.vocab_size = len(vocab)

  prev = 0
  with tf.Graph().as_default(), tf.Session() as session:
    initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
    with tf.variable_scope("model", reuse=None, initializer=initializer):
      m = PTBModel(is_training=True, config=config)
    with tf.variable_scope("model", reuse=True, initializer=initializer):
      mvalid = PTBModel(is_training=False, config=eval_config)

    tf.initialize_all_variables().run()
    if FLAGS.model_path:
      saver = tf.train.Saver()

    for i in range(config.max_max_epoch):
      shuffle(train_data)
      shuffled_data = list(itertools.chain(*train_data))

      start_time = time.time()
      lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
      m.assign_lr(session, config.learning_rate * lr_decay)
      print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
      train_perplexity = run_epoch(session, m, shuffled_data, m.train_op, verbose=True)
      print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
      valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op())
      print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
      valid_f1, num = run_epoch2(session, mvalid, valid_nbest_data, tf.no_op(), vocab['<eos>'])
      print("Epoch: %d Valid F1: %.2f (%d trees)" % (i + 1, valid_f1, num))
      print('It took %.2f seconds' % (time.time() - start_time))

      if prev < valid_f1:
        prev = valid_f1
        if FLAGS.model_path:
          print('Save a model to %s' % FLAGS.model_path)
          saver.save(session, FLAGS.model_path)
          pickle.dump(eval_config, open(FLAGS.model_path + '.config', 'wb'))
      sys.stdout.flush()
help='number of epochs') parser.add_argument('--dp_keep_prob', type=float, default=0.35, help='dropout *keep* probability') parser.add_argument('--initial_lr', type=float, default=20.0, help='initial learning rate') parser.add_argument('--save', type=str, default='lm_model.pt', help='path to save the final model') args = parser.parse_args() raw_data = reader.ptb_raw_data(data_path=args.data) train_data, valid_data, test_data, word_to_id, id_2_word = raw_data vocab_size = len(word_to_id) print('Vocabulary size: {}'.format(vocab_size)) model = LM_LSTM(embedding_dim=args.hidden_size, num_steps=args.num_steps, batch_size=args.batch_size, vocab_size=vocab_size, num_layers=args.num_layers, dp_keep_prob=args.dp_keep_prob) model.cuda() lr = args.initial_lr # decay factor for learning rate lr_decay_base = 1 / 1.15 # we will not touch lr for the first m_flat_lr epochs m_flat_lr = 14.0
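# A minimal illustrative sketch (not from the original snippet; the training loop itself is not
# shown here) of how a flat-then-decay schedule built from lr_decay_base and m_flat_lr is
# typically applied: the learning rate stays at its initial value for the first m_flat_lr epochs
# and is multiplied by lr_decay_base for every epoch after that. The helper name epoch_lr is
# hypothetical.
def epoch_lr(epoch, base_lr=20.0, decay_base=1 / 1.15, flat_epochs=14.0):
    """Return the learning rate to use for the given (0-indexed) epoch."""
    return base_lr * decay_base ** max(epoch + 1 - flat_epochs, 0.0)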
def main(_): if not FLAGS.data_path: raise ValueError("Must set --data_path to PTB data directory") raw_data = reader.ptb_raw_data(FLAGS.data_path) train_data, valid_data, test_data, _ = raw_data config = get_config() eval_config = get_config() eval_config.batch_size = 1 eval_config.num_steps = 1 with tf.Graph().as_default(): initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale) with tf.name_scope("Train"): train_input = PTBInput(config=config, data=train_data, name="TrainInput") with tf.variable_scope("Model", reuse=None, initializer=initializer): m = PTBModel(is_training=True, config=config, input_=train_input) tf.summary.scalar("Training Loss", m.cost) tf.summary.scalar("Learning Rate", m.lr) with tf.name_scope("Valid"): valid_input = PTBInput(config=config, data=valid_data, name="ValidInput") with tf.variable_scope("Model", reuse=True, initializer=initializer): mvalid = PTBModel(is_training=False, config=config, input_=valid_input) tf.summary.scalar("Validation Loss", mvalid.cost) with tf.name_scope("Test"): test_input = PTBInput(config=eval_config, data=test_data, name="TestInput") with tf.variable_scope("Model", reuse=True, initializer=initializer): mtest = PTBModel(is_training=False, config=eval_config, input_=test_input) tf.summary.scalar("Test Loss", mtest.cost) sv = tf.train.Supervisor(logdir=FLAGS.save_path) with sv.managed_session() as session: for i in range(config.max_max_epoch): lr_decay = config.lr_decay**max(i + 1 - config.max_epoch, 0.0) m.assign_lr(session, config.learning_rate * lr_decay) print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr))) train_perplexity = run_epoch(session, m, eval_op=m.train_op, verbose=True) print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity)) valid_perplexity = run_epoch(session, mvalid) print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity)) test_perplexity = run_epoch(session, mtest) print("Test Perplexity: %.3f" % test_perplexity) if FLAGS.save_path: print("Saving model to %s." % FLAGS.save_path) sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)
def SGDBead(self, bead, thresh, maxindex): raw_data = reader.ptb_raw_data(data_path) train_data, _, test_data, _ = raw_data curWeights, curBiases = self.AllBeads[bead] test_model = PTBModel(config=config, w=curWeights, b=curBiases) with test_model.g.as_default(): with tf.name_scope("Train"): train_input = PTBInput(config=config, data=train_data, name="TrainInput") with tf.variable_scope("Model", reuse=None): inputs_for_training = tf.nn.embedding_lookup(test_model.weights['e'], train_input.input_data) if True and config.keep_prob < 1: #True is a standin for is_training inputs_for_training = tf.nn.dropout(inputs_for_training, config.keep_prob) inputs_for_training = [tf.squeeze(input_step, [1]) for input_step in tf.split(1, 20, inputs_for_training)] pred = test_model.predict(inputs_for_training) loss = tf.nn.seq2seq.sequence_loss_by_example([pred], [tf.reshape(train_input.targets, [-1])], [tf.ones([test_model.batch_size * test_model.num_steps], dtype=data_type())]) cost = tf.reduce_sum(loss) / test_model.batch_size test_model._lr = tf.Variable(0.0, trainable=False) tvars = tf.trainable_variables() grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), config.max_grad_norm) optimizer = tf.train.GradientDescentOptimizer(test_model._lr) train_op = optimizer.apply_gradients( zip(grads, tvars), global_step=tf.contrib.framework.get_or_create_global_step()) test_model._new_lr = tf.placeholder( tf.float32, shape=[], name="new_learning_rate") test_model._lr_update = tf.assign(test_model._lr, test_model._new_lr) test_LSTM_weight = tf.trainable_variables()[-4] with tf.name_scope("Test"): test_input = PTBInput(config=config, data=test_data, name="TestInput") with tf.variable_scope("Model", reuse=True): inputs_for_testing = tf.nn.embedding_lookup(test_model.weights['e'], test_input.input_data) inputs_for_testing = [tf.squeeze(input_step, [1]) for input_step in tf.split(1, 20, inputs_for_testing)] pred_test = test_model.predict(inputs_for_testing) loss_test = tf.nn.seq2seq.sequence_loss_by_example([pred_test], [tf.reshape(test_input.targets, [-1])], [tf.ones([test_model.batch_size * test_model.num_steps], dtype=data_type())]) cost_test = tf.reduce_sum(loss_test) / test_model.batch_size init = tf.initialize_all_variables() sv = tf.train.Supervisor() with sv.managed_session() as session: session.run(init) stopcond = True perplex_thres=10000. i = 0 i_max = 100 while i < i_max and stopcond: lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0) test_model.assign_lr(session, config.learning_rate * lr_decay) print "Epoch: %d Learning rate: %.3f" + str((i + 1, session.run(test_model.lr))) train_perplexity = run_epoch(session, test_model, train_input,cost, eval_op=train_op, verbose=False) print "Epoch: %d Train Perplexity: %.3f" + str((i + 1, train_perplexity)) test_perplexity = run_epoch(session, test_model, test_input, cost_test, verbose=False) print "Test perplexity: " + str(test_perplexity) i+=1 if test_perplexity < thresh: stopcond = False with tf.name_scope("Test"): with tf.variable_scope("Model", reuse=True): tv = tf.trainable_variables() test_model.params = [session.run(tv[-4]),\ session.run(tv[-2]),\ session.run(tv[-7]),\ session.run(tv[-6])],\ [session.run(tv[-3]),\ session.run(tv[-1]),\ session.run(tv[-5])] self.AllBeads[bead]=test_model.params return test_perplexity
cost = vals["cost"] state = vals["final_state"] costs += cost iters += model.input.num_steps if verbose and step % (model.input.epoch_size // 10) == 10: print("%.3f perplexity: %.3f speed: %.0f wps" % (step * 1.0 / model.input.epoch_size, np.exp( costs / iters), iters * model.input.batch_size / (time.time() - start_time))) return np.exp(costs / iters) raw_data = reader.ptb_raw_data('simple-examples/data/') train_data, valid_data, test_data, _ = raw_data config = SmallConfig() eval_config = SmallConfig() eval_config.batch_size = 1 eval_config.num_steps = 1 with tf.Graph().as_default(): initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale) with tf.name_scope("Train"): train_input = PTBInput(config=config, data=train_data, name="TrainInput")
def train(self): fakedata = np.zeros((2,4)) fakedata[0,:] = [1,1,0,0] fakedata[1,:] = [2,2,0,0] # for i in xrange(1000): # guess, z, z_mean, z_stddev, gen_loss, latent_loss, _ = self.sess.run([self.d, self.z, self.z_mean, self.z_stddev, self.generation_loss, self.latent_loss, self.optimizer], feed_dict={self.sentences_in: fakedata}) # print "%f %f" % (np.mean(gen_loss), np.mean(latent_loss)) # print np.argmax(guess,axis=2) # # print z_mean # # print z_stddev # print z # # print partway.shape # np.set_printoptions(threshold=np.inf) raw_data = reader.ptb_raw_data("/home/kevin/Documents/Datasets/simple-examples/data") train_data, valid_data, test_data, vocabsize = raw_data print vocabsize # print train_data list(reader.ptb_iterator(valid_data, self.batchsize, self.sentence_length)) saver = tf.train.Saver(max_to_keep=2) # saver.restore(self.sess, tf.train.latest_checkpoint(os.getcwd()+"/training/")) ls = 0.1 for epoch in xrange(10000): if epoch > 20: ls = min(1, epoch / 50.0) total_genloss = 0 total_latentloss = 0 steps = 0 for step, x in enumerate(reader.ptb_iterator(test_data, self.batchsize, self.sentence_length)): x2 = np.copy(x) c = np.zeros((self.batchsize,1), dtype=np.int32) c.fill(10001) x = np.hstack((x[:,1:],c)) # x: input # x2: desired output gen_loss, latent_loss, _ = self.sess.run([self.generation_loss, self.latent_loss, self.update], feed_dict={self.sentences_in: x, self.sentences_in_decoded: x2, self.latentscale: ls}) gl = np.mean(gen_loss) / self.sentence_length # print "gen loss: %f latent loss: %f perplexity: %f" % (gl, np.mean(latent_loss), np.exp(gl)) total_genloss += gl total_latentloss += np.mean(latent_loss) steps = steps + 1 print "epoch %d genloss %f perplexity %f latentloss %f" % (epoch, total_genloss / steps, np.exp(total_genloss/steps), total_latentloss) total_validloss = 0 validsteps = 0 for step, x in enumerate(reader.ptb_iterator(valid_data, self.batchsize, self.sentence_length)): x2 = np.copy(x) c = np.zeros((self.batchsize,1), dtype=np.int32) c.fill(10001) x = np.hstack((x[:,1:],c)) # x: input # x2: desired output gen_loss, latent_loss = self.sess.run([self.generation_loss, self.latent_loss], feed_dict={self.sentences_in: x, self.sentences_in_decoded: x2, self.latentscale: ls}) gl = np.mean(gen_loss) / self.sentence_length # print "gen loss: %f latent loss: %f perplexity: %f" % (gl, np.mean(latent_loss), np.exp(gl)) total_validloss += gl validsteps = validsteps + 1 print "valid %d genloss %f perplexity %f" % (epoch, total_validloss / validsteps, np.exp(total_validloss/validsteps)) if epoch % 10 == 0: saver.save(self.sess, os.getcwd()+"/training-reg/train",global_step=epoch)
state = sess.run(model.initial_state) for step, (x, y) in enumerate(ptb_iterator(data, model.batch_size, model.num_steps)): feed_dict = {model.input: x, model.target: y, model.initial_state: state} cost, state, _ = sess.run([model.cost, model.final_state, eval_op], feed_dict=feed_dict) costs += cost iters += model.num_steps if verbose and step % (epoch_size // 10) == 10: print("%.3f perplexity: %.3f speed: %.0f wps" % (step * 1.0 / epoch_size, np.exp(costs / iters), iters * m.batch_size / (time.time() - start_time))) return np.exp(costs/iters) if __name__ == "__main__": # Load the PTB data data_path = sys.path[0] + "/data/" train_data, valid_data, test_data, vocab= ptb_raw_data(data_path=data_path) print(len(train_data), len(valid_data), len(test_data), vocab) # Configs config = LargeConfig() eval_config = LargeConfig() eval_config.batch_size = 1 eval_config.num_steps = 1 with tf.Graph().as_default(), tf.Session() as sess: initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale) with tf.variable_scope("model", reuse=None, initializer=initializer): model = LSTM_Model(num_steps=config.num_steps, vocab_size=config.vocab_size, batch_size= config.batch_size, hidden_size=config.hidden_size, num_lstm_layers=config.num_layers, keep_prob=config.keep_prob, max_grad_norm=config.max_grad_norm, is_training=True) with tf.variable_scope("model", reuse=True, initializer=initializer): val_model = LSTM_Model(num_steps=config.num_steps, vocab_size=config.vocab_size, batch_size=
def main(): args = parse_args() model_type = args.model_type logger = logging.getLogger("ptb") logger.setLevel(logging.INFO) formatter = logging.Formatter( '%(asctime)s - %(name)s - %(levelname)s - %(message)s') if args.log_path: file_handler = logging.FileHandler(args.log_path) file_handler.setLevel(logging.INFO) file_handler.setFormatter(formatter) logger.addHandler(file_handler) else: console_handler = logging.StreamHandler() console_handler.setLevel(logging.INFO) console_handler.setFormatter(formatter) logger.addHandler(console_handler) logger.info('Running with args : {}'.format(args)) if model_type == "small": batch_size = 20 num_steps = 20 hidden_size = 200 num_layers = 2 vocab_size = 10000 keep_prob = 1.0 init_scale = 0.1 max_grad_norm = 5 max_epoch = 13 base_learning_rate = 1.0 lr_decay = 0.5 epoch_start_decay = 4 elif model_type == "medium": batch_size = 20 num_steps = 35 hidden_size = 650 num_layers = 2 vocab_size = 10000 keep_prob = 0.5 init_scale = 0.05 max_grad_norm = 5 max_epoch = 39 base_learning_rate = 1.0 lr_decay = 0.8 epoch_start_decay = 6 elif model_type == "large": batch_size = 20 num_steps = 35 hidden_size = 1500 num_layers = 2 vocab_size = 10000 keep_prob = 0.35 init_scale = 0.04 max_grad_norm = 10 max_epoch = 55 base_learning_rate = 1.0 lr_decay = 1 / 1.15 epoch_start_decay = 14 else: print("type not support", model_type) exit() if args.max_epoch > 0: max_epoch = args.max_epoch if args.profile: print( "\nProfiler is enabled, only 1 epoch will be ran (set max_epoch = 1).\n" ) max_epoch = 1 # Create symbolic vars cost, final_h, final_c, train_op, new_lr, lr_update, feeding_list = ptb_lm_model( hidden_size, vocab_size, batch_size, num_layers, num_steps, init_scale, keep_prob, max_grad_norm, rnn_type=args.rnn_type) # Initialize session init = tf.global_variables_initializer() config = tf.ConfigProto() config.gpu_options.allow_growth = True # dynamic allocation of VRAM # config.gpu_options.allow_growth = False # dynamic allocation of VRAM # Print parameter count params = 0 for variable in tf.trainable_variables(): # shape is an array of tf.Dimension shape = variable.get_shape() variable_parameters = 1 for dim in shape: variable_parameters *= dim.value params += variable_parameters print('# network parameters: ' + str(params)) data_path = "data/simple-examples/data" raw_data = reader.ptb_raw_data(data_path) print("finished load data") train_data, valid_data, test_data, _ = raw_data def eval(sess, data): if args.inference_only: sess.run(init) batch_times = [] start_time = time.time() eval_loss = 0.0 eval_iters = 0 eval_data_iter = reader.get_data_iter(data, batch_size, num_steps) init_h = np.zeros((num_layers, batch_size, hidden_size), dtype='float32') init_c = np.zeros((num_layers, batch_size, hidden_size), dtype='float32') for batch in eval_data_iter: x, y = batch feed_dict = {} feed_dict[feeding_list[0]] = x feed_dict[feeding_list[1]] = y feed_dict[feeding_list[2]] = init_h feed_dict[feeding_list[3]] = init_c batch_start_time = time.time() output = sess.run([cost, final_h, final_c], feed_dict) batch_times.append(time.time() - batch_start_time) train_cost = output[0] init_h = output[1] init_c = output[2] eval_loss += train_cost eval_iters += num_steps ppl = np.exp(eval_loss / eval_iters) eval_time_total = time.time() - start_time eval_time_run = np.sum(batch_times) if args.inference_only: print( "Eval batch_size: %d; Time (total): %.5f s; Time (only run): %.5f s; ppl: %.5f" % (batch_size, eval_time_total, eval_time_run, ppl)) return ppl, eval_time_total def train(sess): 
sess.run(init) if args.profile: profiler_step = 0 profiler = model_analyzer.Profiler(graph=sess.graph) run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() total_time = 0.0 epoch_times = [] for epoch_id in xrange(max_epoch): batch_times = [] epoch_start_time = time.time() train_data_iter = reader.get_data_iter(train_data, batch_size, num_steps) # assign lr, update the learning rate new_lr_1 = base_learning_rate * (lr_decay**max( epoch_id + 1 - epoch_start_decay, 0.0)) sess.run(lr_update, {new_lr: new_lr_1}) total_loss = 0.0 iters = 0 batch_len = len(train_data) // batch_size epoch_size = (batch_len - 1) // num_steps if args.profile: log_fre = 1 else: log_fre = epoch_size // 10 init_h = np.zeros((num_layers, batch_size, hidden_size), dtype='float32') init_c = np.zeros((num_layers, batch_size, hidden_size), dtype='float32') count = 0.0 for batch_id, batch in enumerate(train_data_iter): x, y = batch feed_dict = {} feed_dict[feeding_list[0]] = x feed_dict[feeding_list[1]] = y feed_dict[feeding_list[2]] = init_h feed_dict[feeding_list[3]] = init_c batch_start_time = time.time() if args.profile: output = sess.run([cost, final_h, final_c, train_op], feed_dict, options=run_options, run_metadata=run_metadata) profiler.add_step(step=profiler_step, run_meta=run_metadata) profiler_step = profiler_step + 1 if batch_id >= 10: break else: output = sess.run([cost, final_h, final_c, train_op], feed_dict) batch_time = time.time() - batch_start_time batch_times.append(batch_time) train_cost = output[0] init_h = output[1] init_c = output[2] total_loss += train_cost iters += num_steps count = count + 1 if batch_id > 0 and batch_id % log_fre == 0: ppl = np.exp(total_loss / iters) print( "-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f, lr: %.5f" % (epoch_id, batch_id, batch_time, ppl, new_lr_1)) ppl = np.exp(total_loss / iters) epoch_time = time.time() - epoch_start_time epoch_times.append(epoch_time) total_time += epoch_time print( "\nTrain epoch:[%d]; epoch Time: %.5f s; ppl: %.5f; avg_time: %.5f steps/s\n" % (epoch_id, epoch_time, ppl, (batch_id + 1) / sum(batch_times))) valid_ppl, _ = eval(sess, valid_data) print("Valid ppl: %.5f" % valid_ppl) test_ppl, test_time = eval(sess, test_data) print("Test Time (total): %.5f, ppl: %.5f" % (test_time, test_ppl)) if args.profile: profile_op_opt_builder = option_builder.ProfileOptionBuilder() profile_op_opt_builder.select(['micros', 'occurrence']) profile_op_opt_builder.order_by('micros') profile_op_opt_builder.with_max_depth(50) profiler.profile_operations(profile_op_opt_builder.build()) with tf.Session(config=config) as sess: if not args.inference_only: train(sess) else: eval(sess, test_data)
#coding=UTF-8 import tensorflow as tf import reader ''' 1. Read the data and print its length and the first 100 elements. ''' DATA_PATH = "../Dataset/PTB_data" train_data, valid_data, test_data, _ = reader.ptb_raw_data(DATA_PATH) print(len(train_data)) print(train_data[:100]) ''' 2. Organize the training data into batches of size 4 with truncation length 5, and use a queue to read the first 3 batches. ''' # ptb_producer returns a pair of 2-D tensors. result = reader.ptb_producer(train_data, 4, 5) # Read the batches one by one through the queue. with tf.Session() as sess: coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) for i in range(3): x, y = sess.run(result) print("X%d: " % i, x) print("Y%d: " % i, y) coord.request_stop() coord.join(threads)
def main(_): if not FLAGS.data_path: raise ValueError("Must set --data_path to PTB data directory") raw_data = reader.ptb_raw_data(FLAGS.data_path) train_data, valid_data, test_data, word_to_id, words = raw_data # print(word_to_id) # print(words) config = get_config() eval_config = get_config() eval_config.batch_size = 1 eval_config.num_steps = 1 with tf.Graph().as_default(), tf.Session() as session: initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale) with tf.variable_scope("model", reuse=None, initializer=initializer): m = PTBModel(is_training=True, config=config) with tf.variable_scope("model", reuse=True, initializer=initializer): mvalid = PTBModel(is_training=False, config=config) mtest = PTBModel(is_training=False, config=eval_config) tf.initialize_all_variables().run() saver = tf.train.Saver(); if FLAGS.train: for i in range(config.max_max_epoch): lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0) m.assign_lr(session, config.learning_rate * lr_decay) print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr))) train_perplexity = run_epoch(session, m, train_data, m.train_op, verbose=True) print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity)) valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op()) print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity)) save_path = saver.save(session, "../models/rnn/rnn_melville_model.ckpt") print("Model saved in file: %s" % save_path) ckpt = tf.train.get_checkpoint_state('../models/rnn/'); if ckpt and ckpt.model_checkpoint_path: saver.restore(session,"../models/rnn/rnn_melville_model.ckpt"); # test_perplexity = run_epoch_eval(session, mtest, test_data, words, tf.no_op()) # print("Test Perplexity: %.3f" % test_perplexity) #------------- #inserted code begins here #------------- f = open(FLAGS.data_path+"ptb.test.txt"); testtext = f.read(); testpieces = testtext.split(); length = int(FLAGS.length) for i in range(100): start = random.randint(0,(len(testpieces)-length-1)); stop = start + length; inpieces = testpieces[start:stop]; instr = " ".join(inpieces); predicted_continuation = predict_words(session, mtest, instr, length, words, word_to_id, tf.no_op()); print("test sequence",i) print("input string:") print(instr); print(); print("predicted continuation:") print(predicted_continuation); print(); print("----------"); print(); print();
import reader import tensorflow as tf # Data path DATA_PATH = 'simple-examples/data/' # Read the raw data train_data, valid_data, test_data, _ = reader.ptb_raw_data(DATA_PATH) # Organize the data into batches of size 4 with truncation length 5; this must be done before the queue threads are started batch = reader.ptb_producer(train_data, 4, 5) with tf.Session() as sess: tf.global_variables_initializer().run() # Start the queue-runner threads coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(coord=coord) # Read the first two batches, each containing the inputs and the corresponding targets for every time step; ptb_producer() iterates automatically for i in range(2): x, y = sess.run(batch) print('x:', x) print('y:', y) # Stop the threads coord.request_stop() coord.join(threads)
feed_dict[h] = state[i].h vals = session.run(fetches, feed_dict) cost = vals["cost"] state = vals["final_state"] costs += cost iters += model.input.num_steps if verbose and step % (model.input.epoch_size // 10) == 10: print("%.3f perplexity: %.3f speed: %.0f wps" % (step * 1.0 / model.input.epoch_size, np.exp( costs / iters), iters * model.input.batch_size / (time.time() - start_time))) return np.exp(costs / iters) raw_data = reader.ptb_raw_data('data/penn/') train_data, valid_data, test_data, _ = raw_data # LargeConfig SmallConfig config = LargeConfig() eval_config = LargeConfig() eval_config.batch_size = 1 eval_config.num_steps = 1 with tf.Graph().as_default(): initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale) with tf.name_scope("Train"): train_input = PTBInput(config=config, data=train_data, name="TrainInput") with tf.variable_scope("Model", reuse=None, initializer=initializer):
"""__Dataset:__ For this homework, we will use the [Penn Tree Bank](https://catalog.ldc.upenn.edu/ldc99t42) (PTB) dataset, which is a popular benchmark for measuring the quality of these models, whilst being small and relatively fast to train. Below is a small sample of data. """ with tf.io.gfile.GFile(os.path.join(os.getcwd(), 'data', "ptb.train.txt"), "r") as f: data = f.read() data[0:2000] """__Pre Processing__ The given dataset is a collection of words as shown above. In order to train an neural network we need numerical representation of the data. For this purpose, we encode each word in the dataset with unique numerical values. The following code implements the conversion of dataset words into numerical values. """ data_path = os.path.join(os.getcwd(), 'data') raw_data = reader.ptb_raw_data(data_path) train_data, valid_data, test_data, vocabulary, word_ids = raw_data data = { 'train_data': train_data[0:10000], 'valid_data': valid_data, 'test_data': test_data, 'vocabulary': vocabulary, 'word_ids': word_ids } ids_words = {i: w for w, i in word_ids.items()} word_ids """### Demo for Recurrent Neural Network <img src="rnn_pic.png" alt="Drawing" style="width: 600px;"/>
def main(_): raw_data = reader.ptb_raw_data(FLAGS.data_path) train_data, valid_data, test_data, _ = raw_data config = get_config() eval_config = get_config() eval_config.batch_size = 1 eval_config.num_steps = 1 with tf.Graph().as_default(): initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale) with tf.name_scope("Train"): train_input = PTBInput(config=config, data=train_data, name="TrainInput") with tf.variable_scope("Model", reuse=None, initializer=initializer): m = RNNModel(is_training=True, config=config, input_=train_input) tf.summary.scalar("Training Loss", m.cost) tf.summary.scalar("Learning Rate", m.lr) with tf.name_scope("Valid"): valid_input = PTBInput(config=config, data=valid_data, name="ValidInput") with tf.variable_scope("Model", reuse=True, initializer=initializer): mvalid = RNNModel(is_training=False, config=config, input_=valid_input) tf.summary.scalar("Validation Loss", mvalid.cost) with tf.name_scope("Test"): test_input = PTBInput( config=config, data=test_data, name="TestInput") with tf.variable_scope("Model", reuse=True, initializer=initializer): mtest = RNNModel(is_training=False, config=config, input_=test_input) models = {"Train": m, "Valid": mvalid, "Test": mtest} for name, model in models.items(): model.export_ops(name) metagraph = tf.train.export_meta_graph() if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1: raise ValueError("num_gpus > 1 is not supported for TensorFlow versions " "below 1.1.0") soft_placement = False if FLAGS.num_gpus > 1: soft_placement = True util.auto_parallel(metagraph, m) with tf.Graph().as_default(): tf.train.import_meta_graph(metagraph) for model in models.values(): model.import_ops() sv = tf.train.Supervisor(logdir=FLAGS.save_path) config_proto = tf.ConfigProto(allow_soft_placement=soft_placement) config_proto.gpu_options.allow_growth = True with sv.managed_session(config=config_proto) as session: best_valid_perplexity = 10000 valid_perplexity = 0 best_test_perplexity = 10000 test_perplexity = 0 for i in range(config.max_max_epoch): if valid_perplexity > best_valid_perplexity or test_perplexity > best_test_perplexity: # lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0) if config.learning_rate > 0.0001: config.learning_rate = config.learning_rate * config.lr_decay else: config.learning_rate = config.learning_rate else: config.learning_rate = config.learning_rate m.assign_lr(session, config.learning_rate) print("Epoch: %d Learning rate: %.4f" % (i + 1, session.run(m.lr))) train_perplexity = run_epoch(session, m, eval_op=m.train_op, verbose=True) print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity)) valid_perplexity = run_epoch(session, mvalid) if valid_perplexity < best_valid_perplexity: best_valid_perplexity = valid_perplexity print("Epoch: %d Valid Perplexity: %.3f best valid: %.3f" % (i + 1, valid_perplexity, best_valid_perplexity)) test_perplexity = run_epoch(session, mtest) if test_perplexity < best_test_perplexity: best_test_perplexity = test_perplexity f = open('ppl_hidden_'+str(config.hidden_size)+'.txt', 'w') f.write('best_test_perplexity:'+str(best_test_perplexity)+'\n') f.write('best_valid_perplexity:'+str(best_valid_perplexity)+'\n') f.close() print("Epoch: %d Test Perplexity: %.3f best test: %.3f" % (i + 1, test_perplexity, best_test_perplexity)) if FLAGS.save_path: print("Saving model to %s." % FLAGS.save_path) sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)
def main(_): # Raw data train_data, valid_data, test_data, _ = reader.ptb_raw_data(DATA_PATH) # Compute the number of training steps in one epoch train_data_len = len(train_data) # size of the dataset train_batch_len = train_data_len // TRAIN_BATCH_SIZE # number of batches train_epoch_size = (train_batch_len - 1) // TRAIN_NUM_STEP # number of training steps in this epoch valid_data_len = len(valid_data) valid_batch_len = valid_data_len // EVAL_BATCH_SIZE valid_epoch_size = (valid_batch_len - 1) // EVAL_NUM_STEP test_data_len = len(test_data) test_batch_len = test_data_len // EVAL_BATCH_SIZE test_epoch_size = (test_batch_len - 1) // EVAL_NUM_STEP # Create the data queues (using the batch-size/num-steps constants, since the models are only defined below); this must be done before the queue threads are started train_queue = reader.ptb_producer(train_data, TRAIN_BATCH_SIZE, TRAIN_NUM_STEP) valid_queue = reader.ptb_producer(valid_data, EVAL_BATCH_SIZE, EVAL_NUM_STEP) test_queue = reader.ptb_producer(test_data, EVAL_BATCH_SIZE, EVAL_NUM_STEP) # Define the initializer initializer = tf.random_uniform_initializer(-0.05, 0.05) # Define the model used for training with tf.variable_scope( 'language_model', reuse=None, initializer=initializer): train_model = PTBModel(True, TRAIN_BATCH_SIZE, TRAIN_NUM_STEP) # Define the model used for evaluation with tf.variable_scope( 'language_model', reuse=True, initializer=initializer): eval_model = PTBModel(False, EVAL_BATCH_SIZE, EVAL_NUM_STEP) with tf.Session() as sess: tf.global_variables_initializer().run() # Start the queue-runner threads so that ptb_producer() can use tf.train.range_input_producer() coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) # Train the model on the training data for i in range(NUM_EPOCH): print('In iteration: %d' % (i + 1)) run_epoch(sess, train_model, train_queue, train_model.train_op, True, train_epoch_size) # train the model valid_perplexity = run_epoch(sess, eval_model, valid_queue, tf.no_op(), False, valid_epoch_size) # evaluate on the validation data print('Epoch: %d Validation Perplexity: %.3f' % (i + 1, valid_perplexity)) # Evaluate on the test data test_perplexity = run_epoch(sess, eval_model, test_queue, tf.no_op(), False, test_epoch_size) print('Test Perplexity: %.3f' % test_perplexity) # Stop all threads coord.request_stop() coord.join(threads)
costs += cost iters += model.input.num_steps # Print the current results at regular intervals if verbose and step % (model.input.epoch_size // 10) == 10: print("%.3f perplexity: %.3f speed: %.0f wps" % (step * 1.0 / model.input.epoch_size, np.exp(costs / iters), iters * model.input.batch_size / (time.time() - start_time))) return np.exp(costs / iters) raw_data = reader.ptb_raw_data('simple-examples/data/') train_data, valid_data, test_data, _ = raw_data config = SmallConfig() eval_config = SmallConfig() eval_config.batch_size = 1 eval_config.num_steps = 1 with tf.Graph().as_default(): initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale) with tf.name_scope("Train"): train_input = PTBInput(config=config, data=train_data, name="TrainInput") with tf.variable_scope("Model", reuse=None, initializer=initializer): m = PTBModel(is_training=True, config=config, input_=train_input)
return self._train_op flags = tf.flags logging = tf.logging flags.DEFINE_string( "model", "small", "A type of model. Possible options are: small, medium, large.") flags.DEFINE_string("data_path", None, "data_path") FLAGS = flags.FLAGS FLAGS.data_path='/Users/marting/scratch/tensorflow/simple-examples/data/' raw_data = reader.ptb_raw_data(FLAGS.data_path) train_data, valid_data, test_data, _ = raw_data class SmallConfig(object): """Small config.""" init_scale = 0.1 learning_rate = 1.0 max_grad_norm = 5 num_layers = 2 num_steps = 20 hidden_size = 200 max_epoch = 4 max_max_epoch = 13 keep_prob = 1.0 lr_decay = 0.5 batch_size = 20 vocab_size = 10000
with tf.Session() as sess: print('loading the embedding matrix from {}'.format(checkpoint_file)) saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file)) saver.restore(sess, checkpoint_file) embedding_var = None for var in tf.all_variables(): if var.name == 'Model/embedding:0': embedding_var = var break if not embedding_var: print("Couldn't find the embedding matrix!") exit(1) embedding = sess.run([embedding_var])[0] print('loading the training data to get the vocabulary...') raw_data = reader.ptb_raw_data('data') _, _, _, word_to_id = raw_data # The unkwown word index unk = word_to_id['<unk>'] def similarity(word1, word2): e1 = embedding[word_to_id.get(word1, unk)] e2 = embedding[word_to_id.get(word2, unk)] sim = 1 - spatial.distance.cosine(e1, e2) print("similarity({}, {}) = {}".format(word1, word2, sim)) return sim score = 0 score += similarity('a', 'an') > similarity('a', 'document') score += similarity('in', 'of') > similarity('in', 'picture')
def main(_): if not FLAGS.data_path: raise ValueError("Must set --data_path to PTB data directory") gpus = [ x.name for x in device_lib.list_local_devices() if x.device_type == "GPU" ] if FLAGS.num_gpus > len(gpus): raise ValueError( "Your machine has only %d gpus " "which is less than the requested --num_gpus=%d." % (len(gpus), FLAGS.num_gpus)) raw_data = reader.ptb_raw_data(FLAGS.data_path) train_data, valid_data, test_data, _ = raw_data config = get_config() eval_config = get_config() eval_config.batch_size = 1 eval_config.num_steps = 1 with tf.Graph().as_default(): initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale) with tf.name_scope("Train"): train_input = PTBInput(config=config, data=train_data, name="TrainInput") with tf.variable_scope("Model", reuse=None, initializer=initializer): m = PTBModel(is_training=True, config=config, input_=train_input) tf.summary.scalar("Training Loss", m.cost) tf.summary.scalar("Learning Rate", m.lr) with tf.name_scope("Valid"): valid_input = PTBInput(config=config, data=valid_data, name="ValidInput") with tf.variable_scope("Model", reuse=True, initializer=initializer): mvalid = PTBModel(is_training=False, config=config, input_=valid_input) tf.summary.scalar("Validation Loss", mvalid.cost) with tf.name_scope("Test"): test_input = PTBInput( config=eval_config, data=test_data, name="TestInput") with tf.variable_scope("Model", reuse=True, initializer=initializer): mtest = PTBModel(is_training=False, config=eval_config, input_=test_input) models = {"Train": m, "Valid": mvalid, "Test": mtest} for name, model in models.items(): model.export_ops(name) metagraph = tf.train.export_meta_graph() if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1: raise ValueError("num_gpus > 1 is not supported for TensorFlow versions " "below 1.1.0") soft_placement = False if FLAGS.num_gpus > 1: soft_placement = True util.auto_parallel(metagraph, m) with tf.Graph().as_default(): tf.train.import_meta_graph(metagraph) for model in models.values(): model.import_ops() sv = tf.train.Supervisor(logdir=FLAGS.save_path) config_proto = tf.ConfigProto(allow_soft_placement=soft_placement) with sv.managed_session(config=config_proto) as session: for i in range(config.max_max_epoch): lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0) m.assign_lr(session, config.learning_rate * lr_decay) print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr))) train_perplexity = run_epoch(session, m, eval_op=m.train_op, verbose=True) print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity)) valid_perplexity = run_epoch(session, mvalid) print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity)) test_perplexity = run_epoch(session, mtest) print("Test Perplexity: %.3f" % test_perplexity) if FLAGS.save_path: print("Saving model to %s." % FLAGS.save_path) sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)
def main(_): if not FLAGS.data_path: raise ValueError("Must set --data_path to PTB data directory") raw_data = reader.ptb_raw_data(FLAGS.data_path) train_data, valid_data, test_data, _ = raw_data config = get_config() eval_config = get_config() eval_config.batch_size = 1 eval_config.num_steps = 1 if config.device == '-1': tf_dev = '/cpu:0' else: tf_dev = '/gpu:' + config.device print(tf_dev) tconfig = tf.ConfigProto(allow_soft_placement=True) if tf_dev.find('cpu') >= 0: # cpu version num_threads = os.getenv('OMP_NUM_THREADS', 1) tconfig = tf.ConfigProto(allow_soft_placement=True, intra_op_parallelism_threads=int(num_threads)) with tf.Graph().as_default(), tf.device(tf_dev), tf.Session( config=tconfig) as session: initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale) with tf.variable_scope("model", reuse=None, initializer=initializer): m = PTBModel(is_training=True, config=config) with tf.variable_scope("model", reuse=True, initializer=initializer): #mvalid = PTBModel(is_training=False, config=config) mtest = PTBModel(is_training=False, config=eval_config) tf.global_variables_initializer().run() total_average_batch_time = 0.0 epochs_info = [] for i in range(config.max_max_epoch): #lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0) #m.assign_lr(session, config.learning_rate * lr_decay) m.assign_lr(session, config.learning_rate) print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr))) train_perplexity, average_batch_time = run_epoch(session, m, train_data, m.train_op, verbose=True) total_average_batch_time += average_batch_time print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity)) if i % 2 == 0: epochs_info.append('%d:_:%.3f' % (i, train_perplexity)) # valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op()) # print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity)) print("average_batch_time: %.6f" % (total_average_batch_time / int(config.max_max_epoch))) print('epoch_info:' + ','.join(epochs_info)) test_perplexity, test_average_batch_time = run_epoch( session, mtest, test_data, tf.no_op()) print("Test Perplexity: %.3f" % test_perplexity)
def main(customConfig = CustomConfig): if not FLAGS.data_path: raise ValueError("Must set --data_path to PTB data directory") raw_data = reader.ptb_raw_data(FLAGS.data_path, FLAGS.file_prefix) train_data, valid_data, test_data, word_to_id, id_2_word = raw_data vocab_size = len(word_to_id) #print(word_to_id) print_('Distinct terms: %d' % vocab_size) config = get_config() if customConfig == None else customConfig() config.vocab_size = config.vocab_size if config.vocab_size < vocab_size else vocab_size eval_config = get_config() if customConfig == None else customConfig() eval_config.vocab_size = eval_config.vocab_size if eval_config.vocab_size < vocab_size else vocab_size eval_config.batch_size = 1 eval_config.num_steps = 1 if config.is_char_model: seed_for_sample = [c for c in FLAGS.seed_for_sample.replace(' ', '_')] else: seed_for_sample = FLAGS.seed_for_sample.split() with tf.Graph().as_default(): initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale) with tf.name_scope("Train"): with tf.variable_scope("Model", reuse=None, initializer=initializer): m = PTBModel(is_training=True, config=config) tf.summary.scalar("Training_Loss", m.cost) tf.summary.scalar("Learning_Rate", m.lr) with tf.name_scope("Valid"): with tf.variable_scope("Model", reuse=True, initializer=initializer): mvalid = PTBModel(is_training=False, config=config) tf.summary.scalar("Validation_Loss", mvalid.cost) with tf.name_scope("Test"): with tf.variable_scope("Model", reuse=True, initializer=initializer): mtest = PTBModel(is_training=False, config=eval_config) saver = tf.train.Saver(name='saver', write_version=tf.train.SaverDef.V2) sv = tf.train.Supervisor(logdir=FLAGS.save_path, save_model_secs=0, save_summaries_secs=0, saver=saver) old_valid_perplexity = 10000000000.0 #sessconfig = tf.ConfigProto(allow_soft_placement=True) #sessconfig.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1 with sv.managed_session() as session: if FLAGS.sample_mode: while True: inpt = raw_input("Enter your sample prefix: ") cnt = int(raw_input("Sample size: ")) if config.is_char_model: seed_for_sample = [c for c in inpt.replace(' ', '_')] else: seed_for_sample = inpt.split() print_(nowStr()+':', "Seed: %s" % pretty_print([word_to_id[x] for x in seed_for_sample], config.is_char_model, id_2_word)) print_(nowStr()+':', "Sample: %s" % pretty_print(do_sample(session, mtest, [word_to_id[word] for word in seed_for_sample], cnt), config.is_char_model, id_2_word)) print_('epoch', config.max_max_epoch) for i in range(config.max_max_epoch): print_("Seed: %s" % pretty_print([word_to_id[x] for x in seed_for_sample], config.is_char_model, id_2_word)) print_("Sample: %s" % pretty_print(do_sample(session, mtest, [word_to_id[word] for word in seed_for_sample], max(5 * (len(seed_for_sample) + 1), 10)), config.is_char_model, id_2_word)) lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0) m.assign_lr(session, config.learning_rate * lr_decay) print_(nowStr()+':', "Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr))) train_perplexity = run_epoch(session, m, train_data, is_train=True, verbose=True) print_(nowStr()+':', "Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity)) valid_perplexity = run_epoch(session, mvalid, valid_data) print_(nowStr()+':', "Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity)) if valid_perplexity < old_valid_perplexity: old_valid_perplexity = valid_perplexity sv.saver.save(session, FLAGS.save_path, i) elif valid_perplexity >= 1.3*old_valid_perplexity: if 
len(sv.saver.last_checkpoints)>0: sv.saver.restore(session, sv.saver.last_checkpoints[-1]) break else: if len(sv.saver.last_checkpoints)>0: sv.saver.restore(session, sv.saver.last_checkpoints[-1]) lr_decay *=0.5 print_(nowStr()+':', "Seed: %s" % pretty_print([word_to_id[x] for x in seed_for_sample], config.is_char_model, id_2_word)) print_(nowStr()+':', "Sample: %s" % pretty_print(do_sample(session, mtest, [word_to_id[word] for word in seed_for_sample], max(5 * (len(seed_for_sample) + 1), 10)), config.is_char_model, id_2_word)) test_perplexity = run_epoch(session, mtest, test_data) print_(nowStr()+':', "Test Perplexity: %.3f" % test_perplexity)
def main(_): if not FLAGS.data_path: raise ValueError("Must set --data_path to PTB data directory") raw_data = reader.ptb_raw_data(FLAGS.data_path, True) train_data, valid_data, _ = raw_data with tf.Graph().as_default(): initializer = tf.random_uniform_initializer(-FLAGS.init_scale, FLAGS.init_scale) with tf.name_scope("Train"): train_input = PTBInput(data=train_data, name="TrainInput") with tf.variable_scope("Model", reuse=None, initializer=initializer): m = PTBModel(is_training=True, input_=train_input) tf.summary.scalar("Training Loss", m.cost) tf.summary.scalar("Learning Rate", m.lr) with tf.name_scope("Train_states"): train_input = PTBInput(data=train_data, name="TrainInput") with tf.variable_scope("Model", reuse=True, initializer=initializer): mstates = PTBModel(is_training=False, input_=train_input) tf.summary.scalar("Training Loss", mstates.cost) with tf.name_scope("Valid"): valid_input = PTBInput(data=valid_data, name="ValidInput") with tf.variable_scope("Model", reuse=True, initializer=initializer): mvalid = PTBModel(is_training=False, input_=valid_input) tf.summary.scalar("Validation Loss", mvalid.cost) sv = tf.train.Supervisor(logdir=FLAGS.save_path) with sv.managed_session() as session: if FLAGS.load_path: sv.saver.restore(session, tf.train.latest_checkpoint(FLAGS.load_path)) else: for i in range(FLAGS.max_max_epoch): lr_decay = FLAGS.lr_decay**max(i + 1 - FLAGS.max_epoch, 0.0) m.assign_lr(session, FLAGS.learning_rate * lr_decay) print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr))) train_perplexity, stat = run_epoch(session, m, eval_op=m.train_op, verbose=True) print(stat.shape) print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity)) valid_perplexity, stat = run_epoch(session, mvalid) print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity)) # run and store the states on training set train_perplexity, stat = run_epoch(session, mstates, eval_op=m.train_op, verbose=True) f = h5py.File("states.h5", "w") stat = np.reshape(stat, (-1, mstates.size)) f["states1"] = stat f.close() if FLAGS.save_path: print("Saving model to %s." % FLAGS.save_path) sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)
def main(_): if not FLAGS.data_path: raise ValueError("Must set --data_path to PTB data directory") raw_data = reader.ptb_raw_data(FLAGS.data_path) train_data, valid_data, test_data, _ = raw_data config = get_config() eval_config = get_config() eval_config.batch_size = 1 eval_config.num_steps = 1 with tf.Graph().as_default(): tf.set_random_seed(FLAGS.gseed) if FLAGS.init_scale != 0.0: initializer = tf.random_uniform_initializer(-1*FLAGS.init_scale, FLAGS.init_scale) else: initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale) with tf.name_scope("Train"): with tf.variable_scope("Model", reuse=None, initializer=initializer): m = PTBModel(is_training=True, config=config, debug=FLAGS.debug) tf.summary.scalar("Learning Rate", m.lr) with tf.name_scope("Valid"): with tf.variable_scope("Model", reuse=True, initializer=initializer): mvalid = PTBModel(is_training=False, config=config) #with tf.name_scope("Test"): # with tf.variable_scope("Model", reuse=True, initializer=initializer): # mtest = PTBModel(is_training=False, config=eval_config) per_epoch_train_loss_update = tf.placeholder(tf.float32, shape=[]) per_epoch_train_loss = tf.Variable(float("inf"), dtype=tf.float32, trainable=False, name='Epoch_train_loss', validate_shape=False) tf.summary.scalar("Training Perplexity", per_epoch_train_loss) per_epoch_train_loss_update_op = tf.assign(per_epoch_train_loss, per_epoch_train_loss_update) per_epoch_valid_loss_update = tf.placeholder(tf.float32, shape=[]) per_epoch_valid_loss = tf.Variable(float("inf"), dtype=tf.float32, trainable=False, name='Epoch_train_loss', validate_shape=False) tf.summary.scalar("Validation Perplexity", per_epoch_valid_loss) per_epoch_valid_loss_update_op = tf.assign(per_epoch_valid_loss, per_epoch_valid_loss_update) # summary = tf.summary.merge_all() prev_validation_error = float("inf") validation_err_went_up_counter = 0 saver = tf.train.Saver() #summary_writer = tf.train.SummaryWriter(logdir=FLAGS.save_path, graph=tf.get_default_graph()) sv = tf.train.Supervisor(logdir=FLAGS.save_path, is_chief=True, save_model_secs=0, saver=saver, save_summaries_secs=0) # if FLAGS.initial_lr != 0.0: # we'll do 0 epoch erange = [-1] + range(config.max_max_epoch) else: erange = range(config.max_max_epoch) path_to_latest_checkpoint = "" with sv.managed_session() as session: for i in erange: if i != -1: lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0) m.assign_lr(session, config.learning_rate * lr_decay) else: #very first epoch m.assign_lr(session, FLAGS.initial_lr) print("Epoch: %d Learning rate: %.8f" % (i + 1, session.run(m.lr))) train_perplexity = run_epoch(session, m, train_data, eval_op=m.train_op, verbose=True, epoch_ind=i) print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity)) valid_perplexity = run_epoch(session, mvalid, valid_data) print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity)) if valid_perplexity < prev_validation_error: prev_validation_error = valid_perplexity validation_err_went_up_counter = 0 path_to_latest_checkpoint = sv.saver.save(sess=session, save_path=FLAGS.save_path+"/model", global_step=i) print("Saved currently best model to: %s" % path_to_latest_checkpoint) else: validation_err_went_up_counter += 1 if validation_err_went_up_counter > FLAGS.max_valid_increases: print("EARLY STOPPING!!! 
Restoring from %s" % (path_to_latest_checkpoint)) sv.saver.restore(session, path_to_latest_checkpoint) session.run(per_epoch_valid_loss_update_op, feed_dict={per_epoch_valid_loss_update: valid_perplexity}) session.run(per_epoch_train_loss_update_op, feed_dict={per_epoch_train_loss_update: train_perplexity})
def main(_): t1 = time.time() if not FLAGS.data_path: raise ValueError("Must set --data_path to PTB data directory") raw_data = reader.ptb_raw_data(FLAGS.data_path) train_data, valid_data, test_data, word_to_id = raw_data reverse_dic = dict(zip(word_to_id.values(), word_to_id.keys())) whole = [['a','in','nation','films','workers','institutions','assets',"'",'finance','good'], ['an','of','country','movies','employees','organizations','portfolio',",",'acquisition','great'], ['document','picture','end','almost','movies','big','down','quite','seems','minutes']] dic = [] dic2 = {} for words in whole: dic.append([]) for word in words: dic2[word] = word_to_id.get(word, word_to_id["<oov>"]) dic[-1].append(dic2[word]) config = get_config() eval_config = get_config() eval_config.batch_size = 1 eval_config.num_steps = 1 with tf.Graph().as_default(): initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale) with tf.name_scope("Train"): train_input = PTBInput(config=config, data=train_data, name="TrainInput") with tf.variable_scope("Model", reuse=None, initializer=initializer): m = PTBModel(is_training=True, config=config, input_=train_input, dic_ = dic) tf.scalar_summary("Training Loss", m.cost) tf.scalar_summary("Learning Rate", m.lr) tf.scalar_summary("Model Score", m.score) with tf.name_scope("Valid"): valid_input = PTBInput(config=config, data=valid_data, name="ValidInput") with tf.variable_scope("Model", reuse=True, initializer=initializer): mvalid = PTBModel(is_training=False, config=config, input_=valid_input, dic_= dic) tf.scalar_summary("Validation Loss", mvalid.cost) with tf.name_scope("Test"): test_input = PTBInput(config=config, data=test_data, name="TestInput") with tf.variable_scope("Model", reuse=True, initializer=initializer): mtest = PTBModel(is_training=False, config=eval_config, input_=test_input, dic_=dic) merged = tf.merge_all_summaries() sv = tf.train.Supervisor(logdir=FLAGS.save_path) with sv.managed_session() as session: for i in range(config.max_max_epoch): lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0) m.assign_lr(session, config.learning_rate * lr_decay) print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr))) print("Epoch: %d Model Score: %.3f" % (i + 1, session.run(m.score))) train_perplexity = run_epoch(session, m, eval_op=m.train_op, verbose=True) print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity)) valid_perplexity = run_epoch(session, mvalid) print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity)) test_perplexity = run_epoch(session, mtest) print("Test Perplexity: %.3f" % test_perplexity) t2 = time.time() print("time costs: %.3f" % (t2-t1)) final_embeds = session.run(m.embeds) tsne = TSNE(perplexity=30, n_components = 2, init='pca', n_iter = 5000) plot_only = 100 embeds = tsne.fit_transform(final_embeds[:plot_only,:]) labels = [reverse_dic[i] for i in xrange(plot_only)] plot(embeds,labels) if FLAGS.save_path: print("Saving model to %s." % FLAGS.save_path) sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)
def main(_): if not FLAGS.data_path: raise ValueError("Must set --data_path to PTB data directory") gpus = [ x.name for x in device_lib.list_local_devices() if x.device_type == "GPU" ] if FLAGS.num_gpus > len(gpus): raise ValueError("Your machine has only %d gpus " "which is less than the requested --num_gpus=%d." % (len(gpus), FLAGS.num_gpus)) raw_data = reader.ptb_raw_data(FLAGS.data_path) train_data, valid_data, test_data, _ = raw_data config = get_config() eval_config = get_config() eval_config.batch_size = 1 eval_config.num_steps = 1 with tf.Graph().as_default(): initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale) with tf.name_scope("Train"): train_input = PTBInput(config=config, data=train_data, name="TrainInput") with tf.variable_scope("Model", reuse=None, initializer=initializer): m = PTBModel(is_training=True, config=config, input_=train_input) tf.summary.scalar("Training Loss", m.cost) tf.summary.scalar("Learning Rate", m.lr) with tf.name_scope("Valid"): valid_input = PTBInput(config=config, data=valid_data, name="ValidInput") with tf.variable_scope("Model", reuse=True, initializer=initializer): mvalid = PTBModel(is_training=False, config=config, input_=valid_input) tf.summary.scalar("Validation Loss", mvalid.cost) with tf.name_scope("Test"): test_input = PTBInput(config=eval_config, data=test_data, name="TestInput") with tf.variable_scope("Model", reuse=True, initializer=initializer): mtest = PTBModel(is_training=False, config=eval_config, input_=test_input) models = {"Train": m, "Valid": mvalid, "Test": mtest} for name, model in models.items(): model.export_ops(name) metagraph = tf.train.export_meta_graph() temp_meta = MessageToJson(metagraph.graph_def) with open('kernelLogs/metagraph.json', 'w') as outfile: json.dump(temp_meta, outfile) #sys.exit() if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1: raise ValueError( "num_gpus > 1 is not supported for TensorFlow versions " "below 1.1.0") # soft_placement = True soft_placement = False if FLAGS.num_gpus > 1: soft_placement = True util.auto_parallel(metagraph, m) #added by ubaid all_ops = tf.get_default_graph().get_operations() adj_list_graph = {} for op in all_ops: adj_list_graph[op.name] = set([inp.name for inp in op.inputs]) adj_list_graph_notensors = {} for op in all_ops: adj_list_graph_notensors[op.name] = set( [inp.name.split(":")[0] for inp in op.inputs]) adj_list_graph_notensors = { op_name: list(op_deps) for op_name, op_deps in adj_list_graph_notensors.items() } adj_list_graph = { op_name: list(op_deps) for op_name, op_deps in adj_list_graph.items() } with open('kernelLogs/org_graph_rnnlm_ptb_%s.json' % (FLAGS.model), 'w') as outfile: json.dump(adj_list_graph, outfile) with open( 'kernelLogs/org_graph_notensors_rnnlm_ptb_%s.json' % (FLAGS.model), 'w') as outfile: json.dump(adj_list_graph_notensors, outfile) #sys.exit() ##### with tf.Graph().as_default(): tf.train.import_meta_graph(metagraph) for model in models.values(): model.import_ops() sv = tf.train.Supervisor(logdir=FLAGS.save_path) #config_proto = tf.ConfigProto(allow_soft_placement=soft_placement) # added by xilenteyex config_proto = tf.ConfigProto( allow_soft_placement=soft_placement, graph_options=tf.GraphOptions(build_cost_model=1)) config_proto.intra_op_parallelism_threads = 1 config_proto.inter_op_parallelism_threads = 1 config_proto.graph_options.optimizer_options.opt_level = -1 config_proto.graph_options.rewrite_options.constant_folding = ( rewriter_config_pb2.RewriterConfig.OFF) 
config_proto.graph_options.rewrite_options.arithmetic_optimization = ( rewriter_config_pb2.RewriterConfig.OFF) config_proto.graph_options.rewrite_options.dependency_optimization = ( rewriter_config_pb2.RewriterConfig.OFF) config_proto.graph_options.rewrite_options.layout_optimizer = ( rewriter_config_pb2.RewriterConfig.OFF) ###### with sv.managed_session(config=config_proto) as session: for i in range(config.max_max_epoch): lr_decay = config.lr_decay**max(i + 1 - config.max_epoch, 0.0) m.assign_lr(session, config.learning_rate * lr_decay) print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr))) train_perplexity = run_epoch(session, m, eval_op=m.train_op, verbose=True, epoch_no=i) print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity)) valid_perplexity = run_epoch(session, mvalid) print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity)) test_perplexity = run_epoch(session, mtest) print("Test Perplexity: %.3f" % test_perplexity) if FLAGS.save_path: print("Saving model to %s." % FLAGS.save_path) sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)
# # # Training data # # # ######################################################################## # The training data is a list of 929589 words, each represented by a number, # e.g. [9971, 9972, 9974, 9975, ...] # We read the data as mini-batches of size b = 30. Assume the size of each # sentence is 20 words (num_steps = 20). Then it takes floor(N / (b * h)) + 1 = 1548 # iterations for the learner to go through all sentences once, where N is the # size of the list of words, b is the batch size, and h is the size of each sentence. # So, the number of iterations is 1548. # Each batch read from the training set contains 600 words and has shape [30 x 20]. # Read and separate the data into training, validation and test sets. raw_data = reader.ptb_raw_data(data_dir) train_data, valid_data, test_data, vocab, word_to_id = raw_data def id_to_word(id_list): """Convert ids to words.""" line = [] for w in id_list: for word, wid in word_to_id.items(): if wid == w: line.append(word) return line print(id_to_word(train_data[0:100]))
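# A small sketch of the batching arithmetic described in the comments above. The numbers are
# illustrative; the exact iteration count depends on how the reader handles the final partial
# batch, and the names below (N, b, h) exist only for this example.
N = len(train_data)          # total number of word ids in the training list
b, h = 30, 20                # mini-batch size and sentence length (num_steps)
iterations_per_epoch = N // (b * h)
print('roughly %d iterations to pass over the training data once' % iterations_per_epoch)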
def main(_): if not FLAGS.data_path: raise ValueError("Must set --data_path to PTB data directory") gpus = [ x.name for x in device_lib.list_local_devices() if x.device_type == "GPU" ] if FLAGS.num_gpus > len(gpus): raise ValueError( "Your machine has only %d gpus " "which is less than the requested --num_gpus=%d." % (len(gpus), FLAGS.num_gpus)) raw_data = reader.ptb_raw_data(FLAGS.data_path) train_data, valid_data, test_data = raw_data config = get_config() eval_config = get_config() eval_config.batch_size = 1 eval_config.num_steps = 1 with tf.Graph().as_default(): initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale) with tf.name_scope("Train"): train_input = PTBInput(config=config, data=train_data, name="TrainInput") with tf.variable_scope("Model", reuse=None, initializer=initializer): m = PTBModel(is_training=True, config=config, input_=train_input) tf.summary.scalar("Training Loss", m.cost) tf.summary.scalar("Learning Rate", m.lr) with tf.name_scope("Valid"): valid_input = PTBInput(config=config, data=valid_data, name="ValidInput") with tf.variable_scope("Model", reuse=True, initializer=initializer): mvalid = PTBModel(is_training=False, config=config, input_=valid_input) tf.summary.scalar("Validation Loss", mvalid.cost) with tf.name_scope("Test"): test_input = PTBInput( config=eval_config, data=test_data, name="TestInput") with tf.variable_scope("Model", reuse=True, initializer=initializer): mtest = PTBModel(is_training=False, config=eval_config, input_=test_input) models = {"Train": m, "Valid": mvalid, "Test": mtest} for name, model in models.items(): model.export_ops(name) metagraph = tf.train.export_meta_graph() if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1: raise ValueError("num_gpus > 1 is not supported for TensorFlow versions " "below 1.1.0") soft_placement = False if FLAGS.num_gpus > 1: soft_placement = True util.auto_parallel(metagraph, m) with tf.Graph().as_default(): tf.train.import_meta_graph(metagraph) for model in models.values(): model.import_ops() sv = tf.train.Supervisor(logdir=FLAGS.save_path) config_proto = tf.ConfigProto(allow_soft_placement=soft_placement) with sv.managed_session(config=config_proto) as session: for i in range(config.max_max_epoch): lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0) m.assign_lr(session, config.learning_rate * lr_decay) print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr))) train_perplexity = run_epoch(session, m, eval_op=m.train_op, verbose=True) print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity)) valid_perplexity = run_epoch(session, mvalid) print("Epoch: %d Valid Perplexity: %.3f%%" % (i + 1, valid_perplexity)) test_perplexity = run_epoch(session, mtest) print("Test Perplexity: %.3f%%" % valid_perplexity) if FLAGS.save_path: print("Saving model to %s." % FLAGS.save_path) sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)
def main(_): if not FLAGS.data_path: raise ValueError("Must set --data_path to PTB data directory") config = configs.get_config(FLAGS.model) eval_config = configs.get_config(FLAGS.model) valid_config = configs.get_config(FLAGS.model) print(config.batch_size) eval_config.batch_size = 1 valid_config.batch_size = 20 raw_data = reader.ptb_raw_data(FLAGS.data_path + config.dataset + '/') train_data, valid_data, test_data, _ = raw_data if not os.path.exists(os.path.dirname(FLAGS.save_path)): try: os.makedirs(os.path.dirname(FLAGS.save_path)) except OSError as exc: if exc.errno != errno.EEXIST: raise with tf.Graph().as_default(): initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale) with tf.name_scope("Train"): train_input = PTBInput(config=config, data=train_data, name="TrainInput") with tf.variable_scope("Model", reuse=None, initializer=initializer): m = PTBModel(is_training=True, config=config, input_=train_input) with tf.name_scope("Valid"): valid_input = PTBInput(config=config, data=valid_data, name="ValidInput") with tf.variable_scope("Model", reuse=True, initializer=initializer): mvalid = PTBModel(is_training=False, config=config, input_=valid_input) with tf.name_scope("Test"): test_input = PTBInput(config=eval_config, data=test_data, name="TestInput") with tf.variable_scope("Model", reuse=True, initializer=initializer): mtest = PTBModel(is_training=False, config=eval_config, input_=test_input) saver = tf.train.Saver(tf.trainable_variables()) with tf.Session() as session: session.run(tf.global_variables_initializer()) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=session, coord=coord) if FLAGS.restore == "True": saver.restore(session, FLAGS.save_path + 'model.ckpt') if FLAGS.mode == "train": previous_val = 9999 if FLAGS.restore == "True": f = open(FLAGS.save_path + 'train-and-valid.txt', 'r') x = f.readlines()[2] x = x.rstrip() x = x.split(" ") previous_val = float(x[1]) print("previous validation is %f\n" % (previous_val)) f.close() for i in range(config.max_max_epoch): lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0) m.assign_lr(session, config.learning_rate * lr_decay) print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr))) train_perplexity = run_epoch(session, m, eval_op=m.train_op, verbose=True) print("Epoch: %d Train BPC: %.4f" % (i + 1, train_perplexity)) valid_perplexity = run_epoch(session, mvalid) print("Epoch: %d Valid BPC: %.4f" % (i + 1, valid_perplexity)) sys.stdout.flush() if i == 180: config.learning_rate *= 0.1 if valid_perplexity < previous_val: print("Storing weights") saver.save(session, FLAGS.save_path + 'model.ckpt') f = open(FLAGS.save_path + 'train-and-valid.txt', 'w') f.write("Epoch %d\nTrain %f\nValid %f\n" % (i, train_perplexity, valid_perplexity)) f.close() previous_val = valid_perplexity counter_val = 0 elif config.dataset == 'enwik8': counter_val += 1 if counter_val == 2: config.learning_rate *= 0.1 counter_val = 0 print("Loading best weights") saver.restore(session, FLAGS.save_path + 'model.ckpt') test_perplexity = run_epoch(session, mtest) print("Test Perplexity: %.4f" % test_perplexity) f = open(FLAGS.save_path + 'test_2.txt', 'w') f.write("Test %f\n" % (test_perplexity)) f.close() sys.stdout.flush() coord.request_stop() coord.join(threads)