def makeProcessModel(args, p_thres):
    # Load the training-time configuration and vocabulary saved next to the checkpoint.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
        saved_args = cPickle.load(f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'rb') as f:
        chars, vocab = cPickle.load(f)
    # Rebuild the network from the saved arguments, in inference mode.
    model = Model(saved_args, training=False)
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            # Restore the trained weights, then hand off to the model's processing routine.
            saver.restore(sess, ckpt.model_checkpoint_path)
            model.makeProcessModel(sess, chars, vocab, p_thres=p_thres)

def sequence_sample(args):
    with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
        saved_args = cPickle.load(f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'rb') as f:
        chars, vocab = cPickle.load(f)
    if args.prime == '':
        args.prime = chars[0]
    model = Model(saved_args, training=False)
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            model.sequence_sample(sess, chars, vocab)

def train():
    c = TrainConfig().define().print()
    vocab = load_vocab(c.vocab_path)
    c.vocab_size = len(vocab)
    model = Model(c)

    # Mask out positions beyond each sequence's length so padding does not contribute to the loss.
    weights = tf.reshape(
        tf.sequence_mask(model.seq_length, maxlen=c.time_steps, dtype=tf.float64),
        shape=[c.batch_size * c.time_steps])
    fat_loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
        logits=[model.logits],
        targets=[tf.reshape(model.targets_ph, [-1])],
        weights=[weights])
    loss = tf.reduce_sum(fat_loss) / c.batch_size

    # Gradient clipping + Adam.
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), c.grad_clip)
    lr = tf.Variable(c.lr, trainable=False)
    global_step = tf.train.get_or_create_global_step()
    optimizer = tf.train.AdamOptimizer(lr)
    train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step)

    ds = Dataset(c.ds_path, c.batch_size)
    saver = tf.train.Saver(max_to_keep=1)

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        save_path = restore_model(sess, saver, c)

        # summary stuff
        tf.summary.histogram('logits', model.logits)
        tf.summary.histogram('loss', fat_loss)
        tf.summary.scalar('train_loss', loss)
        summary_op = tf.summary.merge_all()
        summary_dir = os.path.join(c.log_dir, get_model_name(c),
                                   time.strftime('%Y.%m.%d:%H.%M.%S'))
        summary_writer = tf.summary.FileWriter(logdir=summary_dir, graph=sess.graph)

        # train
        for _ in range(c.epochs):
            x, y = ds.get_batch(sess)
            loss_, step, summary, _ = sess.run(
                (loss, global_step, summary_op, train_op),
                {model.inputs_ph: x, model.targets_ph: y})
            if step % c.log_step == 0:
                print(f"🔊 {step:-6d} - loss={loss_:.5f}")
                summary_writer.add_summary(summary, step)
            if step > 0 and step % c.save_step == 0:
                saved_path = saver.save(sess, save_path, global_step=step)
                print(f"💾 model saved to {saved_path}")

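# For intuition, a rough NumPy sketch of the masked per-position loss assembled in train()
# above (illustrative only -- not the tf.contrib implementation; all names here are made up).
import numpy as np

def masked_sequence_loss(logits, targets, mask):
    """Per-position cross-entropy, zeroed where the 0/1 mask marks padding.

    logits:  [batch * time, vocab] unnormalized scores
    targets: [batch * time] integer token ids
    mask:    [batch * time] 1.0 for real tokens, 0.0 for padding
    """
    shifted = logits - logits.max(axis=1, keepdims=True)                  # stable log-softmax
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    nll = -log_probs[np.arange(len(targets)), targets]                    # NLL of each target token
    return nll * mask                                                     # analogous to fat_loss above

# As in train() above: loss per sequence = summed masked NLL / batch_size
# loss = masked_sequence_loss(logits, targets, mask).sum() / batch_size
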
def sample_main(args):
    model_path, config_path, vocab_path = get_paths(args.save_dir)
    # Arguments passed to sample.py direct us to a saved model.
    # Load the separate arguments by which that model was previously trained.
    # That's saved_args. Use those to load the model.
    with open(config_path, 'rb') as f:
        saved_args = pickle.load(f)
    # Separately load chars and vocab from the save directory.
    with open(vocab_path, 'rb') as f:
        chars, vocab = pickle.load(f)
    # Create the model from the saved arguments, in inference mode.
    print("Creating model...")
    net = Model(saved_args, True)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(net.save_variables_list())
        # Restore the saved variables, replacing the initialized values.
        print("Restoring weights...")
        saver.restore(sess, model_path)
        chatbot(net, sess, chars, vocab, args.n, args.beam_width,
                args.relevance, args.temperature)

def __init__(self, c: NetConfig):
    self.config = c
    if not c.vocab_path:
        c.vocab_path = os.path.join(os.path.dirname(c.model_path), 'vocab.json')
    with open(c.vocab_path) as f:
        self.char_to_id = json.load(f)  # type: dict
    self.id_to_char = {i: ch for ch, i in self.char_to_id.items()}
    c.vocab_size = len(self.id_to_char)
    self.model = Model(c, training=False)
    self.sess = tf.Session()
    self.sess.run(tf.global_variables_initializer())
    self.load_graph(c.model_path)

def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # When resuming, check that the checkpoint directory and its metadata exist
    # and that the saved model is compatible with the current command-line arguments.
    if args.init_from is not None:
        assert os.path.isdir(args.init_from), "%s must be a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from, "config.pkl")), \
            "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from, "chars_vocab.pkl")), \
            "chars_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.latest_checkpoint(args.init_from)
        assert ckpt, "No checkpoint found"
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(args)[checkme], \
                "Command line argument and saved model disagree on '%s'" % checkme
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars == data_loader.chars, \
            "Data and loaded model disagree on character set!"
        assert saved_vocab == data_loader.vocab, \
            "Data and loaded model disagree on dictionary mappings!"

    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        summaries = tf.summary.merge_all()
        writer = tf.summary.FileWriter(
            os.path.join(args.log_dir, time.strftime("%Y-%m-%d-%H-%M-%S")))
        writer.add_graph(sess.graph)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        if args.init_from is not None:
            saver.restore(sess, ckpt)

        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                # Carry the recurrent state over from the previous batch.
                for i, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[i].c
                    feed[h] = state[i].h
                summ, train_loss, state, _ = sess.run(
                    [summaries, model.cost, model.final_state, model.train_op], feed)
                writer.add_summary(summ, e * data_loader.num_batches + b)
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                      .format(e * data_loader.num_batches + b,
                              args.num_epochs * data_loader.num_batches,
                              e, train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e == args.num_epochs - 1 and b == data_loader.num_batches - 1):
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))

def train(args):
    # Create the data_loader object, which loads up all of our batches, vocab dictionary, etc.
    # from utils.py (and creates them if they don't already exist).
    # These files go in the data directory.
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    load_model = False
    if not os.path.exists(args.save_dir):
        print("Creating directory %s" % args.save_dir)
        os.mkdir(args.save_dir)
    elif os.path.exists(os.path.join(args.save_dir, 'config.pkl')):
        # Trained model already exists
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
                saved_args = pickle.load(f)
            args.rnn_size = saved_args.rnn_size
            args.num_layers = saved_args.num_layers
            args.model = saved_args.model
            print("Found a previous checkpoint. Overwriting model description arguments to:")
            print(" model: {}, rnn_size: {}, num_layers: {}".format(
                saved_args.model, saved_args.rnn_size, saved_args.num_layers))
            load_model = True

    # Save all arguments to config.pkl in the save directory -- NOT the data directory.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    # Save a tuple of the characters list and the vocab dictionary to chars_vocab.pkl in
    # the save directory -- NOT the data directory.
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.chars, data_loader.vocab), f)

    # Create the model!
    print("Building the model")
    model = Model(args)

    config = tf.ConfigProto(log_device_placement=False)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(model.save_variables_list())
        if load_model:
            print("Loading saved parameters")
            saver.restore(sess, ckpt.model_checkpoint_path)
        global_epoch_fraction = sess.run(model.global_epoch_fraction)
        global_seconds_elapsed = sess.run(model.global_seconds_elapsed)
        if load_model:
            print("Resuming from global epoch fraction {:.3f},"
                  " total trained time: {}, learning rate: {}".format(
                      global_epoch_fraction, global_seconds_elapsed, sess.run(model.lr)))
        data_loader.cue_batch_pointer_to_epoch_fraction(global_epoch_fraction)
        initial_batch_step = int(
            (global_epoch_fraction - int(global_epoch_fraction)) * data_loader.total_batch_count)
        epoch_range = (int(global_epoch_fraction),
                       args.num_epochs + int(global_epoch_fraction))
        writer = tf.summary.FileWriter(args.save_dir, graph=tf.get_default_graph())
        outputs = [model.cost, model.final_state, model.train_op, model.summary_op]
        is_lstm = args.model == 'lstm'
        global_step = epoch_range[0] * data_loader.total_batch_count + initial_batch_step
        try:
            for e in range(*epoch_range):
                # e iterates through the training epochs.
                # Reset the model state, so it does not carry over from the end of the previous epoch.
                state = sess.run(model.initial_state)
                batch_range = (initial_batch_step, data_loader.total_batch_count)
                initial_batch_step = 0
                for b in range(*batch_range):
                    global_step += 1
                    if global_step % args.decay_steps == 0:
                        # Set the model.lr element of the model to track
                        # the appropriately decayed learning rate.
                        current_learning_rate = sess.run(model.lr)
                        current_learning_rate *= args.decay_rate
                        sess.run(tf.assign(model.lr, current_learning_rate))
                        print("Decayed learning rate to {}".format(current_learning_rate))
                    start = time.time()
                    # Pull the next batch inputs (x) and targets (y) from the data loader.
                    x, y = data_loader.next_batch()
                    # feed is a dictionary of variable references and respective values for initialization.
                    # Initialize the model's input data and target data from the batch,
                    # and initialize the model state to the final state from the previous batch, so that
                    # model state is accumulated and carried over between batches.
                    feed = {model.input_data: x, model.targets: y}
                    if is_lstm:
                        for i, (c, h) in enumerate(model.initial_state):
                            feed[c] = state[i].c
                            feed[h] = state[i].h
                    else:
                        for i, c in enumerate(model.initial_state):
                            feed[c] = state[i]
                    # Run the session! Specifically, tell TensorFlow to compute the graph to calculate
                    # the values of cost, final state, and the training op.
                    # Cost is used to monitor progress.
                    # Final state is used to carry over the state into the next batch.
                    # Training op is not used, but we want it to be calculated, since that calculation
                    # is what updates parameter states (i.e. that is where the training happens).
                    train_loss, state, _, summary = sess.run(outputs, feed)
                    elapsed = time.time() - start
                    global_seconds_elapsed += elapsed
                    writer.add_summary(summary, e * batch_range[1] + b + 1)
                    print("{}/{} (epoch {}/{}), loss = {:.3f}, time/batch = {:.3f}s"
                          .format(b, batch_range[1], e, epoch_range[1], train_loss, elapsed))
                    # Every save_every batches, save the model to disk.
                    # By default, only the five most recent checkpoint files are kept.
                    if (e * batch_range[1] + b + 1) % args.save_every == 0 \
                            or (e == epoch_range[1] - 1 and b == batch_range[1] - 1):
                        save_model(sess, saver, model, args.save_dir, global_step,
                                   data_loader.total_batch_count, global_seconds_elapsed)
        except KeyboardInterrupt:
            # Introduce a line break after ^C is displayed so save message
            # is on its own line.
            print()
        finally:
            writer.flush()
            global_step = e * data_loader.total_batch_count + b
            save_model(sess, saver, model, args.save_dir, global_step,
                       data_loader.total_batch_count, global_seconds_elapsed)
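
# Hypothetical command-line driver for the train() function above. The flag names mirror
# the attributes train() reads from `args`; the defaults below are placeholders, not
# values taken from the original project.
import argparse

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', default='data')
    parser.add_argument('--save_dir', default='save')
    parser.add_argument('--model', default='lstm')
    parser.add_argument('--rnn_size', type=int, default=128)
    parser.add_argument('--num_layers', type=int, default=2)
    parser.add_argument('--batch_size', type=int, default=50)
    parser.add_argument('--seq_length', type=int, default=50)
    parser.add_argument('--num_epochs', type=int, default=10)
    parser.add_argument('--decay_steps', type=int, default=1000)
    parser.add_argument('--decay_rate', type=float, default=0.97)
    parser.add_argument('--save_every', type=int, default=500)
    train(parser.parse_args())

if __name__ == '__main__':
    main()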