# Shared imports for this section (assumed: TensorFlow 1.x; load_word_dict,
# rnn_model, config/conf and the *_TOKEN constants come from the project).
import os

import numpy as np
import tensorflow as tf


def __init__(self, model_path, vocab_path, learning_rate=0.01, batch_size=1):
    self.word_to_idx = load_word_dict(vocab_path)
    self.idx_to_word = {v: k for k, v in self.word_to_idx.items()}
    self.learning_rate = learning_rate
    self.batch_size = batch_size
    tf.reset_default_graph()
    self.input_data = tf.placeholder(tf.int32, [batch_size, None])
    self.output_targets = tf.placeholder(tf.int32, [batch_size, None])
    # init model
    self.model = rnn_model(model='lstm',
                           input_data=self.input_data,
                           output_data=self.output_targets,
                           vocab_size=len(self.word_to_idx),
                           rnn_size=128,
                           num_layers=2,
                           batch_size=batch_size,
                           learning_rate=learning_rate)
    saver = tf.train.Saver(tf.global_variables())
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    self.sess = tf.Session()
    # init op
    self.sess.run(init_op)
    # restore the latest checkpoint; the session stays open on self.sess
    checkpoint = tf.train.latest_checkpoint(model_path)
    saver.restore(self.sess, checkpoint)
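
# `load_word_dict` is used throughout this section but not shown. A minimal
# sketch, assuming the vocab file stores one `word<TAB>index` pair per line
# (the project's real format may differ):
def load_word_dict(save_path):
    dict_data = dict()
    with open(save_path, 'r', encoding='utf-8') as f:
        for line in f:
            items = line.rstrip('\n').split('\t')
            if len(items) == 2:
                dict_data[items[0]] = int(items[1])
    return dict_data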

def ppl(sentence_list):
    ppl_list = []
    # load data dict
    word_to_int = load_word_dict(conf.word_dict_path)
    # init params
    batch_size = 1
    tf.reset_default_graph()
    input_data = tf.placeholder(tf.int32, [batch_size, None])
    output_targets = tf.placeholder(tf.int32, [batch_size, None])
    # init model
    end_points = rnn_model(model='lstm',
                           input_data=input_data,
                           output_data=output_targets,
                           vocab_size=len(word_to_int),
                           rnn_size=128,
                           num_layers=2,
                           batch_size=batch_size,
                           learning_rate=conf.learning_rate)
    saver = tf.train.Saver(tf.global_variables())
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    with tf.Session() as sess:
        # init op
        sess.run(init_op)
        checkpoint = tf.train.latest_checkpoint(conf.model_dir)
        saver.restore(sess, checkpoint)
        print("loading model from the checkpoint {0}".format(checkpoint))
        # infer each sentence
        for sentence in sentence_list:
            sent_ppl = 0
            # map chars to ids, falling back to UNK for out-of-vocab chars
            x = [word_to_int[c] if c in word_to_int else word_to_int[UNK_TOKEN]
                 for c in sentence]
            x = [word_to_int[START_TOKEN]] + x + [word_to_int[END_TOKEN]]
            print('x:', x)
            # shift by one: y holds the next-char target for each position
            y = np.array(x[1:]).reshape((-1, batch_size))
            x = np.array(x[:-1]).reshape((-1, batch_size))
            print(x.shape)
            print(y.shape)
            # get each word's perplexity
            word_count = x.shape[0]
            for i in range(word_count):
                perplexity = sess.run(end_points['perplexity'],
                                      feed_dict={input_data: x[i:i + 1, :],
                                                 output_targets: y[i:i + 1, :]})
                print('{0} -> {1}, perplexity: {2}'.format(
                    x[i:i + 1, :], y[i:i + 1, :], perplexity))
                # skip the first (START -> char) and last (char -> END) steps,
                # matching the (word_count - 2) divisor below
                if i == 0 or i == word_count - 1:
                    continue
                sent_ppl += perplexity
            sent_ppl /= (word_count - 2)
            print('perplexity:' + str(sent_ppl))
            ppl_list.append(sent_ppl)
    return ppl_list
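
# A short usage sketch for ppl() above. The sentences are illustrative (the
# second is a corrected form of the first); a lower average perplexity means
# the trained LM finds the sentence more fluent.
if __name__ == '__main__':
    sentences = ['少先队员因该为老人让坐', '少先队员应该为老人让座']
    for s, score in zip(sentences, ppl(sentences)):
        print(s, score)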

def generate(begin_word):
    batch_size = 1
    word_to_idx = load_word_dict(config.word_dict_path)
    vocabularies = [k for k, v in word_to_idx.items()]
    tf.reset_default_graph()
    input_data = tf.placeholder(tf.int32, [batch_size, None])
    # inference mode: output_data=None, so no loss/train ops are built;
    # pass the inference batch size so the initial state matches the input
    end_points = rnn_model(model='lstm',
                           input_data=input_data,
                           output_data=None,
                           vocab_size=len(word_to_idx),
                           rnn_size=128,
                           num_layers=2,
                           batch_size=batch_size,
                           learning_rate=0.0002)
    saver = tf.train.Saver(tf.global_variables())
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    with tf.Session() as sess:
        sess.run(init_op)
        checkpoint = tf.train.latest_checkpoint(config.model_dir)
        saver.restore(sess, checkpoint)
        print("loading model from the checkpoint {0}".format(checkpoint))
        # prime the model with the START token
        x = np.array([[word_to_idx[START_TOKEN]]])
        [predict, last_state] = sess.run(
            [end_points['prediction'], end_points['last_state']],
            feed_dict={input_data: x})
        if begin_word:
            word = begin_word
        else:
            word = to_word(predict, vocabularies)
        sentence = ''
        i = 0
        while word != END_TOKEN and word != START_TOKEN and word != UNK_TOKEN:
            sentence += word
            i += 1
            if i >= 24:
                break
            x = np.zeros((1, 1))
            try:
                x[0, 0] = word_to_idx[word]
            except KeyError:
                print("please enter a Chinese character again.")
                break
            # feed the previous state back in so generation is stateful
            [predict, last_state] = sess.run(
                [end_points['prediction'], end_points['last_state']],
                feed_dict={input_data: x,
                           end_points['initial_state']: last_state})
            word = to_word(predict, vocabularies)
        return sentence
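
# `to_word` is referenced above but not shown in this section. A minimal
# sketch, assuming `predict` is a probability distribution over the vocab;
# it samples a char proportionally to its probability (greedy argmax would
# also work):
def to_word(predict, vocabs):
    probs = np.asarray(predict).ravel()
    probs = probs / np.sum(probs)  # renormalize against numeric drift
    sample = np.random.choice(len(probs), p=probs)
    # guard against an index beyond the vocab list
    return vocabs[min(sample, len(vocabs) - 1)]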

def main(_):
    # build vocab and word dict
    data_vector, word_to_int = process_data(config.train_word_path,
                                            config.word_dict_path,
                                            config.cutoff_frequency)
    # batch data
    batches_inputs, batches_outputs = generate_batch(config.batch_size,
                                                     data_vector, word_to_int)
    # placeholder
    input_data = tf.placeholder(tf.int32, [config.batch_size, None])
    output_targets = tf.placeholder(tf.int32, [config.batch_size, None])
    # create model
    end_points = rnn_model(model='lstm',
                           input_data=input_data,
                           output_data=output_targets,
                           vocab_size=len(word_to_int),
                           rnn_size=128,
                           num_layers=2,
                           batch_size=config.batch_size,
                           learning_rate=config.learning_rate)
    saver = tf.train.Saver(tf.global_variables())
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    # start
    with tf.Session(config=tf.ConfigProto(log_device_placement=True,
                                          allow_soft_placement=True)) as sess:
        # init
        sess.run(init_op)
        start_epoch = 0
        checkpoint = tf.train.latest_checkpoint(config.model_dir)
        if checkpoint:
            saver.restore(sess, checkpoint)
            print("restore from the checkpoint {0}".format(checkpoint))
            # checkpoint names end with the epoch number (model_prefix-N)
            start_epoch += int(checkpoint.split('-')[-1])
        print('start training...')
        epoch = start_epoch  # keep defined if interrupted before the loop
        try:
            for epoch in range(start_epoch, config.epochs):
                n = 0
                n_chunk = len(data_vector) // config.batch_size
                for batch in range(n_chunk):
                    loss, _, _, perplexity = sess.run(
                        [end_points['total_loss'],
                         end_points['last_state'],
                         end_points['train_op'],
                         end_points['perplexity']],
                        feed_dict={input_data: batches_inputs[n],
                                   output_targets: batches_outputs[n]})
                    n += 1
                    print('Epoch: %d, batch: %d, training loss: %.6f, ppl: %.1f'
                          % (epoch, batch, loss, perplexity))
                if epoch % config.num_save_epochs == 0:
                    saver.save(sess,
                               os.path.join(config.model_dir, config.model_prefix),
                               global_step=epoch)
                    print('Save model to %s, epoch: %d'
                          % (os.path.join(config.model_dir, config.model_prefix),
                             epoch))
        except KeyboardInterrupt:
            print('Interrupted manually, trying to save a checkpoint for now...')
            saver.save(sess,
                       os.path.join(config.model_dir, config.model_prefix),
                       global_step=epoch)
            print('Last epoch was saved; next run will start from epoch {}.'
                  .format(epoch))
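
# `generate_batch` is assumed above but not shown. A minimal sketch of the
# usual shift-by-one pairing: targets are the inputs advanced one position,
# with sequences padded per batch (padding with UNK here is an assumption;
# the project's real implementation may differ):
def generate_batch(batch_size, data_vector, word_to_int):
    n_chunk = len(data_vector) // batch_size
    x_batches, y_batches = [], []
    pad = word_to_int.get(UNK_TOKEN, 0)
    for i in range(n_chunk):
        batch = data_vector[i * batch_size:(i + 1) * batch_size]
        length = max(len(seq) for seq in batch)
        x = np.full((batch_size, length), pad, np.int32)
        for row, seq in enumerate(batch):
            x[row, :len(seq)] = seq
        y = np.copy(x)
        y[:, :-1] = x[:, 1:]  # next-step targets; last column repeats
        x_batches.append(x)
        y_batches.append(y)
    return x_batches, y_batches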

def main(_):
    # build vocab and word dict
    data_vector, word_to_int = process_data(conf.train_word_path,
                                            conf.word_dict_path)
    # batch data
    batches_inputs, batches_outputs = generate_batch(conf.batch_size,
                                                     data_vector, word_to_int)
    # placeholder
    input_data = tf.placeholder(tf.int32, [conf.batch_size, None])
    output_targets = tf.placeholder(tf.int32, [conf.batch_size, None])
    # create model
    end_points = rnn_model(model='lstm',
                           input_data=input_data,
                           output_data=output_targets,
                           vocab_size=len(word_to_int),
                           rnn_size=128,
                           num_layers=2,
                           batch_size=conf.batch_size,
                           learning_rate=conf.learning_rate)
    saver = tf.train.Saver(tf.global_variables())
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    # start
    with tf.Session() as sess:
        # init
        sess.run(init_op)
        start_epoch = 0
        checkpoint = tf.train.latest_checkpoint(conf.model_dir)
        if checkpoint:
            saver.restore(sess, checkpoint)
            print("restore from the checkpoint {0}".format(checkpoint))
            start_epoch += int(checkpoint.split('-')[-1])
        print('start training...')
        epoch = start_epoch  # keep defined if interrupted before the loop
        try:
            for epoch in range(start_epoch, conf.epochs):
                n = 0
                n_chunk = len(data_vector) // conf.batch_size
                for batch in range(n_chunk):
                    loss, _, _, perplexity = sess.run(
                        [end_points['total_loss'],
                         end_points['last_state'],
                         end_points['train_op'],
                         end_points['perplexity']],
                        feed_dict={input_data: batches_inputs[n],
                                   output_targets: batches_outputs[n]})
                    n += 1
                    print('Epoch: %d, batch: %d, training loss: %.6f, ppl: %.1f'
                          % (epoch, batch, loss, perplexity))
                if epoch % conf.num_save_epochs == 0:
                    saver.save(sess,
                               os.path.join(conf.model_dir, conf.model_prefix),
                               global_step=epoch)
        except KeyboardInterrupt:
            print('Interrupted manually, trying to save a checkpoint for now...')
            saver.save(sess,
                       os.path.join(conf.model_dir, conf.model_prefix),
                       global_step=epoch)
            print('Last epoch was saved; next run will start from epoch {}.'
                  .format(epoch))

def generate(begin_word):
    batch_size = 1
    word_to_int = load_word_dict(conf.word_dict_path)
    vocabularies = [k for k, v in word_to_int.items()]
    tf.reset_default_graph()
    input_data = tf.placeholder(tf.int32, [batch_size, None])
    # inference mode: output_data=None, so no loss/train ops are built;
    # pass the inference batch size so the initial state matches the input
    end_points = rnn_model(model='lstm',
                           input_data=input_data,
                           output_data=None,
                           vocab_size=len(word_to_int),
                           rnn_size=128,
                           num_layers=2,
                           batch_size=batch_size,
                           learning_rate=0.0002)
    saver = tf.train.Saver(tf.global_variables())
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    with tf.Session() as sess:
        sess.run(init_op)
        checkpoint = tf.train.latest_checkpoint(conf.model_dir)
        saver.restore(sess, checkpoint)
        print("loading model from the checkpoint {0}".format(checkpoint))
        # prime the model with the START token
        x = np.array([[word_to_int[START_TOKEN]]])
        [predict, last_state] = sess.run(
            [end_points['prediction'], end_points['last_state']],
            feed_dict={input_data: x})
        if begin_word:
            word = begin_word
        else:
            word = to_word(predict, vocabularies)
        sentence = ''
        i = 0
        while word != END_TOKEN and word != START_TOKEN and word != UNK_TOKEN:
            sentence += word
            i += 1
            if i >= 24:
                break
            x = np.zeros((1, 1))
            try:
                x[0, 0] = word_to_int[word]
            except KeyError:
                print("please enter a Chinese character again.")
                break
            # feed the previous state back in so generation is stateful
            [predict, last_state] = sess.run(
                [end_points['prediction'], end_points['last_state']],
                feed_dict={input_data: x,
                           end_points['initial_state']: last_state})
            word = to_word(predict, vocabularies)
        return sentence
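
# Usage sketch for generate() above: seed the model with one Chinese
# character and print the continuation (the seed character is illustrative).
if __name__ == '__main__':
    print(generate('美'))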