def decode():
    with tf.Session(config=get_session_configs()) as sess:
        # Create model and load parameters.
        model = create_model(sess, True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        vocab, rev_vocab = read_vocabulary_from_file(paths['vocab_path_context'])

        # Load vocabulary vectors
        vocab_vectors = load_pickle_file(paths['vocab_vectors_context'])

        # Load FastText model used for preprocessing
        print("Load existing FastText model...")
        fast_text_model = fasttext.load_model(paths['fast_text_model_context'], encoding='utf-8')

        # Decode from standard input.
        sys.stdout.write("Human: ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        sentence = preprocess_input(sentence, fast_text_model, vocab_vectors)
        context = ""
        while sentence:
            context_sentence = context + sentence
            output = decode_sentence(context_sentence, vocab, rev_vocab, model, sess)
            print("Ola: " + " ".join(output))
            print("Human: ", end="")
            context = sentence  # or context = output
            sys.stdout.flush()
            sentence = sys.stdin.readline()
            sentence = preprocess_input(sentence, fast_text_model, vocab_vectors)
def decode():
    # Avoid allocating all of the GPU memory
    config = get_session_configs()
    with tf.Session(config=config) as sess:
        # Create model and load parameters.
        model = create_model(sess, True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        vocab, rev_vocab = read_vocabulary_from_file(paths['vocab_path'])

        # Load vocabulary vectors
        vocab_vectors = load_pickle_file(paths['vocab_vectors'])

        # Load FastText model used for preprocessing
        print("Load existing FastText model...")
        fast_text_model = fasttext.load_model(paths['fast_text_model'], encoding='utf-8')

        # Decode from standard input.
        print("To reset states, type '*reset*'")
        sys.stdout.write("Human: ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        sentence = preprocess_input(sentence, fast_text_model, vocab_vectors)

        # Initial state
        if FLAGS.use_lstm:
            initial_state = np.zeros((num_layers, 2, model.batch_size, size))
        else:
            initial_state = np.zeros((num_layers, model.batch_size, size))
        states = initial_state

        while sentence:
            output, states = decode_stateful_sentence(sentence, vocab, rev_vocab, model, sess, states)
            output = " ".join(output)
            output = get_sliced_output(output, 1)
            print("Vinyals_Stateful: " + output.strip())
            print("Human: ", end="")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
            if sentence.strip() == "*reset*":
                states = initial_state
                print("States were successfully reset.")
                print("Human: ", end="")
                sys.stdout.flush()
                sentence = sys.stdin.readline()
            sentence = preprocess_input(sentence, fast_text_model, vocab_vectors)
def decode():
    with tf.Session(config=get_session_configs()) as sess:
        # Create model and load parameters.
        model = create_model(sess, True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        vocab, rev_vocab = read_vocabulary_from_file(paths['vocab_path'])

        # Load vocabulary vectors
        vocab_vectors = load_pickle_file(paths['vocab_vectors'])

        # Load FastText model used for preprocessing
        print("Load existing FastText model...")
        fast_text_model = fasttext.load_model(paths['fast_text_model'], encoding='utf-8')

        if FLAGS.open_subtitles:
            num_output_sentences = 1
        else:
            num_output_sentences = 2

        # Decode from standard input.
        sys.stdout.write("Human: ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        sentence = preprocess_input(sentence, fast_text_model, vocab_vectors)
        while sentence:
            output = decode_sentence(sentence, vocab, rev_vocab, model, sess)
            output = " ".join(output)
            output = get_sliced_output(output, num_output_sentences)
            print("Grid LSTM: " + output.strip())
            print("Human: ", end="")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
            if FLAGS.context_full_turns:
                sentence = preprocess_input(output.strip() + " " + sentence.strip(),
                                            fast_text_model, vocab_vectors)
            else:
                sentence = preprocess_input(sentence, fast_text_model, vocab_vectors)
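All three decode functions above, as well as the train functions below, build their tf.Session from get_session_configs(), which is not reproduced in this listing. A minimal sketch, assuming the helper does no more than what its accompanying comment states ("Avoid allocating all of the GPU memory") by enabling on-demand GPU memory growth:

import tensorflow as tf

def get_session_configs():
    # Assumed implementation (not shown in the original listing): enable
    # on-demand GPU memory growth instead of reserving all GPU memory up front.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    return config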
def train():
    """Train the chatbot model."""
    print("Checking for needed files")
    check_for_needed_files_and_create()
    train_path = paths['train_path']
    shuffle_file(train_path, train_path)

    print("Creating file queue")
    filename_queue = input_pipeline(root=paths['preprocess_root_files'], start_name=paths['train_file'])
    filename_queue_dev = input_pipeline(root=paths['preprocess_root_files'], start_name=paths['dev_file'])

    perplexity_log_path = os.path.join(FLAGS.train_dir, paths['perplexity_log'])
    if not os.path.exists(perplexity_log_path):
        with open(perplexity_log_path, 'w') as fileObject:
            fileObject.write("Learning_rate: %d \t Optimizer: %s \n" % (FLAGS.learning_rate, optimizer))
            fileObject.write("Step \tPerplexity \tBucket perplexity \n")

    # Avoid allocating all of the GPU memory
    config = get_session_configs()

    with tf.device(use_gpu):
        with tf.Session(config=config) as sess:
            # Create model.
            print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
            model = create_model(sess, False)

            # Stream data
            print("Setting up coordinator")
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)

            # This is for the training loop.
            train_set = [[] for _ in _buckets]
            dev_set = [[] for _ in _buckets]
            step_time, loss = 0.0, 0.0
            current_step = 0
            previous_losses = []
            read_line = 0
            reading_file_path = ""

            # Create log writer object
            print("Create log writer object")
            summary_writer = tf.train.SummaryWriter(FLAGS.log_dir, graph=tf.get_default_graph())

            reader_train_data = tf.TextLineReader()  # skip_header_lines=int, number of lines to skip
            key, txt_row_train_data = reader_train_data.read(filename_queue)
            reader_dev_data = tf.TextLineReader()
            _, txt_row_dev_data = reader_dev_data.read(filename_queue_dev)

            lowest_perplexity = 20.0
            train_time = time.time()

            print("Starting training loop")
            try:
                while current_step < FLAGS.max_train_steps:  # not coord.should_stop():
                    if current_step % FLAGS.print_frequency == 0:
                        print("Step number: " + str(current_step))
                    read_line, reading_file_path = check_and_shuffle_file(key, sess, read_line, paths['train_path'])

                    # Get a batch
                    train_set, bucket_id = get_batch(txt_row_train_data, train_set, FLAGS.batch_size)
                    start_time = time.time()
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(train_set, bucket_id)

                    # Clean out trained bucket
                    train_set[bucket_id] = []

                    # Make a step
                    _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, False)

                    # Calculating variables
                    step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
                    loss += step_loss / FLAGS.steps_per_checkpoint
                    current_step += 1

                    # Once in a while, we save checkpoint, print statistics, and run evals.
                    if current_step % FLAGS.steps_per_checkpoint == 0:
                        check_time = time.time()
                        print(get_time(train_time, "to train"))

                        # Print statistics for the previous epoch.
                        dev_set, bucket_id = get_batch(txt_row_dev_data, dev_set, FLAGS.batch_size, ac_function=min)
                        perplexity = exp(float(loss)) if loss < 300 else float("inf")
                        print("global step %d learning rate %.4f step-time %.2f perplexity "
                              "%.2f" % (model.global_step.eval(), model.learning_rate.eval(), step_time, perplexity))

                        # Decrease learning rate if no improvement was seen over last 3 times.
                        if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                            sess.run(model.learning_rate_decay_op)
                        previous_losses.append(loss)

                        # Save checkpoint and zero timer and loss.
                        print("Save checkpoint")
                        checkpoint_path = os.path.join(FLAGS.train_dir, "Ola.ckpt")
                        model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                        step_time, loss = 0.0, 0.0

                        # Adding perplexity to tensorboard
                        perplexity_summary = tf.Summary()
                        overall_value = perplexity_summary.value.add()
                        overall_value.tag = "perplexity_overall"
                        overall_value.simple_value = perplexity

                        # Run evals on development set and print their perplexity.
                        print("Run evaluation on development set")
                        bucket_perplexity = ""
                        for bucket_id in xrange(len(_buckets)):
                            if len(dev_set[bucket_id]) == 0:
                                print(" eval: empty bucket %d" % bucket_id)
                                continue
                            encoder_inputs, decoder_inputs, target_weights = model.get_batch(dev_set, bucket_id)

                            # Clean out used bucket
                            del dev_set[bucket_id][:FLAGS.batch_size]

                            _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, True)
                            eval_ppx = exp(float(eval_loss)) if eval_loss < 300 else float("inf")
                            print(" eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
                            bucket_perplexity += "\t" + str(eval_ppx)

                            # Adding bucket perplexity to tensorboard
                            bucket_value = perplexity_summary.value.add()
                            bucket_value.tag = "perplexity_bucket %d" % bucket_id
                            bucket_value.simple_value = eval_ppx

                        summary_writer.add_summary(perplexity_summary, model.global_step.eval())

                        with open(os.path.join(FLAGS.train_dir, paths['perplexity_log']), 'a') as fileObject:
                            fileObject.write(str(model.global_step) + " \t" + str(perplexity) + bucket_perplexity + "\n")

                        # Save model if checkpoint was the best one
                        if perplexity < lowest_perplexity:  # and current_step > 400000:
                            lowest_perplexity = perplexity
                            checkpoint_path = os.path.join(FLAGS.train_dir, "Ola_best_.ckpt")
                            model.saver.save(sess, checkpoint_path, global_step=model.global_step)

                        sys.stdout.flush()
                        print(get_time(check_time), "to do checkpoint")
                        train_time = time.time()
            except tf.errors.OutOfRangeError:
                print('Done training, epoch reached')
            finally:
                coord.request_stop()
                coord.join(threads)
def train():
    """Train the stateful chatbot model."""
    print("Checking for needed files")
    check_for_needed_files_and_create()

    print("Creating file queues")
    filename_queue = input_pipeline(root=paths['stateful_datafiles'], start_name="merged_train", shuffle=False)
    filename_queue_dev = input_pipeline(root=paths['stateful_datafiles'], start_name="merged_dev", shuffle=False)

    perplexity_log_path = os.path.join(FLAGS.train_dir, paths['perplexity_log'])
    if not os.path.exists(perplexity_log_path):
        with open(perplexity_log_path, 'w') as fileObject:
            fileObject.write("Learning_rate: %d \t Optimizer: %s \t Lstm %s \n"
                             % (FLAGS.learning_rate, optimizer, FLAGS.use_lstm))
            fileObject.write("Step \tPerplexity \tBucket perplexity \n")

    # Avoid allocating all of the GPU memory
    config = get_session_configs()

    with tf.device(use_gpu):
        with tf.Session(config=config) as sess:
            # Create model.
            print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
            model = create_model(sess, False)

            # Stream data
            print("Setting up coordinator")
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)

            # This is for the training loop.
            step_time, loss = 0.0, 0.0
            current_step = 0
            train_set = [[] for _ in range(batch_size)]
            dev_set = [[] for _ in range(batch_size)]
            previous_losses = []
            read_line = 0
            read_line_dev = 0
            reading_file_path = paths['merged_train_stateful_path_file1']
            reading_dev_file_path = paths['merged_dev_stateful_path']

            # Create log writer object
            print("Create log writer object")
            summary_writer = tf.train.SummaryWriter(FLAGS.log_dir, graph=tf.get_default_graph())

            key, txt_row_train_data = tf.TextLineReader().read(filename_queue)
            key_dev, txt_row_dev_data = tf.TextLineReader().read(filename_queue_dev)

            lowest_perplexity = 20.0
            train_time = time.time()

            # Need an initial state for the encoder rnn
            if FLAGS.use_lstm:
                initial_state = np.zeros((num_layers, 2, batch_size, size))
            else:
                initial_state = np.zeros((num_layers, batch_size, size))
            state = initial_state
            dev_state = initial_state

            print("Starting training loop")
            try:
                while FLAGS.max_train_steps >= current_step:  # not coord.should_stop():
                    if current_step % FLAGS.print_frequency == 0:
                        print("Step number: " + str(current_step))

                    # Get a batch
                    # Find empty holders in the training set
                    empty_conversations = [index for index, conversation in enumerate(train_set)
                                           if conversation == []]
                    if empty_conversations != []:
                        init_key, init_line = sess.run([key, txt_row_train_data])
                        read_line, reading_file_path = check_and_shuffle_file(init_key, sess, read_line,
                                                                              reading_file_path, stateful=True)
                    train_set, batch_train_set, state = get_stateful_batch(txt_row_train_data, train_set,
                                                                           empty_conversations, init_line,
                                                                           state, size, FLAGS.use_lstm)
                    start_time = time.time()
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(batch_train_set)

                    # Make a step
                    _, step_loss, _, state = model.step(sess, encoder_inputs, decoder_inputs,
                                                        target_weights, state, False)

                    # Calculating variables
                    step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
                    loss += step_loss / FLAGS.steps_per_checkpoint
                    current_step += 1

                    # Once in a while, we save checkpoint, print statistics, and run evals.
                    if current_step % FLAGS.steps_per_checkpoint == 0:
                        check_time = time.time()
                        print(get_time(train_time), "to train")

                        # Print statistics for the previous epoch.
                        empty_dev_conversations = [index for index, conversation in enumerate(dev_set)
                                                   if conversation == []]
                        if empty_dev_conversations != []:
                            init_key_dev, init_line_dev = sess.run([key_dev, txt_row_dev_data])
                            read_line_dev, reading_dev_file_path = check_and_shuffle_file(
                                init_key_dev, sess, read_line_dev, reading_dev_file_path,
                                stateful=True, dev=True)
                        dev_set, batch_dev_set, dev_state = get_stateful_batch(
                            txt_row_dev_data, dev_set, empty_dev_conversations, init_line_dev,
                            dev_state, size, FLAGS.use_lstm)

                        perplexity = exp(float(loss)) if loss < 300 else float("inf")
                        print("global step %d learning rate %.4f step-time %.2f perplexity "
                              "%.2f" % (model.global_step.eval(), model.learning_rate.eval(), step_time, perplexity))

                        # Decrease learning rate if no improvement was seen over last 3 times.
                        if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                            sess.run(model.learning_rate_decay_op)
                        previous_losses.append(loss)

                        # Save checkpoint and zero timer and loss.
                        print("Save checkpoint")
                        checkpoint_path = os.path.join(FLAGS.train_dir, "Vinyals.ckpt")
                        model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                        step_time, loss = 0.0, 0.0

                        # Adding perplexity to tensorboard
                        perplexity_summary = tf.Summary()
                        overall_value = perplexity_summary.value.add()
                        overall_value.tag = "perplexity_overall"
                        overall_value.simple_value = perplexity

                        # Run evals on the development set over three consecutive steps and print their perplexity.
                        print("Run evaluation on development set")
                        step_perplexity = ""
                        for eval_step in range(1, 4):
                            if eval_step > 1:
                                # Refill empty conversation holders before the next eval step
                                empty_dev_conversations = [index for index, conversation in enumerate(dev_set)
                                                           if conversation == []]
                                if empty_dev_conversations != []:
                                    init_key_dev, init_line_dev = sess.run([key_dev, txt_row_dev_data])
                                    read_line_dev, reading_dev_file_path = check_and_shuffle_file(
                                        init_key_dev, sess, read_line_dev, reading_dev_file_path,
                                        stateful=True, dev=True)
                                dev_set, batch_dev_set, dev_state = get_stateful_batch(
                                    txt_row_dev_data, dev_set, empty_dev_conversations, init_line_dev,
                                    dev_state, size, FLAGS.use_lstm)

                            encoder_inputs, decoder_inputs, target_weights = model.get_batch(batch_dev_set)
                            _, eval_loss, _, dev_state = model.step(sess, encoder_inputs, decoder_inputs,
                                                                    target_weights, dev_state, True)
                            eval_ppx = exp(float(eval_loss)) if eval_loss < 300 else float("inf")
                            print(" eval: step %d perplexity %.2f" % (eval_step, eval_ppx))
                            step_perplexity += "\t" + str(eval_ppx)

                            # Adding step perplexity to tensorboard
                            step_value = perplexity_summary.value.add()
                            step_value.tag = "perplexity_step %d" % eval_step
                            step_value.simple_value = eval_ppx

                        summary_writer.add_summary(perplexity_summary, model.global_step.eval())

                        with open(os.path.join(FLAGS.train_dir, paths['perplexity_log']), 'a') as fileObject:
                            fileObject.write(str(model.global_step) + " \t" + str(perplexity) + step_perplexity + "\n")

                        # Save model if checkpoint was the best one
                        if perplexity < lowest_perplexity:
                            lowest_perplexity = perplexity
                            checkpoint_path = os.path.join(FLAGS.train_dir, "Vinyals_stateful_best_.ckpt")
                            model.saver.save(sess, checkpoint_path, global_step=model.global_step)

                        sys.stdout.flush()
                        get_time(check_time, "to do checkpoint")
                        train_time = time.time()
            except tf.errors.OutOfRangeError:
                print('Done training, epoch reached')
            finally:
                coord.request_stop()
                coord.join(threads)