        and (i + start_epoch + 1) % 5 == 0):
    filename = data_path + "LFT_result_%d.txt" % (i + start_epoch)

    polite_responses = run_seq2seq(
        sess, source_test_polite, target_test, "test", i + start_epoch)
    neutral_responses = run_seq2seq(
        sess, source_test_neutral, target_test, "test", i + start_epoch)
    rude_responses = run_seq2seq(
        sess, source_test_rude, target_test, "test", i + start_epoch)

    assert len(polite_responses) == len(neutral_responses) == len(rude_responses)

    num_responses = len(polite_responses)
    zipped = zip_lsts(
        [source_test[:num_responses], target_test[:num_responses],
         polite_responses, neutral_responses, rude_responses])
    flattened = [decode2string(index2token, sent, remove_END_TOKEN=True)
                 for tp in zipped for sent in tp]

    # Now we mark sentences that are generated by our model
    marked_G = [("G: " + sent) if k % 5 == 1 else sent
                for (k, sent) in enumerate(flattened)]
    marked_P = [("P: " + sent) if k % 5 == 2 else sent
                for (k, sent) in enumerate(marked_G)]
    marked_N = [("N: " + sent) if k % 5 == 3 else sent

if (((i + start_epoch + 1) >= 10  # only test for later epochs
        and (i + start_epoch + 1) % 5 == 0)
        or infer_only and not get_PPL):  # for getting perplexity of test data, use train branch
    responses = run_seq2seq(sess, source_test, target_test, "test", i + start_epoch)

    # # need to store all inferred responses in a pickle file
    # if infer_only:
    #     dump_pickle(
    #         "%sseq2seq_RL_result%s_%d_infer.pkl" % (data_path, extra_str, i + start_epoch),
    #         responses)

    num_responses = len(responses)
    zipped = zip_lsts([
        source_test[:num_responses], target_test[:num_responses], responses])
    flattened = [decode2string(index2token, sent, remove_END_TOKEN=True)
                 for tp in zipped for sent in tp]

    # Now we mark sentences that are generated by our model
    marked_G = [("G: " + sent) if k % 3 == 1 else sent
                for (k, sent) in enumerate(flattened)]
    marked_M = [("M: " + sent) if k % 3 == 2 else sent
                for (k, sent) in enumerate(marked_G)]

    filename = "%sseq2seq_RL_result%s_%d.txt" % (data_path, extra_str, i + start_epoch)

def build_seq2seq(input_seqs, target_seqs, filtered_target_seqs,
                  input_seq_lengths, target_seq_lengths, is_training):
    with tf.variable_scope("seq2seq"):
        with tf.device('/cpu:0'):
            reuse = False
            if get_PPL:
                keep_prob = tf.convert_to_tensor(1.0)
            else:
                keep_prob = get_keep_prob(dropout_rate, is_training)

            sequence_mask = get_sequence_mask(target_seq_lengths)
            unk_mask = get_mask(target_seqs, unk_indices)
            decoder_mask = tf.logical_and(sequence_mask, tf.logical_not(unk_mask))
            decoder_mask_float = tf.cast(decoder_mask, tf.float32)

            # Embed inputs
            with tf.variable_scope("embedding"):
                embedding = create_embedding(
                    embedding_word2vec_politeness, embedding_word2vec_movie,
                    shared_vocab_size_politeness, shared_vocab_size_movie,
                    new_vocab_size_politeness, new_vocab_size_movie,
                    "seq2seq")
                embedded_input_seqs = tf.nn.embedding_lookup(embedding, input_seqs)
                embedded_target_seqs = tf.nn.embedding_lookup(embedding, target_seqs)

            # Optimizer
            optimizer = tf.train.AdamOptimizer(learning_rate)

            tower_grads = []
            if credit_assignment:
                tower_grads_polite = []
            sample_ids_lst = []
            final_lengths_lst = []
            sampled_sample_ids_lst = []
            sampled_final_lengths_lst = []

    reuse = False
    trainable_variables = []
    num_tokens_lst = []
    total_losses = []
    for i in range(num_gpus):
        with tf.device("/gpu:%d" % (gpu_start_index + i)):
            with tf.variable_scope("seq2seq"):
                if i == 1:  # reuse variables on every tower after the first
                    reuse = True
                start = i * batch_size_per_gpu
                end = start + batch_size_per_gpu

                input_max_seq_length = tf.reduce_max(input_seq_lengths[start:end])
                target_max_seq_length = tf.reduce_max(target_seq_lengths[start:end])

                with tf.variable_scope("encoder", reuse=reuse):
                    cell_fw = create_MultiRNNCell(
                        [hidden_size_encoder] * (num_layers_encoder // 2),
                        keep_prob, num_proj=None, reuse=reuse)
                    cell_bw = create_MultiRNNCell(
                        [hidden_size_encoder] * (num_layers_encoder // 2),
                        keep_prob, num_proj=None, reuse=reuse)
                    (encoder_outputs_original,
                     encoder_final_state_original) = bidirecitonal_dynamic_lstm(
                        cell_fw, cell_bw,
                        embedded_input_seqs[start:end, :input_max_seq_length, :],
                        input_seq_lengths[start:end])

                    [encoder_outputs, encoder_seq_lengths, encoder_final_state] = tf.cond(
                        is_training,
                        lambda: [encoder_outputs_original,
                                 input_seq_lengths[start:end],
                                 encoder_final_state_original],
                        lambda: [tf.contrib.seq2seq.tile_batch(encoder_outputs_original, beam_width),
                                 tf.contrib.seq2seq.tile_batch(input_seq_lengths[start:end], beam_width),
                                 tile_multi_cell_state(encoder_final_state_original)])  # only works for decoders with more than one layer!
with tf.variable_scope("decoder", reuse=reuse): decoder_cell = create_MultiRNNCell( [hidden_size_decoder] * (num_layers_decoder), keep_prob, num_proj=vocab_size, memory=encoder_outputs, memory_seq_lengths=encoder_seq_lengths, reuse=reuse) decoder_zero_state = tf.cond( is_training, lambda: decoder_cell.zero_state( batch_size_per_gpu, tf.float32), lambda: decoder_cell.zero_state( batch_size_per_gpu * beam_width, tf.float32)) state_last = decoder_zero_state[-1].clone( cell_state=encoder_final_state[-1]) state_previous = encoder_final_state[:-1] decoder_initial_state = state_previous + ( state_last, ) # concat tuples # training helper (for teacher forcing) helper_train = tf.contrib.seq2seq.TrainingHelper( embedded_target_seqs[ start:end, :target_max_seq_length - 1, :], # get rid of end_token target_seq_lengths[start:end] - 1) # the length is thus decreased by 1 (decoder_outputs_train, _) = decode(decoder_cell, helper_train, initial_state=decoder_initial_state) (logits, _) = decoder_outputs_train # Get trainable_variables # (up to now we already have all the seq2seq trainable vars) if trainable_variables == []: trainable_variables = tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope="seq2seq") loss_ML = tf.contrib.seq2seq.sequence_loss( logits, target_seqs[ start:end, 1:target_max_seq_length], # get rid of start_token decoder_mask_float[start:end, 1:target_max_seq_length]) num_tokens = tf.reduce_sum( decoder_mask_float[start:end, 1:target_max_seq_length]) num_tokens_lst.append(num_tokens) total_loss = loss_ML * num_tokens total_losses.append(total_loss) if polite_training: helper_sample = tf.contrib.seq2seq.SampleEmbeddingHelper( embedding, start_tokens[start:end], end_token) (decoder_outputs_sample, final_lengths_sample) = decode( decoder_cell, helper_sample, decoder_initial_state) (logits_sample, sample_ids_sample) = decoder_outputs_sample max_final_lengths_sample = tf.reduce_max( final_lengths_sample) sampled_sample_ids_lst.append( pad_and_truncate(sample_ids_sample, final_lengths_sample)) sampled_final_lengths_lst.append(final_lengths_sample) # Compute sampled sequence loss WITHOUT averaging (will do that later) decoder_mask_sample = get_sequence_mask( final_lengths_sample, dtype=tf.float32) seq_losses_sample = tf.contrib.seq2seq.sequence_loss( logits_sample, sample_ids_sample, decoder_mask_sample, average_across_timesteps=False, average_across_batch=False) if polite_training: with tf.variable_scope( "classifier"): # jump back to the classifier scope # Filter out tokens that the classifier doesn't know vocab_mask = tf.cast( sample_ids_sample < vocab_size_politeness, tf.int32) sample_ids_sample_classifier = sample_ids_sample * vocab_mask # Feed sampled ids to classifier (scores_RL, credit_weights_RL) = build_classifier( sample_ids_sample_classifier, final_lengths_sample, reuse) # Stop gradients from propagating back scores_RL_stop = tf.stop_gradient(scores_RL) credit_weights_RL_stop = tf.stop_gradient( credit_weights_RL) if thresholding: # Filter scores that are >= threshold and <= 1 - threshold filtered_scores_RL = tf.map_fn(filter_with_threshold, scores_RL_stop) else: filtered_scores_RL = scores_RL_stop with tf.variable_scope("seq2seq"): with tf.variable_scope("decoder", reuse=reuse): # Get valid mask for sampled sequence decoder_mask_classifier = tf.cast( tf.not_equal(sample_ids_sample, 0), tf.float32 ) # propagate back the whole sentence (including <end>) tiled_scores = tf.tile( # tile scores to 2D tf.expand_dims(filtered_scores_RL - baseline, axis=1), [1, 
                    if flip_polite:  # if we actually want a rude dialogue system
                        tiled_scores = -1.0 * tiled_scores

                    # Compute sequence losses for polite-RL
                    seq_losses_classifier = (
                        beta * seq_losses_sample * decoder_mask_classifier
                        / tf.reduce_sum(decoder_mask_classifier) * tiled_scores)

                    if credit_assignment:
                        grads_polite = tf.gradients(
                            seq_losses_classifier, trainable_variables,
                            grad_ys=credit_weights_RL_stop)  # credit weights as initial gradients
                        grads_polite = zip_lsts([grads_polite, trainable_variables])
                        tower_grads_polite.append(grads_polite)
                    else:
                        loss_polite = tf.reduce_sum(seq_losses_classifier)
        else:
            credit_weights_RL_stop = None

        with tf.variable_scope("seq2seq"):
            with tf.variable_scope("decoder", reuse=reuse):
                # Infer branch (beam search!)
                beam_search_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                    decoder_cell, embedding, start_tokens[start:end], end_token,
                    decoder_initial_state, beam_width,
                    length_penalty_weight=length_penalty_weight)
                output_beam = tf.contrib.seq2seq.dynamic_decode(
                    beam_search_decoder,
                    # impute_finished=True,  # cannot be used with BeamSearchDecoder
                    maximum_iterations=max_iterations,
                    swap_memory=True)
                sample_ids = output_beam[0].predicted_ids[:, :, 0]
                final_lengths = output_beam[2][:, 0]
                sample_ids_lst.append(pad_and_truncate(sample_ids, final_lengths))
                final_lengths_lst.append(final_lengths)

        with tf.device("/gpu:%d" % (gpu_start_index + i)):
            with tf.variable_scope("seq2seq", reuse=reuse):
                # Compute loss
                loss = loss_ML
                if polite_training and not credit_assignment:
                    loss = loss + loss_polite

                # Compute tower gradients
                grads = compute_grads(loss, optimizer, trainable_variables)
                tower_grads.append(grads)

    with tf.device('/cpu:0'):
        with tf.variable_scope("seq2seq"):
            # Concat sample ids and their respective lengths
            batch_sample_ids = tf.concat(sample_ids_lst, axis=0)
            batch_final_lengths = tf.concat(final_lengths_lst, axis=0)

            if polite_training:
                batch_sampled_sample_ids = tf.concat(sampled_sample_ids_lst, axis=0)

            batch_total_loss = tf.add_n(total_losses)
            batch_num_tokens = tf.add_n(num_tokens_lst)
            # Thus, the effective batch size is actually batch_size_per_gpu

            if polite_training and credit_assignment:
                apply_gradients_op = apply_multiple_grads(
                    optimizer, [tower_grads, tower_grads_polite])
            else:
                apply_gradients_op = apply_grads(optimizer, tower_grads)

    return (batch_sample_ids, batch_final_lengths,
            batch_total_loss, batch_num_tokens,
            apply_gradients_op, credit_weights_RL_stop, embedding)
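
# NOTE: compute_grads/apply_grads are helpers defined elsewhere in this repo.
# The sketch below only illustrates the usual multi-GPU "tower gradient"
# pattern they are assumed to follow: collect per-tower (grad, var) pairs,
# average the gradients for each variable, then apply a single optimizer
# update. Names, signatures, and the dense-gradient assumption are
# illustrative, not the repo's actual implementation.
import tensorflow as tf


def compute_grads_sketch(loss, optimizer, var_list):
    # One list of (grad, var) pairs per GPU tower.
    return optimizer.compute_gradients(loss, var_list=var_list)


def apply_grads_sketch(optimizer, tower_grads):
    # tower_grads: [[(grad, var), ...] for each tower]; assumes dense gradients
    # and the same variable ordering on every tower.
    averaged = []
    for grads_and_vars in zip(*tower_grads):  # group the same variable across towers
        grads = [g for (g, _) in grads_and_vars if g is not None]
        var = grads_and_vars[0][1]
        if grads:
            averaged.append((tf.reduce_mean(tf.stack(grads, axis=0), axis=0), var))
    return optimizer.apply_gradients(averaged)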

def run_vhred(model, sess, mode, epoch):
    training_flag = (mode == "train")

    norm_dialogues = norm_data_dict[mode]
    adv_dialogues = adv_data_dict[mode]

    generator = DataGenerator(norm_dialogues,
                              adv_dialogues=adv_dialogues,
                              feed_both_examples=feed_both_examples,
                              is_training=training_flag,
                              batch_size=batch_size,
                              max_dialogue_length=max_dialogue_length)
    batch = generator.batch_generator()
    print("Initialized data generator.")

    responses = []
    total_loss = 0.0
    adv_total_loss = 0.0
    total_num_tokens = 0.0
    batch_counter = 0

    if mode != "train":
        source_lst = []
        target_lst = []
        dialogue_indices_lst = []
        start_turn_indices_lst = []

    if use_max_margin:
        avg_margin = 0.0

    while True:
        next_batch = next(batch)
        if next_batch is None:
            break

        # if it's os, we always set start_turn_indices to 0
        (dialogue_indices, start_turn_indices,
         examples, turn_lengths_lst) = next_batch

        feed_dict_seqs = {
            model.dialogue: examples,
            model.turn_length: turn_lengths_lst,
            model.start_turn_index: start_turn_indices,
            model.start_tokens: [start_token] * batch_size}

        if mode == "train":
            if use_max_margin:
                fetches = [model.batch_total_loss,
                           model.batch_num_tokens,
                           model.apply_gradients_op,
                           model.avg_margin_loss,  # testing
                           model.global_step]
            else:
                fetches = [model.batch_total_loss,
                           model.batch_num_tokens,
                           model.apply_gradients_op,
                           model.global_step]
            feed_dict = {
                model.keep_prob: 1 - dropout_rate,
                model.is_training: training_flag}
            result = sess.run(fetches, feed_dict={**feed_dict_seqs, **feed_dict})

            if use_max_margin:
                avg_margin = (avg_margin * batch_counter + result[-2]) / (batch_counter + 1)
                print("Avg margin (this should be getting smaller "
                      "(or getting larger in abs. value) over time):", avg_margin)

            if feed_both_examples:
                (loss, adv_loss) = result[0]
            else:
                loss = result[0]

            average_log_perplexity = loss / result[1]
            total_loss += loss
            total_num_tokens += result[1]
            print("Epoch (%s) %d, Batch %d, Global step %d:"
                  % (mode, epoch, batch_counter, result[-1]))
            print("Perplexity: %.2f" % exp(average_log_perplexity))
            print("Perplexity so far:", exp(total_loss / total_num_tokens))

            if feed_both_examples:
                adv_average_log_perplexity = adv_loss / result[1]
                adv_total_loss += adv_loss
                print("Adv-perplexity: %.2f" % exp(adv_average_log_perplexity))
                print("Adv-perplexity so far:", exp(adv_total_loss / total_num_tokens))
        else:
            (source, target) = get_source_and_target(examples)
            source_lst.extend(source)
            target_lst.extend(target)
            dialogue_indices_lst.extend(dialogue_indices)
            start_turn_indices_lst.extend(start_turn_indices)

            feed_dict = {
                model.keep_prob: 1.0,
                model.is_training: training_flag}
            (ids, lengths) = sess.run(
                [model.batch_sample_ids_beam, model.batch_final_lengths_beam],
                feed_dict={**feed_dict_seqs, **feed_dict})
            batch_responses = [
                [index for index in response[:length]]
                for (response, length) in zip(ids.tolist(), lengths.tolist())]
            responses.extend(batch_responses)
            print("Finished testing batch %d" % batch_counter)

        batch_counter += 1

    if mode == "train":
        epoch_perplexity = total_loss / total_num_tokens
        print("Epoch (%s) %d average perplexity: %.2f"
              % (mode, epoch, exp(epoch_perplexity)))

        if force_store_point == "":
            store_ckpt = os.path.join(ckpt_path, f"{model_extra_str}_{epoch}")
        else:
            store_ckpt = force_store_point
        saver_seq2seq.save(sess, store_ckpt)
        print(f"Checkpoint saved for epoch {epoch}.")
    else:
        zipped = zip_lsts([dialogue_indices_lst, start_turn_indices_lst,
                           source_lst, target_lst, responses])
        zipped.sort(key=lambda x: x[:2])  # sort on dialogue indices & start_turn_indices
        zipped_responses = zip_lsts(unzip_lst(zipped)[2:])
        return zipped_responses
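
# zip_lsts and unzip_lst come from the repo's util module. A minimal sketch of
# the behavior this file relies on (zipping several parallel lists into a list
# of per-example lists, and back) might look like the following; the real
# helpers may differ in details such as length checks.
def zip_lsts_sketch(lsts):
    # [[a1, a2], [b1, b2]] -> [[a1, b1], [a2, b2]]
    return [list(tp) for tp in zip(*lsts)]


def unzip_lst_sketch(zipped):
    # [[a1, b1], [a2, b2]] -> [[a1, a2], [b1, b2]]
    return [list(tp) for tp in zip(*zipped)]

# For example, the zipped_responses returned above could be split back into
# parallel source/target/response streams with:
#   (source_lst, target_lst, responses) = unzip_lst_sketch(zipped_responses)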

def run_seq2seq(sess, mode, epoch, feed_score=1.0):
    """Run one epoch of training ("train") or beam-search inference (otherwise).

    TODO: see if we need to append end_token.
    """
    is_training = (mode == "train")

    if is_training:
        (source_lst, target_lst, score_lst) = unzip_lst(LFT_examples)
    else:
        (source_lst, target_lst) = data_dict[mode]
        score_lst = [feed_score] * len(source_lst)

    # source_lst = source_lst[:batch_size * 2]
    # target_lst = target_lst[:batch_size * 2]
    # score_lst = score_lst[:batch_size * 2]

    num_examples = len(source_lst)
    assert num_examples >= batch_size
    num_batches = num_examples // batch_size

    keep_prob = (1 - dropout_rate) if is_training else 1.0
    start_tokens = [start_token] * batch_size

    total_loss = 0.0
    num_tokens = 0
    zipped_lst = []
    for i in range(num_batches):
        start = i * batch_size
        end = start + batch_size

        sources = source_lst[start:end]
        source_lengths = list(map(len, sources))
        targets = target_lst[start:end]
        target_lengths = list(map(len, targets))
        scores = score_lst[start:end]

        feed_dict = {
            model.source: pad(sources, source_lengths),
            model.source_length: source_lengths,
            model.target: pad(targets, target_lengths),
            model.target_length: target_lengths,
            model.start_tokens: start_tokens,
            model.keep_prob: keep_prob,
            model.is_training: is_training,
            model.score: scores}

        if is_training:
            fetches = [model.batch_total_loss,
                       model.batch_num_tokens,
                       model.apply_gradients_op]
        else:
            fetches = [model.batch_sample_ids_beam,
                       model.batch_final_lengths_beam]

        result = sess.run(fetches, feed_dict=feed_dict)

        if is_training:
            total_loss += result[0]
            num_tokens += result[1]
            print("Epoch (%s) %d Batch %d perplexity: %.2f"
                  % (mode, epoch, i, exp(result[0] / result[1])))
            print("Perplexity so far:", exp(total_loss / num_tokens))
        else:
            print("Finished testing batch %d" % i)
            responses = [response[:length]
                         for (response, length)
                         in zip(result[0].tolist(), result[1].tolist())]
            zipped = zip_lsts([sources, targets, responses])
            zipped_lst.extend(zipped)

    if is_training:
        print("Epoch (%s) %d average perplexity: %.2f"
              % (mode, epoch, exp(total_loss / num_tokens)))
        if not get_PPL:
            saver_seq2seq.save(
                sess, "%sseq2seq_RL%s_%d" % (ckpt_path, extra_str, epoch))
            print("Checkpoint saved for epoch %d." % epoch)

    return zipped_lst
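
# `pad` is a repo helper used in the feed_dict above. The sketch below shows
# the padding behavior that feed_dict relies on: right-padding every sequence
# to the longest length in the batch. The pad index (0 here) is an assumption,
# not necessarily the value the repo actually uses.
def pad_sketch(sequences, lengths, pad_index=0):
    max_length = max(lengths)
    return [seq + [pad_index] * (max_length - length)
            for (seq, length) in zip(sequences, lengths)]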

def zip_remove_duplicates_unzip(lsts):
    zipped = zip_lsts(lsts)
    zipped_without_duplicates = remove_duplicates(zipped)
    unzipped = unzip_lst(zipped_without_duplicates)
    return unzipped
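
# Usage sketch for zip_remove_duplicates_unzip (assuming remove_duplicates
# keeps the first occurrence of each zipped example and preserves order, which
# is how it is used here; the actual helper lives elsewhere in the repo):
#
#   sources = [[1, 2], [3, 4], [1, 2]]
#   targets = [[5], [6], [5]]
#   (sources, targets) = zip_remove_duplicates_unzip([sources, targets])
#   # sources -> [[1, 2], [3, 4]]
#   # targets -> [[5], [6]]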

if infer_only and not get_PPL and (i + start_epoch - 1) % 5 == 0:  # for getting perplexity of test data, use train branch
    print("Inferring on test set...")
    mode = "test"

    responses_lst = []
    source_lst = []
    target_lst = []
    score_range = list(np.arange(0.0, 1.1, 0.5))
    for score in score_range:
        zipped_responses = run_seq2seq(
            sess, mode, i + start_epoch, feed_score=score)
        (source_lst, target_lst, responses) = unzip_lst(zipped_responses)
        responses_lst.append(responses)

    num_responses = len(responses_lst[0])
    zipped = zip_lsts([source_lst, target_lst] + responses_lst)
    flattened = [decode2string(index2token, sent,
                               end_token=end_token, remove_END_TOKEN=True)
                 for tp in zipped for sent in tp]

    # Now we mark sentences that are generated by our model
    num_lines = len(score_range) + 2
    marked_G = [("G: " + sent) if k % num_lines == 1 else sent
                for (k, sent) in enumerate(flattened)]
    marked_M = [("M: " + sent) if k % num_lines in range(2, num_lines) else sent
                for (k, sent) in enumerate(marked_G)]

    filename = ("%sseq2seq_RL_%s_result%s_%d.txt" %

# Run fusion model for different fusion rates
responses_lst = []
for fusion_rate in fusion_rate_candidates:
    responses = run_fusion(sess, fusion_rate, source_test)
    dump_pickle(
        data_path + "/fusion_%.1f_%d_infer.pkl" % (fusion_rate, seq2seq_epoch),
        responses)
    responses_lst.append(responses)

num_responses = len(responses_lst[0])
print("Generated %d responses for each fusion rate." % num_responses)

# Add in source sentences and ground truths
zipped_responses = zip_lsts(
    [source_test[:num_responses]] + [target_test[:num_responses]] + responses_lst)

# Write results to file
filename = data_path + "fusion_responses_%.1f.txt" % fusion_rate_candidates[0]
text_zipped_responses = [
    [label + decode2string(index2token, response, remove_END_TOKEN=True)
     for (label, response) in zip(
         ["", "G: "] + convert_list_to_str_list(fusion_rate_candidates),
         responses)]
    for responses in zipped_responses]
flattened_text_responses = [
    response
    for responses in text_zipped_responses
    for response in responses