def main(start_epoch):
    (model, graph, saver_seq2seq) = build_model()
    config = gpu_config()

    with tf.Session(graph=graph, config=config) as sess:
        sess.run(tf.global_variables_initializer())
        print("Initialized.")

        # Restore from a checkpoint when resuming training or running inference
        restore_ckpt = None
        if start_epoch > -1:
            if force_restore_point != "":
                restore_ckpt = force_restore_point
            else:
                restore_ckpt = f"{ckpt_path}/seq2seq_RL{restore_extra_str}_{start_epoch}"
        if restore_ckpt is not None:
            saver_seq2seq.restore(sess, restore_ckpt)
            print("Restored from", restore_ckpt)

        for i in range(num_epochs):
            if not infer_only:  # for getting perplexity of test data, use train branch
                mode = "train"
                start_epoch += 1
                run_vhred(model, sess, mode, start_epoch)

                # Validate every 5 epochs once training has warmed up
                if (not no_validation and not glimpse_training
                        and start_epoch % 5 == 0 and start_epoch >= 10):
                    mode = "valid"
                    zipped_responses = run_vhred(model, sess, mode, start_epoch)
                else:
                    continue
            else:
                print("Inferring on test set...")
                mode = "test"
                zipped_responses = run_vhred(model, sess, mode, start_epoch)

            # Make sure sent is not empty and always ends with an eou
            flattened = [
                decode2string(index2token, sent,
                              end_token=end_token_str, remove_END_TOKEN=True)
                for tp in zipped_responses for sent in tp
            ]
            flattened = [maybe_add(sent, eou_str) for sent in flattened]

            # Now we mark sentences that are generated by our model
            marked_G = [("G: " + sent) if k % 3 == 1 else sent
                        for (k, sent) in enumerate(flattened)]
            marked_M = [("M: " + sent) if k % 3 == 2 else sent
                        for (k, sent) in enumerate(marked_G)]

            filename = f"{output_path}/{extra_str[1:]}_{mode}_result_{start_epoch}.txt"
            write_lines(filename, marked_M)

            # Only need 1 epoch for inferring or getting PPL
            if infer_only:
                break
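# NOTE: `maybe_add` and `decode2string` are not defined in this file. The
# sketches below are minimal assumptions inferred from the call sites above
# (signatures and keyword names match the calls; the default "_EOS_" symbol
# is hypothetical), not the project's actual implementations.

def maybe_add(sent, suffix):
    # Append `suffix` (e.g. the end-of-utterance marker) only when it is
    # missing, so every written line ends with exactly one eou token.
    sent = sent.strip()
    if not sent.endswith(suffix):
        sent = (sent + " " + suffix).strip()
    return sent


def decode2string(index2token, sent, end_token="_EOS_", remove_END_TOKEN=False):
    # Map a list of token ids back to a whitespace-joined string, optionally
    # dropping the end-of-sequence token.
    tokens = [index2token[idx] for idx in sent]
    if remove_END_TOKEN:
        tokens = [tok for tok in tokens if tok != end_token]
    return " ".join(tokens)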
neutral_responses = run_seq2seq(
    sess, source_test_neutral, target_test, "test", i + start_epoch)
rude_responses = run_seq2seq(
    sess, source_test_rude, target_test, "test", i + start_epoch)

assert len(polite_responses) == len(neutral_responses) == len(rude_responses)
num_responses = len(polite_responses)

# Interleave sources, ground truths, and the three generated response lists
zipped = zip_lsts(
    [source_test[:num_responses],
     target_test[:num_responses],
     polite_responses,
     neutral_responses,
     rude_responses])

flattened = [decode2string(index2token, sent, remove_END_TOKEN=True)
             for tp in zipped for sent in tp]

# Now we mark sentences that are generated by our model
marked_G = [("G: " + sent) if k % 5 == 1 else sent
            for (k, sent) in enumerate(flattened)]
marked_P = [("P: " + sent) if k % 5 == 2 else sent
            for (k, sent) in enumerate(marked_G)]
marked_N = [("N: " + sent) if k % 5 == 3 else sent
            for (k, sent) in enumerate(marked_P)]
marked_R = [("R: " + sent) if k % 5 == 4 else sent
            for (k, sent) in enumerate(marked_N)]
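# NOTE: `zip_lsts` is not defined in this file. From how its output is
# consumed above (a flat list of tuples iterated as `for tp in zipped`), the
# following is a plausible minimal sketch, not the project's implementation.

def zip_lsts(lsts):
    # Interleave several parallel lists into one list of tuples, e.g.
    # [[a1, a2], [b1, b2], [c1, c2]] -> [(a1, b1, c1), (a2, b2, c2)]
    return list(zip(*lsts))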
dump_pickle(
    data_path + "/fusion_%.1f_%d_infer.pkl" % (fusion_rate, seq2seq_epoch),
    responses)
responses_lst.append(responses)

num_responses = len(responses_lst[0])
print("Generated %d responses for each fusion rate." % num_responses)

# Add in source sents and ground truths
zipped_responses = zip_lsts(
    [source_test[:num_responses]]
    + [target_test[:num_responses]]
    + responses_lst)

# Write results to file
filename = data_path + "/fusion_responses_%.1f.txt" % fusion_rate_candidates[0]
text_zipped_responses = [
    [label + decode2string(index2token, response, remove_END_TOKEN=True)
     for (label, response) in zip(
         ["", "G: "] + convert_list_to_str_list(fusion_rate_candidates),
         responses)]
    for responses in zipped_responses]
flattened_text_responses = [
    response for responses in text_zipped_responses for response in responses]
write_lines(filename, flattened_text_responses)
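# NOTE: the I/O and labeling helpers used above are also assumed. The sketches
# below are consistent with the call sites (path first, payload second), but
# the exact label format produced by `convert_list_to_str_list` is a guess.
import pickle


def convert_list_to_str_list(lst):
    # e.g. [0.1, 0.5] -> ["0.1: ", "0.5: "], so each fusion rate labels its line
    return ["%s: " % str(x) for x in lst]


def dump_pickle(path, obj):
    # Serialize `obj` to `path` with pickle
    with open(path, "wb") as f:
        pickle.dump(obj, f)


def write_lines(path, lines):
    # Write one string per line to a text file
    with open(path, "w") as f:
        f.write("\n".join(lines) + "\n")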