def decode(self):
    """Beam-search decode every example in ``self.data_loader``.

    Writes one predicted question per line to ``self.pred_dir`` and the
    matching reference question from ``self.test_data`` to
    ``self.golden_dir``.
    """
    # Context managers guarantee both files are closed even if beam
    # search raises partway through the dataset (original used bare
    # open()/close(), leaking the handles on error).
    with open(self.pred_dir, "w") as pred_fw, \
            open(self.golden_dir, "w") as golden_fw:
        for i, eval_data in enumerate(self.data_loader):
            src_seq, ext_src_seq, _, \
                _, tag_seq, oov_lst = eval_data
            best_question = self.beam_search(src_seq, ext_src_seq, tag_seq)
            # discard START token (and the final token) before mapping
            # ids back to words; copy-mechanism OOVs resolve via oov_lst[0]
            output_indices = [int(idx) for idx in best_question.tokens[1:-1]]
            decoded_words = outputids2words(output_indices,
                                            self.idx2tok,
                                            oov_lst[0])
            # Truncate at the first END token, if one was generated.
            # NOTE(review): decoded_words holds word *strings* while
            # END_ID is named like an integer id — if so, .index() always
            # raises ValueError and this truncation never fires; confirm
            # whether the END word string should be searched instead.
            try:
                fst_stop_idx = decoded_words.index(END_ID)
                decoded_words = decoded_words[:fst_stop_idx]
            except ValueError:
                pass  # no END token: keep the full sequence
            decoded_words = " ".join(decoded_words)
            golden_question = self.test_data[i]
            print("write {}th question\r".format(i))
            pred_fw.write(decoded_words + "\n")
            golden_fw.write(golden_question)
def do_decode(model, batcher, settings):
    """Decode batches from ``batcher`` with beam search until exhausted.

    In ``settings.single_pass`` mode, writes each decoded summary (and its
    reference) to disk for ROUGE evaluation and returns when the batcher
    yields ``None``.  Otherwise loops forever, printing each decoded
    output to the console.

    Args:
        model: the trained summarization model passed to beam search.
        batcher: yields batches of one example repeated across the batch,
            or ``None`` once the dataset is exhausted (single-pass only).
        settings: configuration object; fields read here include ``vocab``,
            ``single_pass``, ``using_pointer_gen``,
            ``rouge_dir_references`` and ``rouge_dir_results``.
    """
    vocab = settings.vocab

    counter = 0  # this is how many examples we've decoded
    while True:
        batch = batcher.get_next_batch()  # 1 example repeated across batch

        if batch is None:  # finished decoding dataset in single_pass mode
            assert settings.single_pass, \
                "Dataset exhausted, but we are not in single_pass mode"
            print("Decoder has finished reading dataset for single_pass.")
            # BUG FIX: print() does not interpolate "%s" from trailing
            # arguments the way logging does — the original printed the
            # literal format string followed by the two paths.  Format
            # explicitly instead.
            print("Output has been saved in {} and {}. "
                  "Now starting ROUGE eval...".format(
                      settings.rouge_dir_references,
                      settings.rouge_dir_results))
            # results_dict = rouge_eval(self._rouge_ref_dir, self._rouge_dec_dir)
            # rouge_log(results_dict, self._decode_dir)
            return

        original_article = batch["original_articles"][0]  # string
        original_abstract = batch["original_abstracts"][0]  # string
        original_abstract_sents = batch["original_abstracts_sents"][
            0]  # list of strings

        article_withunks = data_utils.show_art_oovs(original_article,
                                                    vocab)  # string
        abstract_withunks = data_utils.show_abs_oovs(
            original_abstract, vocab,
            (batch["art_oovs"][0]
             if settings.using_pointer_gen else None))  # string

        # Run beam search to get best Hypothesis
        best_hyp = decoding_beam_search.run_beam_search(
            model, batch, vocab, settings)

        print(
            "---------------------------------------------------------------------------"
        )
        print_results(article_withunks, abstract_withunks, "")

        # Extract the output ids from the hypothesis and convert back to
        # words (skip the leading START token).
        output_ids = [int(t) for t in best_hyp.tokens[1:]]
        decoded_words = data_utils.outputids2words(
            output_ids, vocab,
            (batch["art_oovs"][0] if settings.using_pointer_gen else None))

        # Remove the [STOP] token from decoded_words, if necessary
        try:
            first_stop_idx = decoded_words.index(
                STOP_DECODING)  # index of the (first) [STOP] symbol
            decoded_words = decoded_words[:first_stop_idx]
        except ValueError:
            pass  # no [STOP] token generated: keep everything

        decoded_output = ' '.join(decoded_words)  # single string

        if settings.single_pass:
            write_for_rouge(original_abstract_sents, decoded_words, counter)
            counter += 1
        else:
            # print_results(article_withunks, abstract_withunks, decoded_output)
            print_results("", "", decoded_output)
            print(
                "---------------------------------------------------------------------------"
            )