def decode(self):
        """Beam-search decode every example in ``self.data_loader``.

        Writes one predicted question per line to ``self.pred_dir`` and the
        corresponding gold question to ``self.golden_dir``.
        """
        # `with` guarantees both files are closed even if decoding raises,
        # unlike an explicit open()/close() pair.
        with open(self.pred_dir, "w") as pred_fw, \
                open(self.golden_dir, "w") as golden_fw:
            for i, eval_data in enumerate(self.data_loader):
                src_seq, ext_src_seq, _, \
                    _, tag_seq, oov_lst = eval_data

                best_question = self.beam_search(src_seq, ext_src_seq, tag_seq)
                # discard START token (and the final token) before mapping
                # ids back to words; oov_lst[0] resolves copied OOV words
                output_indices = [int(idx) for idx in best_question.tokens[1:-1]]
                decoded_words = outputids2words(output_indices, self.idx2tok,
                                                oov_lst[0])
                # truncate at the first END token, if one was produced
                try:
                    fst_stop_idx = decoded_words.index(END_ID)
                    decoded_words = decoded_words[:fst_stop_idx]
                except ValueError:
                    pass  # no END token: keep the full sequence
                decoded_words = " ".join(decoded_words)
                golden_question = self.test_data[i]
                print("write {}th question\r".format(i))
                pred_fw.write(decoded_words + "\n")
                golden_fw.write(golden_question)
# Example #2 (score: 0)
def do_decode(model, batcher, settings):
    """Run beam-search decoding over every batch produced by ``batcher``.

    For each example, the best hypothesis is converted back to words,
    truncated at the first [STOP] token, and either written out for ROUGE
    evaluation (``settings.single_pass``) or printed to the console.

    Args:
        model: seq2seq model consumed by the beam search.
        batcher: yields batches (one example repeated across the batch);
            returns None once the dataset is exhausted in single_pass mode.
        settings: configuration object (``vocab``, ``single_pass``,
            ``using_pointer_gen``, ROUGE output directories, ...).
    """
    vocab = settings.vocab
    counter = 0  # number of examples decoded so far (used as ROUGE file index)
    while True:
        batch = batcher.get_next_batch()  # 1 example repeated across batch
        if batch is None:  # finished decoding dataset in single_pass mode
            assert settings.single_pass, "Dataset exhausted, but we are not in single_pass mode"
            print("Decoder has finished reading dataset for single_pass.")
            # BUGFIX: print() does not do logging-style %-interpolation; the
            # original printed the literal "%s" placeholders followed by the
            # two paths as separate arguments. Interpolate explicitly.
            print("Output has been saved in %s and %s. Now starting ROUGE eval..."
                  % (settings.rouge_dir_references, settings.rouge_dir_results))
            # results_dict = rouge_eval(self._rouge_ref_dir, self._rouge_dec_dir)
            # rouge_log(results_dict, self._decode_dir)
            return

        original_article = batch["original_articles"][0]  # string
        original_abstract = batch["original_abstracts"][0]  # string
        original_abstract_sents = batch["original_abstracts_sents"][
            0]  # list of strings

        # Human-readable versions with OOV words marked.
        article_withunks = data_utils.show_art_oovs(original_article,
                                                    vocab)  # string
        abstract_withunks = data_utils.show_abs_oovs(
            original_abstract, vocab,
            (batch["art_oovs"][0]
             if settings.using_pointer_gen else None))  # string

        # Run beam search to get best Hypothesis
        best_hyp = decoding_beam_search.run_beam_search(
            model, batch, vocab, settings)

        print(
            "---------------------------------------------------------------------------"
        )
        print_results(article_withunks, abstract_withunks, "")

        # Extract the output ids from the hypothesis (skipping the initial
        # [START]) and convert back to words; art_oovs lets the pointer-gen
        # model recover copied OOV words.
        output_ids = [int(t) for t in best_hyp.tokens[1:]]
        decoded_words = data_utils.outputids2words(
            output_ids, vocab,
            (batch["art_oovs"][0] if settings.using_pointer_gen else None))

        # Remove the [STOP] token from decoded_words, if necessary
        try:
            first_stop_idx = decoded_words.index(
                STOP_DECODING)  # index of the (first) [STOP] symbol
            decoded_words = decoded_words[:first_stop_idx]
        except ValueError:
            pass  # no [STOP] emitted: keep the whole sequence
        decoded_output = ' '.join(decoded_words)  # single string

        if settings.single_pass:
            # Write reference/decoded files for a later ROUGE run.
            write_for_rouge(original_abstract_sents, decoded_words, counter)
            counter += 1  # this is how many examples we've decoded
        else:
            # print_results(article_withunks, abstract_withunks, decoded_output)
            print_results("", "", decoded_output)
        print(
            "---------------------------------------------------------------------------"
        )