def decode(self):
        """Decode the whole dataset with beam search, then run ROUGE eval.

        Iterates over batches from self.batcher (single-pass mode), writes
        each decoded summary and its reference abstract to the ROUGE
        directories, and finally computes and logs ROUGE scores.
        """
        start = time.time()
        counter = 0
        batch = self.batcher.next_batch()
        while batch is not None:
            # Run beam search to get the best Hypothesis for this batch.
            best_summary = self.beam_search(batch)

            # Skip the leading [START] token, then map ids back to words
            # (article OOV words are needed when pointer-gen is enabled).
            output_ids = [int(t) for t in best_summary.tokens[1:]]
            decoded_words = data.outputids2words(output_ids, self.vocab,
                                                 (batch.art_oovs[0] if config.pointer_gen else None))

            # Truncate at the first [STOP] token, if one was produced.
            try:
                fst_stop_idx = decoded_words.index(data.STOP_DECODING)
                decoded_words = decoded_words[:fst_stop_idx]
            except ValueError:
                pass  # no [STOP] token generated; keep the full sequence

            original_abstract_sents = batch.original_abstracts_sents[0]

            write_for_rouge(original_abstract_sents, decoded_words, counter,
                            self._rouge_ref_dir, self._rouge_dec_dir)
            counter += 1
            if counter % 1000 == 0:
                print('%d example in %d sec'%(counter, time.time() - start))
                start = time.time()

            batch = self.batcher.next_batch()

        print("Decoder has finished reading dataset for single_pass.")
        print("Now starting ROUGE eval...")
        results_dict = rouge_eval(self._rouge_ref_dir, self._rouge_dec_dir)
        rouge_log(results_dict, self._decode_dir)
# Example #2
    def decode(self):
        """Run beam-search decoding over the full dataset, then ROUGE eval."""
        t0 = time.time()
        n_decoded = 0
        current = self.batcher.next_batch()
        while current is not None:
            # Best hypothesis for this batch via beam search.
            hyp = self.beam_search(current)

            # Drop the leading [START] token and map ids back to words.
            ids = [int(tok) for tok in hyp.tokens[1:]]
            oovs = current.art_oovs[0] if config.pointer_gen else None
            words = data.outputids2words(ids, self.vocab, oovs)

            # Cut the sequence at the first [STOP] token when present.
            if data.STOP_DECODING in words:
                words = words[:words.index(data.STOP_DECODING)]

            reference_sents = current.original_abstracts_sents[0]

            write_for_rouge(reference_sents, words, n_decoded,
                            self._rouge_ref_dir, self._rouge_dec_dir)
            n_decoded += 1
            if n_decoded % 1000 == 0:
                print('%d example in %d sec'%(n_decoded, time.time() - t0))
                t0 = time.time()

            current = self.batcher.next_batch()

        print("Decoder has finished reading dataset for single_pass.")
        print("Now starting ROUGE eval...")
        results_dict = rouge_eval(self._rouge_ref_dir, self._rouge_dec_dir)
        rouge_log(results_dict, self._decode_dir)
# Example #3
 def rouge_eval(self):
     """Run ROUGE evaluation at several byte-truncation limits (75, 275, full)."""
     print("Now starting ROUGE eval...")
     # NOTE: the bare rouge_eval(...) below resolves to the module-level
     # function, not this method.
     for limit in (75, 275, None):
         print(" ****************** n_bytes=%s *****************" %
               str(limit))
         scores = rouge_eval(self._rouge_ref_dir,
                             self._rouge_dec_dir,
                             n_bytes=limit)
         rouge_log(scores, self._decode_dir)
# Example #4
    def decode(self):
        """Decode the dataset with a transformer-style summarizer, then run ROUGE eval.

        Single-pass loop: for each batch, builds encoder inputs, summarizes
        the first example of the batch, writes decoded output and reference
        for ROUGE, and finally computes and logs ROUGE scores.
        """
        start = time.time()
        counter = 0
        batch = self.batcher.next_batch()
        #print(batch.enc_batch)

        while batch is not None:

            # Run beam search to get best Hypothesis
            enc_batch, enc_padding_mask, enc_lens, enc_batch_extend_vocab, extra_zeros, c_t_0, coverage_t_0 = get_input_from_batch(
                batch, use_cuda)

            # Only the first example of the batch is summarized.
            enc_batch = enc_batch[0:1, :]
            enc_padding_mask = enc_padding_mask[0:1, :]

            in_seq = enc_batch
            # Position ids derived from the padding mask — presumably
            # 1-based positions for non-pad tokens; confirm in get_pos_data.
            in_pos = self.get_pos_data(enc_padding_mask)
            #print("enc_padding_mask", enc_padding_mask)

            #print("Summarizing one batch...")

            batch_hyp, batch_scores = self.summarize_batch(in_seq, in_pos)

            # Extract the output ids from the hypothesis and convert back to words
            # Keep the top-ranked hypothesis (index 0) and drop the first token.
            output_words = np.array(batch_hyp)
            output_words = output_words[:, 0, 1:]

            for i, out_sent in enumerate(output_words):

                # NOTE(review): art_oovs[0] is reused for every i here, while
                # the reference below is indexed with i — looks inconsistent;
                # verify whether art_oovs[i] was intended.
                decoded_words = data.outputids2words(
                    out_sent, self.vocab,
                    (batch.art_oovs[0] if config.pointer_gen else None))

                original_abstract_sents = batch.original_abstracts_sents[i]

                write_for_rouge(original_abstract_sents, decoded_words,
                                counter, self._rouge_ref_dir,
                                self._rouge_dec_dir)
                counter += 1

            # NOTE(review): % 1 is always true, so this prints every batch —
            # likely leftover debug throttling (other variants use % 1000).
            if counter % 1 == 0:
                print('%d example in %d sec' % (counter, time.time() - start))
                start = time.time()

            batch = self.batcher.next_batch()

        print("Decoder has finished reading dataset for single_pass.")
        print("Now starting ROUGE eval...")
        results_dict = rouge_eval(self._rouge_ref_dir, self._rouge_dec_dir)
        rouge_log(results_dict, self._decode_dir)
# Example #5
import os
import sys

from data_util.utils import write_for_rouge, rouge_eval, rouge_log

if __name__ == '__main__':
    # Require the model/run name under log/ as the single CLI argument;
    # the original crashed with an IndexError when it was missing.
    if len(sys.argv) < 2:
        print("Usage: python %s <model_filename>" % sys.argv[0])
        sys.exit(1)
    model_filename = sys.argv[1]
    print("ROUGE calculation for decoded text.")
    log_dir = os.path.join("log", model_filename)
    results_dict = rouge_eval(os.path.join(log_dir, "rouge_ref"),
                              os.path.join(log_dir, "rouge_dec_dir"))
    rouge_log(results_dict, os.path.join(log_dir, "rouge_calc"))
    def decode(self, data_fn):
        """Decode the dataset, analyze p_gen behavior, then run ROUGE eval.

        For every batch (single-pass), runs beam search, writes the decoded
        summary and its reference for ROUGE, and calls analyze_pgen with the
        per-step generation probabilities and distributions.

        data_fn: passed through to analyze_pgen (analysis output target).
        """
        start = time.time()
        counter = 0
        batch = self.batcher.next_batch()
        while batch is not None:
            # Run beam search; the best hypothesis also carries the per-step
            # p_gen values and distributions.
            best_summary = self.beam_search(batch)

            # Skip the leading [START] token, then map ids back to words.
            output_ids = [int(t) for t in best_summary.tokens[1:]]
            decoded_words = data.outputids2words(
                output_ids, self.vocab,
                (batch.art_oovs[0] if config.pointer_gen else None))

            # Per-step p_gen values, without the entry for [START].
            p_gens = best_summary.p_gens[1:]
            # The distribution lists have no [START] entry, so copy whole.
            final_dist_list = best_summary.final_dist_list[:]
            vocab_list = best_summary.vocab_list[:]

            # Truncate everything at the first [STOP] token, if present.
            # Default output_ids_nostop up front: the original assigned it
            # only inside the try, so a hypothesis without [STOP] raised
            # NameError at the analyze_pgen call below.
            output_ids_nostop = output_ids
            try:
                fst_stop_idx = decoded_words.index(data.STOP_DECODING)
                decoded_words = decoded_words[:fst_stop_idx]
                p_gens = p_gens[:fst_stop_idx]
                output_ids_nostop = output_ids[:fst_stop_idx]
                final_dist_list = final_dist_list[:fst_stop_idx]
                vocab_list = vocab_list[:fst_stop_idx]
            except ValueError:
                pass  # no [STOP] token generated; keep the full sequences

            original_abstract_sents = batch.original_abstracts_sents[0]

            write_for_rouge(original_abstract_sents, decoded_words, counter,
                            self._rouge_ref_dir, self._rouge_dec_dir)

            # Analyze the relation of p_gen to the decoded words.
            analyze_pgen(data_fn=data_fn,
                         vocab=self.vocab,
                         reference_sents=original_abstract_sents,
                         input_art_ids=batch.enc_batch,
                         oov_ids=batch.art_oovs[0],
                         decoded_word_ids=output_ids_nostop,
                         decoded_words=decoded_words,
                         final_dist=final_dist_list,
                         vocab_dist=vocab_list,
                         p_gens=p_gens)

            counter += 1
            if counter % 1000 == 0:
                print('%d example in %d sec' % (counter, time.time() - start))
                start = time.time()

            batch = self.batcher.next_batch()

        print("Decoder has finished reading dataset for single_pass.")
        print("Now starting ROUGE eval...")
        results_dict = rouge_eval(self._rouge_ref_dir, self._rouge_dec_dir)
        rouge_log(results_dict, self._decode_dir)