def decode(self):
    """Beam-search decode every batch, write outputs for ROUGE, then run the eval.

    Iterates the batcher until exhausted (single_pass mode), writes one
    reference/decoded pair per example, and finally runs ROUGE over the
    collected directories.
    """
    tic = time.time()
    example_idx = 0
    batch = self.batcher.next_batch()
    while batch is not None:
        # Best hypothesis for this batch from beam search.
        best_hyp = self.beam_search(batch)

        # Drop the leading START token, then map ids back to words; in
        # pointer-gen mode the article OOVs of the first example are needed
        # to resolve extended-vocabulary ids.
        token_ids = [int(t) for t in best_hyp.tokens[1:]]
        oovs = batch.art_oovs[0] if config.pointer_gen else None
        decoded_words = data.outputids2words(token_ids, self.vocab, oovs)

        # Truncate at the first [STOP] token, if one was generated.
        if data.STOP_DECODING in decoded_words:
            decoded_words = decoded_words[:decoded_words.index(data.STOP_DECODING)]

        reference_sents = batch.original_abstracts_sents[0]
        write_for_rouge(reference_sents, decoded_words, example_idx,
                        self._rouge_ref_dir, self._rouge_dec_dir)

        example_idx += 1
        if example_idx % 1000 == 0:
            print('%d example in %d sec' % (example_idx, time.time() - tic))
            tic = time.time()
        batch = self.batcher.next_batch()

    print("Decoder has finished reading dataset for single_pass.")
    print("Now starting ROUGE eval...")
    results_dict = rouge_eval(self._rouge_ref_dir, self._rouge_dec_dir)
    rouge_log(results_dict, self._decode_dir)
def decode(self):
    """Decode the whole dataset with beam search and evaluate with ROUGE.

    For each batch: run beam search, convert the best hypothesis back to
    words, strip everything from the first [STOP] token onward, and write
    the reference/decoded pair to disk. After the batcher is exhausted,
    run ROUGE over the written files and log the scores.
    """
    start = time.time()
    counter = 0
    batch = self.batcher.next_batch()

    while batch is not None:
        best_summary = self.beam_search(batch)

        # Hypothesis tokens minus the initial START symbol -> word list.
        output_ids = [int(t) for t in best_summary.tokens[1:]]
        decoded_words = data.outputids2words(
            output_ids,
            self.vocab,
            batch.art_oovs[0] if config.pointer_gen else None,
        )

        # Keep only the words before the first [STOP]; if the model never
        # emitted one, keep the full sequence.
        try:
            decoded_words = decoded_words[:decoded_words.index(data.STOP_DECODING)]
        except ValueError:
            pass

        write_for_rouge(
            batch.original_abstracts_sents[0],
            decoded_words,
            counter,
            self._rouge_ref_dir,
            self._rouge_dec_dir,
        )

        counter += 1
        if counter % 1000 == 0:
            print('%d example in %d sec' % (counter, time.time() - start))
            start = time.time()
        batch = self.batcher.next_batch()

    print("Decoder has finished reading dataset for single_pass.")
    print("Now starting ROUGE eval...")
    rouge_log(rouge_eval(self._rouge_ref_dir, self._rouge_dec_dir),
              self._decode_dir)
def rouge_eval(self):
    """Run ROUGE at several byte-truncation limits and log each result.

    Evaluates with summaries truncated to 75 bytes, 275 bytes, and
    untruncated (None), logging a separate result block for each limit.
    NOTE: the `rouge_eval(...)` call below resolves to the module-level
    helper of the same name, not recursively to this method.
    """
    print("Now starting ROUGE eval...")
    for limit in (75, 275, None):
        print(" ****************** n_bytes=%s *****************" % str(limit))
        scores = rouge_eval(self._rouge_ref_dir, self._rouge_dec_dir,
                            n_bytes=limit)
        rouge_log(scores, self._decode_dir)
def decode(self):
    """Decode with the transformer summarizer and evaluate with ROUGE.

    Each batch is reduced to its first example before summarization; the
    best hypothesis per example is converted to words and written out for
    ROUGE scoring.
    """
    t0 = time.time()
    n_done = 0
    batch = self.batcher.next_batch()

    while batch is not None:
        (enc_batch, enc_padding_mask, enc_lens, enc_batch_extend_vocab,
         extra_zeros, c_t_0, coverage_t_0) = get_input_from_batch(batch, use_cuda)

        # Only the first example of the batch is summarized.
        enc_batch = enc_batch[0:1, :]
        enc_padding_mask = enc_padding_mask[0:1, :]

        in_seq = enc_batch
        in_pos = self.get_pos_data(enc_padding_mask)
        batch_hyp, batch_scores = self.summarize_batch(in_seq, in_pos)

        # Take the top hypothesis per example and drop the leading START id.
        output_words = np.array(batch_hyp)[:, 0, 1:]

        for i, out_sent in enumerate(output_words):
            # NOTE(review): OOV list is always taken from example 0 — fine
            # while the batch is sliced to one example above, but would be
            # wrong if that slicing were ever removed; confirm if changed.
            decoded_words = data.outputids2words(
                out_sent,
                self.vocab,
                batch.art_oovs[0] if config.pointer_gen else None,
            )
            write_for_rouge(batch.original_abstracts_sents[i], decoded_words,
                            n_done, self._rouge_ref_dir, self._rouge_dec_dir)
            n_done += 1
            # Progress is printed for every example (modulus of 1).
            if n_done % 1 == 0:
                print('%d example in %d sec' % (n_done, time.time() - t0))
                t0 = time.time()

        batch = self.batcher.next_batch()

    print("Decoder has finished reading dataset for single_pass.")
    print("Now starting ROUGE eval...")
    results_dict = rouge_eval(self._rouge_ref_dir, self._rouge_dec_dir)
    rouge_log(results_dict, self._decode_dir)
import os
import sys

from data_util.utils import write_for_rouge, rouge_eval, rouge_log

if __name__ == '__main__':
    # Usage: python <script> MODEL_NAME — recomputes ROUGE for an existing
    # decode run under log/MODEL_NAME/.
    model_filename = sys.argv[1]
    print("ROUGE calculation for decoded text.")
    run_dir = "log/" + model_filename
    results_dict = rouge_eval(run_dir + "/rouge_ref", run_dir + "/rouge_dec_dir")
    rouge_log(results_dict, run_dir + "/rouge_calc")
def decode(self, data_fn):
    """Beam-search decode the dataset, write ROUGE files, and analyze p_gen.

    In addition to the standard decode loop, this variant extracts the
    per-step generation probabilities (p_gen) and distribution traces from
    the best hypothesis and feeds them to analyze_pgen for each example.

    Args:
        data_fn: forwarded to analyze_pgen — presumably an output path or
            handle for the analysis; confirm against analyze_pgen's signature.
    """
    start = time.time()
    counter = 0
    batch = self.batcher.next_batch()
    while batch is not None:
        # Best hypothesis; it also carries the per-step p_gen and
        # distribution traces collected during beam search.
        best_summary = self.beam_search(batch)

        # Drop the leading START token and map ids back to words.
        output_ids = [int(t) for t in best_summary.tokens[1:]]
        decoded_words = data.outputids2words(
            output_ids, self.vocab,
            (batch.art_oovs[0] if config.pointer_gen else None))

        # Per-step traces: p_gens drops the START step; the distribution
        # lists have no START entry to drop.
        p_gens = best_summary.p_gens[1:]
        final_dist_list = best_summary.final_dist_list[:]
        vocab_list = best_summary.vocab_list[:]

        # BUG FIX: output_ids_nostop used to be assigned only inside the
        # try block, so a hypothesis that never emitted [STOP] raised
        # NameError at the analyze_pgen call below. Default to the full
        # sequence first, then truncate everything at the first [STOP].
        output_ids_nostop = output_ids
        try:
            fst_stop_idx = decoded_words.index(data.STOP_DECODING)
            decoded_words = decoded_words[:fst_stop_idx]
            p_gens = p_gens[:fst_stop_idx]
            output_ids_nostop = output_ids[:fst_stop_idx]
            final_dist_list = final_dist_list[:fst_stop_idx]
            vocab_list = vocab_list[:fst_stop_idx]
        except ValueError:
            pass  # no [STOP] produced; keep full-length sequences

        original_abstract_sents = batch.original_abstracts_sents[0]

        write_for_rouge(original_abstract_sents, decoded_words, counter,
                        self._rouge_ref_dir, self._rouge_dec_dir)

        # Relate p_gen values to the decoded words for this example.
        analyze_pgen(data_fn=data_fn,
                     vocab=self.vocab,
                     reference_sents=original_abstract_sents,
                     input_art_ids=batch.enc_batch,
                     oov_ids=batch.art_oovs[0],
                     decoded_word_ids=output_ids_nostop,
                     decoded_words=decoded_words,
                     final_dist=final_dist_list,
                     vocab_dist=vocab_list,
                     p_gens=p_gens)

        counter += 1
        if counter % 1000 == 0:
            print('%d example in %d sec' % (counter, time.time() - start))
            start = time.time()
        batch = self.batcher.next_batch()

    print("Decoder has finished reading dataset for single_pass.")
    print("Now starting ROUGE eval...")
    results_dict = rouge_eval(self._rouge_ref_dir, self._rouge_dec_dir)
    rouge_log(results_dict, self._decode_dir)