def extract_summary(self, ):
    """Score the extractive *oracle* summaries against reference abstracts.

    Reads the dataset at ``self.full_path``, picks the oracle sentence
    indices per document (NYT data stores them under a different key),
    and returns the ROUGE result from ``test_rouge``.
    """
    records = read_json(self.full_path)
    # NYT dumps name the oracle-index field 'oracle_sens'; others use 'oracle'.
    oracle_key = 'oracle_sens' if 'NYT' in self.full_path else 'oracle'

    articles = [rec['article'] for rec in records]
    abstracts = [rec['abstract'] for rec in records]
    oracle_ids = [rec[oracle_key] for rec in records]

    summaries = []
    references = []
    for article, abstract, ids in tqdm(zip(articles, abstracts, oracle_ids),
                                       desc="Oracle:"):
        # The oracle summary is the article sentences at the oracle indices.
        summaries.append([article[i] for i in ids])
        references.append([abstract])

    return test_rouge(summaries, references, self.processors)
def extract_summary(self, ):
    """Run the LexRank baseline over every article and score it with ROUGE.

    Builds a LexRank model from all articles, extracts a summary of
    ``self.summary_size`` sentences per article (threshold
    ``self.threshold``), and returns the ``test_rouge`` result.
    """
    records = read_json(self.full_path)
    articles = [rec['article'] for rec in records]
    # test_rouge expects each reference as a list of abstracts.
    abstracts = [[rec['abstract']] for rec in records]

    lxr = LexRank(articles, stopwords=STOPWORDS['en'])
    summaries = []
    for article in tqdm(articles, desc="LexRank:"):
        summaries.append(
            lxr.get_summary(article,
                            summary_size=self.summary_size,
                            threshold=self.threshold)
        )

    return test_rouge(summaries, abstracts, self.processors)
def extract_summary(self, ):
    """Run the TextRank (gensim ``summarize``) baseline and score it with ROUGE.

    Joins each article's sentences into one text, summarizes it at ratio
    ``self.extract_ratio``, and returns the ``test_rouge`` result.
    """
    records = read_json(self.full_path)
    # gensim's summarize wants a single string, so join the sentence list.
    articles = ["\n".join(rec['article']) for rec in records]
    abstracts = [[rec['abstract']] for rec in records]

    summaries = [
        summarize(text, split=True, ratio=self.extract_ratio)
        for text in tqdm(articles, desc="Text Rank:")
    ]

    return test_rouge(summaries, abstracts, self.processors)
def extract_summary(self, ):
    """Score the Lead baseline with ROUGE.

    The Lead summary of each article is simply its first
    ``self.extract_num`` sentences. Returns the ``test_rouge`` result.
    """
    data = read_json(self.full_path)
    articles = []
    abstracts = []
    for item in data:
        # Fix: original read `articles.appenditem['article'])` — the call
        # parenthesis after `append` was missing (a SyntaxError).
        articles.append(item['article'])
        abstracts.append(item['abstract'])

    summaries = []
    references = []
    for article, abstract in tqdm(zip(articles, abstracts), desc="Lead:"):
        # Lead-N: take the first extract_num sentences as the summary.
        summaries.append(article[:self.extract_num])
        references.append([abstract])

    return test_rouge(summaries, references, self.processors)
def _report_rouge(self, gold_path, can_path):
    """Compute ROUGE between the candidate and gold summary files.

    NOTE(review): this `test_rouge` call passes (temp_dir, candidate, gold),
    unlike the (summaries, references, processors) signature used by the
    extract_summary methods — presumably a different `test_rouge`; verify
    against the imports.
    """
    self.logger.info("Calculating Rouge")
    return test_rouge(self.args.temp_dir, can_path, gold_path)
if __name__ == '__main__':
    # Usage: script <data_dir> <split> — reads "<data_dir>/<split>.json",
    # builds a query-focused extractive summary per article, and scores
    # the result with ROUGE.
    data = read_json(pjoin(sys.argv[1], f"{sys.argv[2]}.json"))
    articles = [item['article'] for item in data]
    abstracts = [[item['abstract']] for item in data]

    list_of_summarization = []
    for sample in articles:
        sentences = processFile(sample)
        original_sentences = [sent.originalWords for sent in sentences]  # kept; unused downstream

        IDF_w = IDFs(sentences)
        TF_IDF_w = TF_IDF(sentences)

        # Build an 8-word query from the highest-TF-IDF terms to stand in
        # for the document's topic.
        query = buildQuery(sentences, TF_IDF_w, 8)

        # Seed the summary with the single sentence best matching the query,
        # then grow it up to 100 words with lambda = 0.5.
        best1sentence = bestSentence(sentences, query, IDF_w)
        summary = makeSummary(sentences, best1sentence, query, 100, 0.5, IDF_w)

        list_of_summarization.append(
            [sent.getOriginalWords() for sent in summary]
        )

    test_rouge(list_of_summarization, abstracts, 8)