コード例 #1
0
    def extract_summary(self, ):
        """Score the pre-computed oracle (extractive upper-bound) summaries.

        Reads the dataset at ``self.full_path``; for each item, selects the
        article sentences at the stored oracle indices and evaluates them
        against the reference abstract with ROUGE.

        Returns:
            The result produced by ``test_rouge``.
        """
        data = read_json(self.full_path)

        # NYT-style files store the oracle indices under a different key.
        # Hoisted out of the loop: the path does not change per item.
        oracle_key = 'oracle_sens' if 'NYT' in self.full_path else 'oracle'

        summaries = []
        references = []
        for item in tqdm(data, desc="Oracle:"):
            article = item['article']
            # The oracle summary is the subset of article sentences whose
            # indices were pre-computed as the best extractive selection.
            summaries.append([article[i] for i in item[oracle_key]])
            references.append([item['abstract']])

        return test_rouge(summaries, references, self.processors)
コード例 #2
0
    def extract_summary(self, ):
        """Summarize every article with LexRank and evaluate with ROUGE."""
        records = read_json(self.full_path)

        # Collect the documents and their single-reference abstracts.
        articles = [rec['article'] for rec in records]
        abstracts = [[rec['abstract']] for rec in records]

        # The ranker is fitted on the whole corpus, then applied per article.
        ranker = LexRank(articles, stopwords=STOPWORDS['en'])

        summaries = []
        for doc in tqdm(articles, desc="LexRank:"):
            summaries.append(
                ranker.get_summary(doc, summary_size=self.summary_size, threshold=self.threshold)
            )

        return test_rouge(summaries, abstracts, self.processors)
コード例 #3
0
    def extract_summary(self, ):
        """Summarize every article with TextRank and evaluate with ROUGE."""
        records = read_json(self.full_path)

        # ``summarize`` expects one text blob, so join the sentence list.
        articles = ["\n".join(rec['article']) for rec in records]
        abstracts = [[rec['abstract']] for rec in records]

        summaries = [
            summarize(text, split=True, ratio=self.extract_ratio)
            for text in tqdm(articles, desc="Text Rank:")
        ]

        return test_rouge(summaries, abstracts, self.processors)
コード例 #4
0
    def extract_summary(self, ):
        """Lead baseline: use the first ``self.extract_num`` sentences of
        each article as its summary and evaluate with ROUGE.

        Returns:
            The result produced by ``test_rouge``.

        BUG FIX: the original line ``articles.appenditem['article'])`` was a
        syntax error (missing "(" after ``append``); restored to
        ``articles.append(item['article'])`` semantics.
        """
        data = read_json(self.full_path)

        summaries = []
        references = []
        for item in tqdm(data, desc="Lead:"):
            article = item['article']
            # Lead-k: the summary is simply the article's first k sentences.
            summaries.append(article[:self.extract_num])
            references.append([item['abstract']])

        return test_rouge(summaries, references, self.processors)
コード例 #5
0
 def _report_rouge(self, gold_path, can_path):
     """Compute ROUGE between the candidate and gold files.

     Returns the metrics dict produced by ``test_rouge``.
     """
     self.logger.info("Calculating Rouge")
     return test_rouge(self.args.temp_dir, can_path, gold_path)
コード例 #6
0
if __name__ == '__main__':
    # Usage: script <data_dir> <split>; loads "<data_dir>/<split>.json".
    data = read_json(pjoin(sys.argv[1], f"{sys.argv[2]}.json"))

    articles = [item['article'] for item in data]
    # Each reference set holds a single abstract, as test_rouge expects.
    abstracts = [[item['abstract']] for item in data]

    list_of_summarization = []
    # Iterate the articles directly instead of range(len(...)); the unused
    # ``original_sentences`` list from the original has been dropped.
    for sample in articles:
        sentences = processFile(sample)
        idf_weights = IDFs(sentences)
        tf_idf_weights = TF_IDF(sentences)
        # Build a query from the 8 highest-weighted TF-IDF words, seed the
        # summary with the best-matching sentence, then extend it.
        # NOTE(review): 100 and 0.5 look like a word budget and a weighting
        # factor — confirm against makeSummary's signature.
        query = buildQuery(sentences, tf_idf_weights, 8)
        best_sentence = bestSentence(sentences, query, idf_weights)
        summary = makeSummary(sentences, best_sentence, query, 100, 0.5, idf_weights)
        list_of_summarization.append([sent.getOriginalWords() for sent in summary])

    test_rouge(list_of_summarization, abstracts, 8)