from pythonrouge.pythonrouge import Pythonrouge


def sentence_rouge(reflex, genlex):
    # Score a single generated sentence (genlex) against a single reference
    # sentence (reflex) and return the ROUGE-L F-score.
    rouge = Pythonrouge(n_gram=2, ROUGE_SU4=True, ROUGE_L=True, stemming=True,
                        stopwords=True, word_level=True, length_limit=True,
                        length=50, use_cf=False, cf=95, scoring_formula="average",
                        resampling=True, samples=1000, favor=True, p=0.5)
    # Pythonrouge expects nested lists: summaries as [[sentences]] and
    # references as [[[sentences]]] (one extra level for multiple references).
    genlex = [[genlex]]
    reflex = [[[reflex]]]
    setting_file = rouge.setting(files=False, summary=genlex, reference=reflex)
    # ROUGE_path and data_path are expected to be defined at module level.
    result = rouge.eval_rouge(setting_file, recall_only=False,
                              ROUGE_path=ROUGE_path, data_path=data_path)
    return result['ROUGE-L-F']
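# A minimal usage sketch for sentence_rouge, assuming ROUGE_path and data_path
# point to a local ROUGE-1.5.5 installation. The paths and sentences below are
# placeholders, not taken from the original code.
ROUGE_path = "rouge_files/ROUGE-1.5.5/ROUGE-1.5.5.pl"   # hypothetical location
data_path = "rouge_files/ROUGE-1.5.5/data/"             # hypothetical location

score = sentence_rouge(reflex="The capital of Japan is Tokyo.",
                       genlex="Tokyo is the capital city of Japan.")
print("ROUGE-L F-score: %.4f" % score)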
import gzip
import json

from pythonrouge.pythonrouge import Pythonrouge


def evaluate_rouge_scores(evaluation_file_name):
    # remove_tags, ROUGE_PATH and ROUGE_DATA are assumed to be defined elsewhere
    # in this module.
    summaries = []   # model-generated
    references = []  # human-generated
    # articles = {}
    with gzip.open(evaluation_file_name) as json_file:
        json_data = json_file.read()
    data = json.loads(json_data)
    print("%d entries..." % len(data))
    for example in data:
        # datum = example['data']
        # if not datum in articles:
        #     articles[datum] = True
        summaries.append(
            remove_tags(example['prediction']).encode('utf-8').split())
        references.append([
            remove_tags(label).encode('utf-8').split()
            for label in example['label']
        ])
    print("%d entries are used for evaluation." % len(summaries))
    # DEBUG: print a couple of examples and their respective ROUGE scores
    # print(zip(summaries[5:10], references[5:10]))
    # rouge = Pythonrouge(n_gram=2, ROUGE_SU4=False, ROUGE_L=True, stemming=False,
    #                     stopwords=False, word_level=True, length_limit=False,
    #                     length=50, use_cf=True, cf=95, scoring_formula="average",
    #                     resampling=False, samples=500, favor=False, p=0.5)
    # setting_file = rouge.setting(files=False, summary=summaries[5:10],
    #                              reference=references[5:10])
    # print(rouge.eval_rouge(setting_file, recall_only=False, ROUGE_path=ROUGE_PATH,
    #                        data_path=ROUGE_DATA, f_measure_only=False))
    rouge = Pythonrouge(n_gram=2, ROUGE_SU4=False, ROUGE_L=True, stemming=False,
                        stopwords=False, word_level=True, length_limit=False,
                        length=50, use_cf=True, cf=95, scoring_formula="average",
                        resampling=False, samples=500, favor=False, p=0.5)
    setting_file = rouge.setting(files=False, summary=summaries, reference=references)
    result = rouge.eval_rouge(setting_file, recall_only=False, ROUGE_path=ROUGE_PATH,
                              data_path=ROUGE_DATA, f_measure_only=False)
    return result
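# A minimal sketch of the gzipped JSON layout that evaluate_rouge_scores expects:
# a list of entries, each with a 'prediction' string and a list of reference
# strings under 'label'. The file name and contents here are illustrative only.
import gzip
import json

examples = [
    {"prediction": "tokyo is the capital of japan .",
     "label": ["the capital of japan is tokyo .",
               "japan 's capital city is tokyo ."]},
]
with gzip.open("eval_examples.json.gz", "wt") as f:   # hypothetical file name
    f.write(json.dumps(examples))

# scores = evaluate_rouge_scores("eval_examples.json.gz")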
from pythonrouge.pythonrouge import Pythonrouge


def evaluate(system_summary, reference_summaries, stemming=False, stopwords=False,
             use_cf=False, ngram=2):
    ROUGE_path = "rouge_files/ROUGE-1.5.5/ROUGE-1.5.5.pl"
    data_path = "rouge_files/ROUGE-1.5.5/data/"
    # initialize setting of ROUGE, eval ROUGE-1, 2
    rouge = Pythonrouge(n_gram=ngram, ROUGE_SU4=False, ROUGE_L=False,
                        stemming=stemming, stopwords=stopwords, word_level=True,
                        length_limit=True, length=100, use_cf=use_cf, cf=95,
                        scoring_formula="average", resampling=True, samples=1000,
                        favor=True, p=0.5)
    # system summary: list of summaries, where each summary is a list of sentences
    summary = [system_summary]
    # reference summaries: list of (list of summaries per article),
    # where each summary is a list of sentences
    reference = [[[ref] for ref in reference_summaries]]
    setting_file = rouge.setting(files=False, summary=summary, reference=reference,
                                 temp_root='')
    result = rouge.eval_rouge(setting_file, ROUGE_path=ROUGE_path, data_path=data_path)
    return result
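# Example call for evaluate(), assuming a ROUGE-1.5.5 checkout exists under
# rouge_files/ as hard-coded in the function. The sentences are illustrative.
# Inputs follow the nesting documented above: the system summary is a list of
# sentences, and each reference summary is passed as a single string here.
system_summary = ["Tokyo is the capital of Japan.",
                  "It is also the country's economic center."]
reference_summaries = ["The capital of Japan, Tokyo, is the center of Japanese economy."]
scores = evaluate(system_summary, reference_summaries, stemming=True, ngram=2)
print(scores)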
from __future__ import print_function
import sys
from pythonrouge.pythonrouge import Pythonrouge

if __name__ == '__main__':
    ROUGE_path = "./pythonrouge/RELEASE-1.5.5/ROUGE-1.5.5.pl"
    data_path = "./pythonrouge/RELEASE-1.5.5/data"
    summary_dir = "./sample/summary/"
    reference_dir = "./sample/reference/"
    # setting rouge options
    rouge = Pythonrouge(n_gram=2, ROUGE_SU4=True, ROUGE_L=True, stemming=True,
                        stopwords=True, word_level=True, length_limit=True,
                        length=50, use_cf=False, cf=95, scoring_formula="average",
                        resampling=True, samples=1000, favor=True, p=0.5)
    print("evaluate summary & reference in these dirs\nsummary: {}\nreference: {}".format(summary_dir, reference_dir))

    print("\nAll metrics")
    setting_file = rouge.setting(files=True, summary_path=summary_dir,
                                 reference_path=reference_dir)
    print(rouge.eval_rouge(setting_file, ROUGE_path=ROUGE_path, data_path=data_path))

    print("\nRecall only and save setting.xml")
    setting_file = rouge.setting(files=True, summary_path=summary_dir,
                                 reference_path=reference_dir, delete=False)
    print(rouge.eval_rouge(setting_file, recall_only=True, ROUGE_path=ROUGE_path,
                           data_path=data_path))

    print("\nEvaluate ROUGE based on sentence lists")
    summary = [["Great location, very good selection of food for breakfast buffet.",
                "Stunning food, amazing service.",
                "The food is excellent and the service great."],
               ["The keyboard, more than 90% standard size, is just large enough .",
                "Surprisingly readable screen for the size .",
                "Smaller size videos play even smoother ."]]
    reference = [[["Food was excellent with a wide range of choices and good services.",
                   "It was a bit expensive though."],
                  ["Food can be a little bit overpriced, but is good for a hotel."],
                  ["The food in the hotel was a little over priced but excellent in taste and choice.",
                   "There were also many choices to eat in the near vicinity of the hotel."],
                  ["The food is good, the service great.",
from collections import Counter

from pythonrouge.pythonrouge import Pythonrouge

# initialize setting of ROUGE, eval ROUGE-1, 2, SU4, L
rouge = Pythonrouge(n_gram=2, ROUGE_SU4=True, ROUGE_L=True, stopwords=True)

path = "../Summaries"
modelGeneratedSummariesPath = path + "/modelGenerated/1.txt"
goldReferenceSummariesPath = path + "/goldReference/1.txt"
avgRougeScoreCounter = Counter()

with open(modelGeneratedSummariesPath) as f:
    modelLines = f.readlines()
with open(goldReferenceSummariesPath) as f:
    goldLines = f.readlines()

noOfDocs = 0
for modelDocSummary, goldDocSummary in zip(modelLines, goldLines):
    # skip empty lines (a bare newline has length 1)
    if len(modelDocSummary) == 1:
        continue
    modelSummaryInputToRouge = [[modelDocSummary]]
    goldSummaryInputToRouge = [[[goldDocSummary]]]
    setting_file = rouge.setting(files=False, summary=modelSummaryInputToRouge,
                                 reference=goldSummaryInputToRouge)
    rougeScore = rouge.eval_rouge(setting_file, recall_only=True)
    avgRougeScoreCounter += Counter(rougeScore)
    noOfDocs += 1

# average the per-document recall scores
avgRougeScore = {k: v / float(noOfDocs) for k, v in avgRougeScoreCounter.items()}
print(avgRougeScore)
# The opening of this snippet is missing; the constructor head below
# (rouge = Pythonrouge(n_gram=2, ...)) is assumed from the other examples.
rouge = Pythonrouge(n_gram=2,
                    ROUGE_SU4=True, ROUGE_L=True, stemming=True, stopwords=True,
                    word_level=True, length_limit=True, length=50, use_cf=False,
                    cf=95, scoring_formula="average", resampling=True,
                    samples=1000, favor=True, p=0.5)

# system summary & reference summary
summary = [[" Tokyo is the one of the biggest city in the world."]]
reference = [[["The capital of Japan, Tokyo, is the center of Japanese economy."]]]

# If you evaluate ROUGE by sentence list as above, set files=False
setting_file = rouge.setting(files=False, summary=summary, reference=reference)

# If you need only recall of ROUGE metrics, set recall_only=True
# (ROUGE_path and data_path are expected to be defined earlier in the script)
result = rouge.eval_rouge(setting_file, recall_only=True,
                          ROUGE_path=ROUGE_path, data_path=data_path)
print(result)
import sys

from pythonrouge.pythonrouge import Pythonrouge

ROUGE_path = sys.argv[1]      # ROUGE-1.5.5.pl
data_path = sys.argv[2]       # data folder in RELEASE-1.5.5
summary_dir = sys.argv[3]
reference_dir = sys.argv[4]

# initialize setting of ROUGE, eval ROUGE-1~4, SU4
rouge = Pythonrouge(n_gram=4, ROUGE_SU4=True, ROUGE_L=True, stemming=True,
                    stopwords=True, word_level=True, length_limit=True, length=50,
                    use_cf=False, cf=95, scoring_formula="average",
                    resampling=True, samples=1000, favor=True, p=0.5)

# make a setting file; set files=True because the summaries and references have
# already been saved in the directories given above
setting_file = rouge.setting(files=True, summary_path=summary_dir,
                             reference_path=reference_dir)
result = rouge.eval_rouge(setting_file, ROUGE_path=ROUGE_path, data_path=data_path)
print(result)
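# Example invocation of the script above, assuming it is saved as
# evaluate_dirs.py (the file name and directory layout are hypothetical):
#
#   python evaluate_dirs.py ./RELEASE-1.5.5/ROUGE-1.5.5.pl ./RELEASE-1.5.5/data \
#          ./summaries/ ./references/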