def test_n_score(self):
    """Check ``self.rouge.n_score`` against stored Perl ROUGE outputs.

    The parsed model (reference) and system (candidate) texts are scored
    under four scorer configurations, and each result set is compared
    with the matching reference output file:

    * no swr, no stem  -> ``rouge_perl.out`` (scorer built in ``setUp``)
    * swr, no stem     -> ``rouge_perl_swr.out`` (``{"s": True}``)
    * stem, no swr     -> ``rouge_perl_stem.out`` (``{"m": True}``)
    * stem, swr        -> ``rouge_perl_stem_swr.out`` (both flags)

    Each configuration runs in its own ``subTest`` so that a mismatch in
    one configuration does not hide the results of the others.
    """
    models = PerlScriptUtils._parse_models(self.models_path)
    systems = PerlScriptUtils._parse_systems(self.systems_path)

    # (scorer args, reference Perl output, failure message).
    # ``None`` args means "keep the scorer that setUp already built".
    configurations = (
        (
            None,
            "duc2005_subset/rouge_perl.out",
            "Results different from original ROUGE.",
        ),
        (
            {"s": True},
            "duc2005_subset/rouge_perl_swr.out",
            "Results different from original ROUGE (swr).",
        ),
        (
            {"m": True},
            "duc2005_subset/rouge_perl_stem.out",
            "Results different from original ROUGE (stem).",
        ),
        (
            {"m": True, "s": True},
            "duc2005_subset/rouge_perl_stem_swr.out",
            "Results different from original ROUGE (stem, swr).",
        ),
    )

    for rouge_args, perl_out_path, message in configurations:
        with self.subTest(rouge_args=rouge_args):
            if rouge_args is not None:
                self.rouge = Rouge.from_rouge155_args(rouge_args)
            self._assert_n_scores_match_perl(
                models, systems, perl_out_path, message
            )

def _assert_n_scores_match_perl(self, models, systems, perl_out_path, message):
    """Score every candidate with ``self.rouge`` and compare to Perl output.

    Args:
        models: Mapping ``topic_id -> mapping`` whose values are the
            reference texts for that topic.
        systems: Mapping ``system_id -> {topic_id: candidate text}``.
        perl_out_path: Path of the reference output file handed to
            ``PerlScriptUtils.parse_rouge_perl_out``.
        message: Failure message passed to ``assertAlmostEqual``.

    Only the metrics present in the parsed Perl output are compared; any
    extra entries returned by ``n_score`` are ignored.
    """
    # Compute all scores first, then parse the reference file, mirroring
    # the order of operations of the original inline code.
    n_scores = {}
    for system_id, cand_texts in tqdm(systems.items()):
        n_scores[system_id] = {
            topic_id: self.rouge.n_score(models[topic_id].values(), cand_text)
            for topic_id, cand_text in cand_texts.items()
        }

    n_scores_perl = PerlScriptUtils.parse_rouge_perl_out(perl_out_path)

    for system_id, pyrouge_topics in n_scores.items():
        for topic_id, pyrouge_scores in pyrouge_topics.items():
            scores_perl = n_scores_perl[system_id][topic_id]
            for n, scores in scores_perl.items():
                for k, v in scores.items():
                    # ROUGE truncates, while we round, so the values are
                    # only compared to 4 decimal places.
                    self.assertAlmostEqual(v, pyrouge_scores[n][k], 4, message)
def setUp(self):
    """Prepare the default scorer and the fixture locations for each test.

    ``self.rouge`` is built with ``Rouge.from_rouge155_args()`` called
    without arguments. The two paths are relative, so they resolve
    against the current working directory of the test run.
    """
    default_scorer = Rouge.from_rouge155_args()
    models_dir = Path("duc2005_subset/models")
    peers_dir = Path("duc2005_subset/peers")

    self.rouge = default_scorer
    self.models_path, self.systems_path = models_dir, peers_dir