import os

import pandas as pd

from annotation import Annotation
import scorer


class Evaluator:
    """ Superclass for evaluation, holding evaluation methods common to BookNLP,
        FanfictionNLP, and any other systems.
    """

    def __init__(self, fic_csv_dirpath,
                 coref_settings, quote_settings,
                 evaluate_coref=False, evaluate_quotes=False,
                 coref_from='pipeline', quotes_from='pipeline',
                 run_quote_attribution=False,
                 scores_outpath=None,
                 dataset_name=None,
                 ):
        """ Args:
            coref_settings, quote_settings: NamedTuples with:
                .mod_dirpath
                .mod_ext
                .gold_dirpath
                .gold_ext
                .preds_outpath
        """
        self.fic_csv_dirpath = fic_csv_dirpath
        self.whether_evaluate_coref = evaluate_coref
        self.whether_evaluate_quotes = evaluate_quotes
        self.coref_from = coref_from
        self.quotes_from = quotes_from
        self.run_quote_attribution = run_quote_attribution
        self.coref_settings = coref_settings
        self.quote_settings = quote_settings
        self.scores_outpath = scores_outpath
        self.dataset_name = dataset_name
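
    # A minimal sketch of building the settings tuples this constructor
    # expects; "EvalSettings" is a hypothetical name used only for
    # illustration (the field names come from the docstring above), and the
    # paths and extensions are placeholders:
    #
    #   from collections import namedtuple
    #   EvalSettings = namedtuple('EvalSettings',
    #       ['mod_dirpath', 'mod_ext', 'gold_dirpath', 'gold_ext', 'preds_outpath'])
    #   coref_settings = EvalSettings('models/coref', '.json',
    #                                 'gold/coref', '.csv', 'preds/coref')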

    def evaluate_coref(self, fandom_fname, fic_representation, save=True):
        """ Evaluate coref for a fic.
            Args:
                save: save AnnotatedSpan objects in a pickled file in a tmp directory
        """
        # Load gold mentions
        gold = Annotation(self.coref_settings.gold_dirpath, fandom_fname,
                          file_ext=self.coref_settings.gold_ext,
                          fic_csv_dirpath=self.fic_csv_dirpath)
        gold.extract_annotated_spans()

        # Load predicted mentions
        fic_representation.extract_character_mentions(
            save_dirpath=self.coref_settings.preds_outpath)

        # Score predicted mentions against gold with exact span matching
        coref_scores = scorer.coref_scores(
            fic_representation.character_mentions, gold.annotations, exact_match=True)

        print('\tCoref results:')
        for key in ['lea_f1', 'lea_precision', 'lea_recall']:
            print(f'\t\t{key}: {coref_scores[key]: .2%}')
        print()

        return coref_scores
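
    # Usage sketch (hedged): fic_representation is built elsewhere in the
    # pipeline and must expose extract_character_mentions() and a
    # character_mentions attribute, as used above; the fname is a placeholder.
    #
    #   scores = evaluator.evaluate_coref('fandom_fic1', fic_representation)
    #   print(scores['lea_f1'])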

    def evaluate_quotes(self, fandom_fname, fic_representation, save=True, exact_match=True):
        """ Evaluate quote extraction and attribution for a fic.
            Args:
                save: save AnnotatedSpan quote objects in a pickled file in a tmp directory
                exact_match: whether predicted quote spans must match gold spans exactly
        """
        # Load gold quote spans
        gold = Annotation(self.quote_settings.gold_dirpath, fandom_fname,
                          file_ext=self.quote_settings.gold_ext,
                          fic_csv_dirpath=self.fic_csv_dirpath)
        gold.extract_annotated_spans()

        # Load predicted quote spans
        fic_representation.extract_quotes(
            save_dirpath=self.quote_settings.preds_outpath,
            coref_from=self.coref_from)

        # Score predicted quotes against gold
        quote_scores, quote_groups = scorer.quote_scores(
            fic_representation.quotes, gold.annotations, exact_match=exact_match)

        print('\tQuote extraction results:')
        for key in ['extraction_f1', 'extraction_precision', 'extraction_recall']:
            print(f'\t\t{key}: {quote_scores[key]: .2%}')
        print('\tQuote attribution results:')
        for key in ['attribution_f1', 'attribution_precision', 'attribution_recall']:
            print(f'\t\t{key}: {quote_scores[key]: .2%}')
        print()

        return quote_scores, quote_groups
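
    # Usage sketch (hedged): passing exact_match=False relaxes span matching,
    # assuming scorer.quote_scores supports partial matches as its parameter
    # suggests.
    #
    #   quote_scores, quote_groups = evaluator.evaluate_quotes(
    #       'fandom_fic1', fic_representation, exact_match=False)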

    def save_scores(self, scores, system_name, params):
        """ Save scores to a CSV under self.scores_outpath/self.dataset_name/system_name.
            Args:
                scores: scores as a list of dicts
                system_name: name of the evaluated system, used as a subdirectory
                params: list of parameter strings joined to form the output filename
        """
        out_dirpath = os.path.join(self.scores_outpath, self.dataset_name, system_name)
        # makedirs also creates missing intermediate directories (os.mkdir would
        # fail if self.scores_outpath itself did not yet exist)
        os.makedirs(out_dirpath, exist_ok=True)
        outpath = os.path.join(out_dirpath, f'{"_".join(params)}_scores.csv')
        pd.DataFrame(scores).to_csv(outpath, index=False)
        print(f"Saved scores to {outpath}")