def compare_sentences(self, g, p):
    """Warn when a gold sentence and a predicted sentence differ in length.

    `g` is the gold sentence and `p` the predicted one; both expose
    `words` and `lineno_beg`. Nothing happens when the two sentences
    have the same number of words.
    """
    len_g = len(g.words)
    len_p = len(p.words)
    if len_g == len_p:
        return

    # Reset tsvlib's notion of the "last line number" before warning;
    # the message below carries its own per-file line numbers.
    tsvlib.global_last_lineno(None, 0)
    message = (
        "Sentence sizes do not match\n"
        "In sentence starting at `{args.gold_file.name}` line {g.lineno_beg} ({len_g} tokens)\n"
        "In sentence starting at `{args.prediction_file.name}` line {p.lineno_beg} ({len_p} tokens)"
    )
    tsvlib.warn(message, g=g, p=p, args=self.args, len_g=len_g, len_p=len_p)
def __init__(self, args):
    """Store the parsed arguments and sanity-check the file names.

    Installs `tsvlib.excepthook` as the process-wide `sys.excepthook`,
    keeps `args` on the instance, and emits a warning when the file
    names suggest that the gold and prediction arguments were swapped.
    """
    sys.excepthook = tsvlib.excepthook
    self.args = args

    pred_file = self.args.prediction_file
    gold_file = self.args.gold_file

    # Expected naming: gold = test.cupt; prediction = test.system.cupt
    looks_swapped = "test.cupt" in pred_file.name or "system" in gold_file.name
    if looks_swapped:
        tsvlib.warn(
            "Something looks wrong in the gold & system arguments.\n"
            "Is `{gold_file.name}` really the gold test.cupt file?\n"
            "Is `{pred_file.name}` really a system prediction file?",
            gold_file=gold_file,
            pred_file=pred_file,
        )
def __init__(self, args):
    """Store the parsed arguments, the category filter, and check file names.

    `self.categs_to_filter` becomes a frozenset of the comma-separated
    entries of `args.filter_categs`, or None when that option is empty
    or unset. A warning is emitted when the file names suggest that the
    gold and prediction arguments were swapped.
    """
    self.args = args

    if self.args.filter_categs:
        self.categs_to_filter = frozenset(self.args.filter_categs.split(","))
    else:
        self.categs_to_filter = None

    pred_file = self.args.prediction_file
    gold_file = self.args.gold_file

    # Expected naming: gold = test.parsemetsv; prediction = test.system.parsemetsv
    looks_swapped = "test.parsemetsv" in pred_file.name or "system" in gold_file.name
    if looks_swapped:
        tsvlib.warn(
            "Something looks wrong in the gold & system arguments.\n"
            "Is `{gold_file.name}` really the gold test.parsemetsv file?\n"
            "Is `{pred_file.name}` really a system prediction file?",
            gold_file=gold_file,
            pred_file=pred_file,
        )
def error(message, **kwargs):
    r"""Report `message` as an ERROR through `tsvlib.warn`, then exit.

    Extra keyword arguments are forwarded unchanged to `tsvlib.warn`.
    The process terminates with exit status 1, so this never returns.
    """
    # Same channel as ordinary warnings, only tagged as an error.
    tsvlib.warn(message, warntype="ERROR", **kwargs)
    sys.exit(1)
def run(self):
    """Compare predictions against gold and print the evaluation report.

    Reads all sentences from `self.args.gold_file` and
    `self.args.prediction_file`, pairs them in order, and accumulates
    statistics globally and per MWE category, continuity and
    multi-tokenness. When `self.args.train_file` is set, statistics are
    also kept for seen/unseen-in-train and variant-of/identical-to-train
    MWEs. The report is printed to stdout once every sentence pair has
    been processed.

    Side effects: sets `self.gold` and `self.pred` (deques that are
    consumed during the run) and prints debug lines when
    `self.args.debug` is true.
    """
    if self.args.debug:
        print("DEBUG: LEGEND: {} {} {} {}".format(
            GOLDPRED_FMT[(False, False)].format('normal-text'),
            GOLDPRED_FMT[(True, False)].format('gold-only'),
            GOLDPRED_FMT[(False, True)].format('pred-only'),
            GOLDPRED_FMT[(True, True)].format('gold-pred-matched')))
        print("DEBUG:")

    mc_args = dict(debug=self.args.debug,
                   tractable=not self.args.combinatorial)
    self.gold = collections.deque(
        tsvlib.iter_tsv_sentences(self.args.gold_file))
    self.pred = collections.deque(
        tsvlib.iter_tsv_sentences(self.args.prediction_file))
    seen = SeenInfo(self.args.train_file)

    def new_stats():
        # Single factory shared by every accumulator below.
        return Statistics(mc_args)

    base_stats = new_stats()
    categ2stats = collections.defaultdict(new_stats)
    continuity2stats = collections.defaultdict(new_stats)
    multitokenness2stats = collections.defaultdict(new_stats)
    # dict[(field, bool)] -> stats
    field_whetherseen2stats = collections.defaultdict(new_stats)
    # dict[(field, field, bool)] -> stats
    field_variantness2stats = collections.defaultdict(new_stats)

    while self.gold or self.pred:
        # Called before popping from both deques; presumably handles the
        # case where one file has fewer sentences -- confirm in check_eof.
        self.check_eof()
        sent_gold = self.gold.popleft()
        sent_pred = self.pred.popleft()
        sent_gold.absorb_mwes_from_contraction_ranges()
        sent_pred.absorb_mwes_from_contraction_ranges()
        if self.args.debug:
            self.print_debug_pairing(sent_gold, sent_pred)
        self.compare_sentences(sent_gold, sent_pred)

        categories = self.mwe_categs(sent_gold) | self.mwe_categs(sent_pred)
        mweinfos_gold = sent_gold.mwe_infos().values()
        mweinfos_pred = sent_pred.mwe_infos().values()
        self.add_to_stats(sent_gold, base_stats, mweinfos_gold,
                          mweinfos_pred, debug_header="Global:")

        # key=str gives a stable order even if a category is None.
        for category in sorted(categories, key=str):
            g = self.mweinfos_per_categ(mweinfos_gold, category)
            p = self.mweinfos_per_categ(mweinfos_pred, category)
            self.add_to_stats(
                sent_gold, categ2stats[category], g, p,
                debug_header="Category {}:".format(category or UNLABELED))

        for continuity in [True, False]:
            g = self.mweinfo_per_continuity(mweinfos_gold, continuity)
            p = self.mweinfo_per_continuity(mweinfos_pred, continuity)
            self.add_to_stats(
                sent_gold, continuity2stats[continuity], g, p,
                debug_header="Continuous:" if continuity else "Discontinuous:")

        for multitokenness in [True, False]:
            g = self.mweinfo_per_multitokenness(mweinfos_gold, multitokenness)
            p = self.mweinfo_per_multitokenness(mweinfos_pred, multitokenness)
            self.add_to_stats(
                sent_gold, multitokenness2stats[multitokenness], g, p,
                debug_header="{}-token:".format(
                    "Multi" if multitokenness else "Single"))

        if self.args.train_file:
            for whetherseen in [True, False]:
                g = seen.mweinfo_per_whetherseen(
                    mweinfos_gold, "LEMMA", whetherseen)
                p = seen.mweinfo_per_whetherseen(
                    mweinfos_pred, "LEMMA", whetherseen)
                self.add_to_stats(
                    sent_gold,
                    field_whetherseen2stats[("LEMMA", whetherseen)], g, p,
                    debug_header="{}-in-train:".format(
                        "Seen" if whetherseen else "Unseen"))

            for variantness in [True, False]:
                # We interpret variantness==False as
                # "MWEs that were seen and are identical"
                g = seen.mweinfo_per_variantness(
                    mweinfos_gold, "LEMMA", "FORM", variantness)
                p = seen.mweinfo_per_variantness(
                    mweinfos_pred, "LEMMA", "FORM", variantness)
                self.add_to_stats(
                    sent_gold,
                    field_variantness2stats[("LEMMA", "FORM", variantness)],
                    g, p,
                    debug_header="{}-train:".format(
                        "Variant-of" if variantness else "Identical-to"))

        if self.args.debug:
            print("DEBUG:")

    # ------------------------------------------
    print("## Global evaluation")
    base_stats.print_stats(prefix='')
    print()

    print("## Per-category evaluation (partition of Global)")
    for category in sorted(categ2stats, key=str):
        prefix = '{}: '.format(category or UNLABELED)
        categ2stats[category].print_mwebased_proportion(
            prefix, baseline=base_stats)
        categ2stats[category].print_stats(prefix)
    print()

    print("## MWE continuity (partition of Global)")
    for continuity in [True, False]:
        prefix = "Continuous: " if continuity else "Discontinuous: "
        continuity2stats[continuity].print_mwebased_proportion(
            prefix, baseline=base_stats)
        continuity2stats[continuity].c_mwebased.print_p_r_f(prefix)
    print()

    print("## Number of tokens (partition of Global)")
    for multitokenness in [True, False]:
        prefix = "{}-token: ".format("Multi" if multitokenness else "Single")
        multitokenness2stats[multitokenness].print_mwebased_proportion(
            prefix, baseline=base_stats)
        multitokenness2stats[multitokenness].c_mwebased.print_p_r_f(prefix)
    print()

    if self.args.train_file:
        if not seen.mwe_fieldindex_sets["LEMMA"]:
            tsvlib.warn(
                "found no MWEs in training file (in field={field_name})",
                field_name="LEMMA", position='')
        else:
            print("## Whether seen in train (partition of Global)")
            for whetherseen in [True, False]:
                prefix = "{}-in-train: ".format(
                    "Seen" if whetherseen else "Unseen")
                field_whetherseen2stats[("LEMMA", whetherseen)] \
                    .print_mwebased_proportion(prefix, baseline=base_stats)
                field_whetherseen2stats[("LEMMA", whetherseen)] \
                    .c_mwebased.print_p_r_f(prefix)
            print()

            print("## Whether identical to train (partition of Seen-in-train)")
            for variantness in [True, False]:
                prefix = "{}-train: ".format(
                    "Variant-of" if variantness else "Identical-to")
                # Proportions here are relative to the seen-in-train subset.
                field_variantness2stats[("LEMMA", "FORM", variantness)] \
                    .print_mwebased_proportion(
                        prefix,
                        baseline=field_whetherseen2stats[("LEMMA", True)])
                field_variantness2stats[("LEMMA", "FORM", variantness)] \
                    .c_mwebased.print_p_r_f(prefix)
            print()
def warn(self, *args, **kwargs):
    """Record that a warning was issued, then delegate to `tsvlib.warn`.

    All positional and keyword arguments are forwarded unchanged.
    """
    # Flag is set before delegating, so it holds even if the call raises.
    self.warned = True
    tsvlib.warn(*args, **kwargs)