def get_scores(self, p1, p2, eval_type, r=None):
    """
    Score a guessed passage against a reference passage for one evaluation type.

    Clears ``self.mutual`` and ``self.error_counters``, extracts the yields of
    both passages per construction and hands each pair to ``self.find_mutuals``
    (which is presumably what fills ``self.mutual``; the matches are read back
    from there afterwards). When ``self.verbose`` is set, the evaluation type,
    optionally the matched/unmatched primary units, the f-scores and the
    confusion matrix are printed.

    :param p1: passage to compare; if None, no guessed yields are extracted
    :param p2: reference passage object
    :param eval_type: evaluation type to use, out of EVAL_TYPES
       1. UNLABELED: it doesn't matter what labels are there.
       2. LABELED: also requires tag match
          (if there are multiple units with the same yield, requires one match)
       3. WEAK_LABELED: also requires weak tag match
          (if there are multiple units with the same yield, requires one match)
    :param r: reference passage for fine-grained evaluation
    :returns: EvaluatorResults with one SummaryStatistics entry per construction
              in ``self.mutual`` (returned regardless of what is printed)
    """
    self.mutual.clear()
    self.error_counters.clear()

    if r is None:
        reference_yield_tags = None
    else:
        reference_yield_tags = create_passage_yields(r, punct=True)[ALL_EDGES.name]

    # The reference is processed before the guess, as in the original call order.
    ref_yields = create_passage_yields(
        p2, self.constructions, reference_yield_tags=reference_yield_tags)
    guessed_yields = {}
    if p1 is not None:
        guessed_yields = create_passage_yields(
            p1, self.constructions, reference=p2,
            reference_yield_tags=reference_yield_tags)
    maps = [guessed_yields, ref_yields]

    # Constructions from self.constructions come first, followed by any others
    # found in the reference and then in the guess.
    ordered = [c for c in self.constructions if any(c in m for m in maps)]
    for yields in (ref_yields, guessed_yields):
        ordered.extend([c for c in yields if c not in ordered])

    for construction in ordered:
        self.find_mutuals(guessed_yields.get(construction, {}),
                          ref_yields.get(construction, {}),
                          eval_type, construction)

    if self.verbose:
        print("Evaluation type: (" + eval_type + ")")

    # Per passage and construction: the yields that were not matched.
    # self.mutual[c] is deliberately looked up per yield (not hoisted), so it
    # is only touched for constructions that actually have yields.
    only = []
    for yields in maps:
        unmatched = {}
        for c, d in yields.items():
            unmatched[c] = {y: tags for y, tags in d.items()
                            if y not in self.mutual[c]}
        only.append(unmatched)
    guessed_only, ref_only = only

    if self.verbose and self.units and p1 is not None:
        print("==> Mutual Units:")
        print_tags_and_text(p1, self.mutual[PRIMARY])
        print("==> Only in guessed:")
        print_tags_and_text(p1, guessed_only[PRIMARY])
        print("==> Only in reference:")
        print_tags_and_text(p2, ref_only[PRIMARY])

    error_counters = self.error_counters.get(eval_type, {})
    res = EvaluatorResults(
        (c, SummaryStatistics(len(matched),
                              len(guessed_only.get(c, ())),
                              len(ref_only.get(c, ())),
                              error_counters.get(c)))
        for c, matched in self.mutual.items())

    if self.verbose:
        if self.fscore:
            res.print()
        if self.errors and error_counters:
            res.print_confusion_matrix()
    return res
def get_scores(self, p1, p2, eval_type, r=None):
    """
    Score a guessed passage against a reference passage for one evaluation type.

    Extracts the yields of both passages per construction, lets
    ``self.find_mutuals`` collect the matching ones, and summarizes matched,
    guessed-only and reference-only counts per construction. When
    ``self.verbose`` is set, the evaluation type, optionally the units, and
    (if ``self.fscore``) the scores are printed.

    :param p1: passage to compare; may be None, in which case nothing is
               matched and every reference yield counts as "only in reference"
    :param p2: reference passage object
    :param eval_type: evaluation type to use, out of EVAL_TYPES
       1. UNLABELED: it doesn't matter what labels are there.
       2. LABELED: also requires tag match
          (if there are multiple units with the same yield, requires one match)
       3. WEAK_LABELED: also requires weak tag match
          (if there are multiple units with the same yield, requires one match)
    :param r: reference passage for fine-grained evaluation; ``p2`` is used
              instead when ``r`` is falsy
    :returns: EvaluatorResults with one SummaryStatistics entry per construction
              (returned regardless of what is printed)
    """
    mutual = OrderedDict()
    # Error counters are only kept for labeled evaluation with self.errors set.
    counters = OrderedDict() if self.errors and eval_type == LABELED else None
    passage_yields = create_passage_yields(r or p2)
    reference_yield_tags = passage_yields[ALL_EDGES.name] if passage_yields else None
    # maps[0]: yields of the guess (empty without a guess), maps[1]: of the reference.
    maps = [
        {} if p is None else create_passage_yields(
            p, self.constructions, tags=False, reference=p2,
            reference_yield_tags=reference_yield_tags)
        for p in (p1, p2)
    ]

    # Constructions from self.constructions come first, followed by any others
    # found in the reference and then in the guess.
    ordered_constructions = [c for c in self.constructions if any(c in m for m in maps)]
    for m in maps[::-1]:
        ordered_constructions += [c for c in m if c not in ordered_constructions]

    for construction in ordered_constructions:
        # Register every construction, even without a guess: the `only`
        # computation below subscripts `mutual` for each construction present
        # in either map and would otherwise raise KeyError when p1 is None.
        mutual_tags = mutual.setdefault(construction, {})
        if p1 is None:
            continue  # nothing to match against; leave the entry empty
        yield_cands = [m.get(construction, {}) for m in maps]
        self.find_mutuals(
            *yield_cands, eval_type=eval_type, mutual_tags=mutual_tags,
            counter=None if counters is None else counters.setdefault(construction, Counter()))

    # Per passage and construction: the yields that were not matched.
    only = [{c: {y: tags for y, tags in d.items() if y not in mutual[c]}
             for c, d in m.items()} for m in maps]
    res = EvaluatorResults(
        (c, SummaryStatistics(len(mutual[c]),
                              len(only[0].get(c, ())),
                              len(only[1].get(c, ())),
                              None if counters is None else counters.get(c)))
        for c in mutual)

    if self.verbose:
        print("Evaluation type: (" + eval_type + ")")
        if self.units and p1 is not None:
            print("==> Mutual Units:")
            print_tags_and_text(p1, mutual)
            print("==> Only in guessed:")
            print_tags_and_text(p1, only[0])
            print("==> Only in reference:")
            print_tags_and_text(p2, only[1])
        if self.fscore:
            res.print()
    return res
def get_scores(self, p1, p2, eval_type, r=None):
    """
    Score a guessed passage against a reference passage for one evaluation type.

    Extracts the yields of both passages per construction, lets
    ``self.find_mutuals`` collect the matching ones, and summarizes matched,
    guessed-only and reference-only counts per construction. When
    ``self.verbose`` is set, the evaluation type, optionally the units, and
    (if ``self.fscore``) the scores are printed.

    :param p1: passage to compare; may be None, in which case nothing is
               matched and every reference yield counts as "only in reference"
    :param p2: reference passage object
    :param eval_type: evaluation type to use, out of EVAL_TYPES
       1. UNLABELED: it doesn't matter what labels are there.
       2. LABELED: also requires tag match
          (if there are multiple units with the same yield, requires one match)
       3. WEAK_LABELED: also requires weak tag match
          (if there are multiple units with the same yield, requires one match)
    :param r: reference passage for fine-grained evaluation; ``p2`` is used
              instead when ``r`` is falsy
    :returns: EvaluatorResults with one SummaryStatistics entry per construction
              (returned regardless of what is printed)
    """
    mutual = OrderedDict()
    # Error counters are only kept for labeled evaluation with self.errors set.
    counters = OrderedDict() if self.errors and eval_type == LABELED else None
    passage_yields = create_passage_yields(r or p2)
    reference_yield_tags = passage_yields[ALL_EDGES.name] if passage_yields else None
    # maps[0]: yields of the guess (empty without a guess), maps[1]: of the reference.
    maps = [
        {} if p is None else create_passage_yields(
            p, self.constructions, tags=False, reference=p2,
            reference_yield_tags=reference_yield_tags)
        for p in (p1, p2)
    ]

    # Constructions from self.constructions come first, followed by any others
    # found in the reference and then in the guess.
    ordered_constructions = [c for c in self.constructions if any(c in m for m in maps)]
    for m in maps[::-1]:
        ordered_constructions += [c for c in m if c not in ordered_constructions]

    for construction in ordered_constructions:
        # Register every construction, even without a guess: the `only`
        # computation below subscripts `mutual` for each construction present
        # in either map and would otherwise raise KeyError when p1 is None.
        mutual_tags = mutual.setdefault(construction, {})
        if p1 is None:
            continue  # nothing to match against; leave the entry empty
        yield_cands = [m.get(construction, {}) for m in maps]
        self.find_mutuals(
            *yield_cands, eval_type=eval_type, mutual_tags=mutual_tags,
            counter=None if counters is None else counters.setdefault(construction, Counter()))

    # Per passage and construction: the yields that were not matched.
    only = [{c: {y: tags for y, tags in d.items() if y not in mutual[c]}
             for c, d in m.items()} for m in maps]
    res = EvaluatorResults(
        (c, SummaryStatistics(len(mutual[c]),
                              len(only[0].get(c, ())),
                              len(only[1].get(c, ())),
                              None if counters is None else counters.get(c)))
        for c in mutual)

    if self.verbose:
        print("Evaluation type: (" + eval_type + ")")
        if self.units and p1 is not None:
            print("==> Mutual Units:")
            print_tags_and_text(p1, mutual)
            print("==> Only in guessed:")
            print_tags_and_text(p1, only[0])
            print("==> Only in reference:")
            print_tags_and_text(p2, only[1])
        if self.fscore:
            res.print()
    return res
def get_scores(self, s1, s2, eval_type, r=None):
    """
    Score a guessed sentence against a reference sentence for one evaluation type.

    Both inputs are turned into graphs with ``ConlluConverter.generate_graphs``
    and must produce identical tokens. The graphs are grouped by construction
    via ``self.map_by_construction``; for every construction the matched,
    guessed-only and reference-only sets are computed with set operations.
    Output is printed when ``self.verbose`` or ``self.units`` is set.

    :param s1: sentence to compare
    :param s2: reference sentence
    :param eval_type: evaluation type to use, out of EVAL_TYPES
       1. UNLABELED: disregard dependency relation labels.
       2. LABELED: also requires relation match
    :param r: reference passage for fine-grained evaluation; when given, its
              ALL_EDGES yields are stored in ``self.reference_yield_tags``
    :returns: EvaluatorResults with one SummaryStatistics entry per construction
    """
    if r is None:
        self.reference_yield_tags = None
    else:
        self.reference_yield_tags = create_passage_yields(r, punct=True)[ALL_EDGES.name]

    converter = ConlluConverter()
    # Call generate_graphs for both inputs before materializing either result,
    # keeping the same evaluation order as the nested map() calls this replaces.
    graph_iters = [converter.generate_graphs(sentence) for sentence in (s1, s2)]
    g1, g2 = (list(graphs) for graphs in graph_iters)
    t1 = join_tokens(g1)
    t2 = join_tokens(g2)
    assert t1 == t2, "Tokens do not match: '%s' != '%s'" % diff(t1, t2)

    maps = [self.map_by_construction(graphs, eval_type) for graphs in (g1, g2)]

    # Constructions from self.constructions come first (PRIMARY is kept even if
    # neither side has it), followed by any others found in the reference and
    # then in the guess.
    ordered = [c for c in self.constructions
               if any(c in m for m in maps) or c == PRIMARY]
    ordered += [c for m in reversed(maps) for c in m if c not in ordered]

    matches = OrderedDict()
    for construction in ordered:
        guessed, ref = (m.get(construction, set()) for m in maps)
        # (mutual, only in guessed, only in reference)
        matches[construction] = (guessed & ref, guessed - ref, ref - guessed)

    res = EvaluatorResults(
        (c, SummaryStatistics(*(len(units) for units in triple)))
        for c, triple in matches.items())

    if self.verbose or self.units:
        print()
        print("Evaluation type: (" + eval_type + ")")
        if self.units:
            titles = ("Mutual Units", "Only in guessed", "Only in reference")
            for c, triple in matches.items():
                print(c.description + ":")
                for title, units in zip(titles, triple):
                    print("==> %s:" % title)
                    by_position = sorted(units, key=lambda edge: edge.dependent.position)
                    print(", ".join(str(edge) for edge in by_position))
                print()
        if self.verbose:
            res.print()
    return res