Esempio n. 1
0
    def get_scores(self, p1, p2, eval_type, r=None):
        """
        Match the units of a guessed passage against those of a reference passage and summarize the outcome
        per construction. eval_type can be 'unlabeled', 'labeled' or 'weak_labeled'.
        Resets self.mutual and self.error_counters first; the matching itself is delegated to self.find_mutuals
        and only takes place when p1 is given.
        :param p1: passage to compare (when None, nothing is matched)
        :param p2: reference passage object
        :param eval_type: evaluation type to use, out of EVAL_TYPES
        1. UNLABELED: it doesn't matter what labels are there.
        2. LABELED: also requires tag match (if there are multiple units with the same yield, requires one match)
        3. WEAK_LABELED: also requires weak tag match (if there are multiple units with the same yield,
                         requires one match)
        :param r: reference passage for fine-grained evaluation
        :returns: EvaluatorResults object with one SummaryStatistics per construction found in self.mutual
                  (self.fscore only decides whether it is printed, not whether it is returned)
        """
        self.mutual.clear()
        self.error_counters.clear()
        if r is None:
            reference_yield_tags = None
        else:
            reference_yield_tags = create_passage_yields(r, punct=True)[ALL_EDGES.name]

        # The reference passage is always mapped (and mapped first); the guessed one only if it exists
        ref_yields = create_passage_yields(p2, self.constructions,
                                           reference_yield_tags=reference_yield_tags)
        guessed_yields = {}
        if p1 is not None:
            guessed_yields = create_passage_yields(p1, self.constructions,
                                                   reference=p2, reference_yield_tags=reference_yield_tags)
            # Requested constructions come first, then whatever else the reference and the guess contain
            ordered = [c for c in self.constructions if c in guessed_yields or c in ref_yields]
            for yields in (ref_yields, guessed_yields):
                ordered += [c for c in yields if c not in ordered]
            for construction in ordered:
                self.find_mutuals(guessed_yields.get(construction, {}),
                                  ref_yields.get(construction, {}),
                                  eval_type, construction)

        if self.verbose:
            print("Evaluation type: (" + eval_type + ")")

        # For each side (guessed, reference): the yields that were not matched, grouped by construction
        only = []
        for yields in (guessed_yields, ref_yields):
            only.append({c: {y: tags for y, tags in tagged.items() if y not in self.mutual[c]}
                         for c, tagged in yields.items()})

        if self.verbose and self.units and p1 is not None:
            # Lookups by PRIMARY are done one at a time, right before each section is printed
            for title, passage, units in (("==> Mutual Units:", p1, self.mutual),
                                          ("==> Only in guessed:", p1, only[0]),
                                          ("==> Only in reference:", p2, only[1])):
                print(title)
                print_tags_and_text(passage, units[PRIMARY])

        error_counters = self.error_counters.get(eval_type, {})
        res = EvaluatorResults(
            (c, SummaryStatistics(len(matched),
                                  len(only[0].get(c, ())),
                                  len(only[1].get(c, ())),
                                  error_counters.get(c)))
            for c, matched in self.mutual.items())

        if not self.verbose:
            return res
        if self.fscore:
            res.print()
        if self.errors and error_counters:
            res.print_confusion_matrix()
        return res
Esempio n. 2
0
    def get_scores(self, p1, p2, eval_type, r=None):
        """
        prints the relevant statistics and f-scores. eval_type can be 'unlabeled', 'labeled' or 'weak_labeled'.
        calculates a set of all the yields such that both passages have a unit with that yield.
        :param p1: passage to compare; may be None, in which case nothing is matched and every unit of p2
                   is counted as found only in the reference
        :param p2: reference passage object
        :param eval_type: evaluation type to use, out of EVAL_TYPES
        1. UNLABELED: it doesn't matter what labels are there.
        2. LABELED: also requires tag match (if there are multiple units with the same yield, requires one match)
        3. WEAK_LABELED: also requires weak tag match (if there are multiple units with the same yield,
                         requires one match)
        :param r: reference passage for fine-grained evaluation (p2 is used when r is falsy)
        :returns: EvaluatorResults object with one SummaryStatistics per construction; it is always returned,
                  self.fscore only controls whether it is printed
        """
        mutual = OrderedDict()
        # Confusion counters are only collected for labeled evaluation with error reporting enabled
        counters = OrderedDict() if self.errors and eval_type == LABELED else None
        passage_yields = create_passage_yields(r or p2)
        reference_yield_tags = passage_yields[ALL_EDGES.name] if passage_yields else None
        maps = [{} if p is None else create_passage_yields(p, self.constructions, tags=False, reference=p2,
                                                           reference_yield_tags=reference_yield_tags) for p in (p1, p2)]

        # Requested constructions first, then any others found in the reference, then in the guessed passage
        ordered_constructions = [c for c in self.constructions if any(c in m for m in maps)]
        for m in maps[::-1]:
            ordered_constructions += [c for c in m if c not in ordered_constructions]
        for construction in ordered_constructions:
            # Register every construction even when there is no guessed passage: the `only` computation
            # below indexes mutual[c] for each construction of each map, which used to raise KeyError
            # for p1=None because nothing had been registered
            mutual_tags = mutual.setdefault(construction, {})
            if p1 is None:
                continue
            yield_cands = [m.get(construction, {}) for m in maps]
            counter = None if counters is None else counters.setdefault(construction, Counter())
            self.find_mutuals(*yield_cands, eval_type=eval_type, mutual_tags=mutual_tags, counter=counter)

        # Per passage (guessed, reference): yields without a match, grouped by construction
        only = [{c: {y: tags for y, tags in d.items() if y not in mutual[c]} for c, d in m.items()} for m in maps]
        res = EvaluatorResults((c, SummaryStatistics(len(mutual[c]), len(only[0].get(c, ())), len(only[1].get(c, ())),
                                                     None if counters is None else counters.get(c))) for c in mutual)
        if self.verbose:
            print("Evaluation type: (" + eval_type + ")")
            if self.units and p1 is not None:
                print("==> Mutual Units:")
                print_tags_and_text(p1, mutual)
                print("==> Only in guessed:")
                print_tags_and_text(p1, only[0])
                print("==> Only in reference:")
                print_tags_and_text(p2, only[1])
            if self.fscore:
                res.print()
        return res
Esempio n. 3
0
    def get_scores(self, p1, p2, eval_type, r=None):
        """
        prints the relevant statistics and f-scores. eval_type can be 'unlabeled', 'labeled' or 'weak_labeled'.
        calculates a set of all the yields such that both passages have a unit with that yield.
        :param p1: passage to compare; may be None, in which case nothing is matched and every unit of p2
                   is counted as found only in the reference
        :param p2: reference passage object
        :param eval_type: evaluation type to use, out of EVAL_TYPES
        1. UNLABELED: it doesn't matter what labels are there.
        2. LABELED: also requires tag match (if there are multiple units with the same yield, requires one match)
        3. WEAK_LABELED: also requires weak tag match (if there are multiple units with the same yield,
                         requires one match)
        :param r: reference passage for fine-grained evaluation (p2 is used when r is falsy)
        :returns: EvaluatorResults object with one SummaryStatistics per construction; it is always returned,
                  self.fscore only controls whether it is printed
        """
        mutual = OrderedDict()
        # Confusion counters are only collected for labeled evaluation with error reporting enabled
        counters = OrderedDict() if self.errors and eval_type == LABELED else None
        passage_yields = create_passage_yields(r or p2)
        reference_yield_tags = passage_yields[ALL_EDGES.name] if passage_yields else None
        maps = [{} if p is None else create_passage_yields(p, self.constructions, tags=False, reference=p2,
                                                           reference_yield_tags=reference_yield_tags) for p in (p1, p2)]

        # Requested constructions first, then any others found in the reference, then in the guessed passage
        ordered_constructions = [c for c in self.constructions if any(c in m for m in maps)]
        for m in maps[::-1]:
            ordered_constructions += [c for c in m if c not in ordered_constructions]
        for construction in ordered_constructions:
            # Register every construction even when there is no guessed passage: the `only` computation
            # below indexes mutual[c] for each construction of each map, which used to raise KeyError
            # for p1=None because nothing had been registered
            mutual_tags = mutual.setdefault(construction, {})
            if p1 is None:
                continue
            yield_cands = [m.get(construction, {}) for m in maps]
            counter = None if counters is None else counters.setdefault(construction, Counter())
            self.find_mutuals(*yield_cands, eval_type=eval_type, mutual_tags=mutual_tags, counter=counter)

        # Per passage (guessed, reference): yields without a match, grouped by construction
        only = [{c: {y: tags for y, tags in d.items() if y not in mutual[c]} for c, d in m.items()} for m in maps]
        res = EvaluatorResults((c, SummaryStatistics(len(mutual[c]), len(only[0].get(c, ())), len(only[1].get(c, ())),
                                                     None if counters is None else counters.get(c))) for c in mutual)
        if self.verbose:
            print("Evaluation type: (" + eval_type + ")")
            if self.units and p1 is not None:
                print("==> Mutual Units:")
                print_tags_and_text(p1, mutual)
                print("==> Only in guessed:")
                print_tags_and_text(p1, only[0])
                print("==> Only in reference:")
                print_tags_and_text(p2, only[1])
            if self.fscore:
                res.print()
        return res
Esempio n. 4
0
 def get_scores(self, s1, s2, eval_type, r=None):
     """
     Compare the dependency graphs generated from a guessed sentence with those generated from a
     reference sentence, and summarize matches and mismatches per construction.
     Printing is driven by self.verbose (summary) and self.units (every match and mismatch).
     :param s1: sentence to compare
     :param s2: reference sentence
     :param eval_type: evaluation type to use, out of EVAL_TYPES
     1. UNLABELED: disregard dependency relation labels.
     2. LABELED: also requires relation match
     :param r: reference passage for fine-grained evaluation; its yields are stored
               in self.reference_yield_tags (None when r is None)
     :returns: EvaluatorResults with one SummaryStatistics per construction
     """
     if r is None:
         self.reference_yield_tags = None
     else:
         self.reference_yield_tags = create_passage_yields(r, punct=True)[ALL_EDGES.name]

     converter = ConlluConverter()
     # Request both graph streams first and only then materialize them, guessed before reference
     generated = [converter.generate_graphs(sentence) for sentence in (s1, s2)]
     guessed_graphs, ref_graphs = [list(graphs) for graphs in generated]
     guessed_tokens, ref_tokens = [join_tokens(graphs) for graphs in (guessed_graphs, ref_graphs)]
     assert guessed_tokens == ref_tokens, \
         "Tokens do not match: '%s' != '%s'" % diff(guessed_tokens, ref_tokens)

     maps = [self.map_by_construction(graphs, eval_type) for graphs in (guessed_graphs, ref_graphs)]
     # Requested constructions that occur in either map come first; PRIMARY is kept even when absent
     ordered = [c for c in self.constructions if any(c in m for m in maps) or c == PRIMARY]
     # Remaining constructions, reference map before guessed map; membership is tested against
     # the list as it was before this extension
     extras = [c for m in reversed(maps) for c in m if c not in ordered]
     ordered.extend(extras)

     matches = OrderedDict()
     for construction in ordered:
         guessed_units, ref_units = [m.get(construction, set()) for m in maps]
         # (mutual, only in guessed, only in reference)
         matches[construction] = (guessed_units & ref_units,
                                  guessed_units - ref_units,
                                  ref_units - guessed_units)

     res = EvaluatorResults((c, SummaryStatistics(*[len(units) for units in m]))
                            for c, m in matches.items())

     if self.verbose or self.units:
         print()
         print("Evaluation type: (" + eval_type + ")")
         if self.units:
             titles = ("Mutual Units", "Only in guessed", "Only in reference")
             for c, ms in matches.items():
                 print(c.description + ":")
                 for title, m in zip(titles, ms):
                     print("==> %s:" % title)
                     in_order = sorted(m, key=lambda e: e.dependent.position)
                     print(", ".join(str(e) for e in in_order))
                 print()
         if self.verbose:
             res.print()
     return res