Example 1
 def aggregate_stats(self, stats: MetricStats) -> np.ndarray:
     """
     Aggregate sufficient statistics from multiple examples into a single
     set of aggregate statistics
     :param stats: stats for every example
     :return: aggregated stats
     """
     if self.config.name in {'bleu', 'chrf'}:
         return np.sum(stats.get_data(), axis=0)
     else:
         return np.mean(stats.get_data(), axis=0)
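A quick standalone sketch of the two aggregation modes (the values here are made up; sum-style aggregation preserves corpus-level counts for bleu/chrf, while mean-style aggregation averages per-example scores):

    import numpy as np

    # Hypothetical per-example sufficient statistics, one row per example.
    data = np.array([[1.0, 2.0],
                     [3.0, 4.0]])

    print(np.sum(data, axis=0))   # [4. 6.]  (bleu/chrf: corpus-level counts)
    print(np.mean(data, axis=0))  # [2. 3.]  (other metrics: average score)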
Example 2
 def calc_stats_from_data(
         self,
         true_data: list,
         pred_data: list,
         config: Optional[MetricConfig] = None) -> MetricStats:
     """
     Return one 0/1 sufficient statistic per example: 1.0 when the
     prediction exactly matches the true output, else 0.0.
     """
     return MetricStats(
         np.array([(1.0 if x == y else 0.0)
                   for x, y in zip(true_data, pred_data)]))
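Outside the class, the statistic this method builds can be sketched directly with numpy; the mean of the 0/1 column is then the accuracy:

    import numpy as np

    true_data = ['a', 'b', 'c']
    pred_data = ['a', 'x', 'c']

    # One 0/1 exact-match statistic per example.
    stats = np.array([1.0 if t == p else 0.0
                      for t, p in zip(true_data, pred_data)])
    print(stats)         # [1. 0. 1.]
    print(stats.mean())  # 0.666... (the accuracy)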
Example 3
 def calc_stats_from_data(
         self,
         true_data: list,
         pred_data: list,
         config: Optional[MetricConfig] = None) -> MetricStats:
     """
     Take in a list of floats (token-level), or list of lists of floats (sentence
     level) and either one float for each or float+length rows
     """
     if len(pred_data) == 0 or isinstance(pred_data[0], float):
         return MetricStats(np.array(pred_data))
     elif isinstance(pred_data[0], list):
         return MetricStats(np.array([[sum(x), len(x)] for x in pred_data]))
     else:
         t = type(pred_data[0])
         raise ValueError(
             f'Invalid type of pred_data for calc_stats_from_data {t}')
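A minimal sketch of the two accepted input shapes (the numbers are made up):

    import numpy as np

    # Token-level input: one float per example, stored as-is.
    print(np.array([-0.5, -1.2]))

    # Sentence-level input: each sentence's token scores collapse to a
    # [sum, length] row.
    sentence_level = [[-0.5, -0.7], [-1.2]]
    print(np.array([[sum(x), len(x)] for x in sentence_level]))
    # [[-1.2  2. ]
    #  [-1.2  1. ]]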
Example 4
 def filter(self, indices: Union[list[int], np.ndarray]) -> MetricStats:
     """
     Return a view of these stats filtered down to the indicated indices
     """
     sdata: np.ndarray = self.get_data()
     if not isinstance(indices, np.ndarray):
         indices = np.array(indices)
     return MetricStats(sdata[indices])
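The core of this method is numpy fancy indexing; a standalone sketch:

    import numpy as np

    sdata = np.array([[1.0], [2.0], [3.0]])
    indices = np.array([0, 2])

    # Fancy indexing returns a new array holding only the selected rows.
    print(sdata[indices])  # [[1.] [3.]]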
Example 5
 def calc_stats_from_data(
         self,
         true_data: list,
         pred_data: list,
         config: Optional[MetricConfig] = None) -> MetricStats:
     """
     Return one sufficient statistic per example: the reciprocal-rank
     value computed by self.mrr_val for each true/predicted pair.
     """
     return MetricStats(
         np.array(
             [self.mrr_val(t, p) for t, p in zip(true_data, pred_data)]))
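self.mrr_val is not shown here; a hedged standalone sketch, assuming it returns the reciprocal rank of the true item in the prediction list (0.0 if absent):

    import numpy as np

    def mrr_val(true_item, preds):
        # Hypothetical stand-in for self.mrr_val.
        return 1.0 / (preds.index(true_item) + 1) if true_item in preds else 0.0

    true_data = ['a', 'b']
    pred_data = [['a', 'c'], ['c', 'b']]
    print(np.array([mrr_val(t, p) for t, p in zip(true_data, pred_data)]))
    # [1.  0.5]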
Example 6
    def calc_stats_from_data(
        self,
        true_data: list[list[str]],
        pred_data: list[list[str]],
        config: Optional[MetricConfig] = None,
    ) -> MetricStats:
        """
        Return sufficient statistics necessary to compute f-score.
        :param true_data: True outputs
        :param pred_data: Predicted outputs
        :param config: Configuration, if overriding the default
        :return: Returns stats for each class (integer id c) in the following columns of
            MetricStats
            * c*stat_mult + 0: occurrences in the true output
            * c*stat_mult + 1: occurrences in the predicted output
            * c*stat_mult + 2: number of matches with the true output
        """

        # 1. Get span ops
        seq_config = cast(SeqF1ScoreConfig, config or self.config)
        if seq_config.tag_schema == 'bio':
            span_ops: SpanOps = BIOSpanOps()
        elif seq_config.tag_schema == 'bmes':
            span_ops = BMESSpanOps()
        else:
            raise ValueError(f'Illegal tag_schema {seq_config.tag_schema}')

        true_spans_list: list[list[tuple[str, int, int]]] = [
            span_ops.get_spans_simple(true_tags) for true_tags in true_data
        ]
        pred_spans_list: list[list[tuple[str, int, int]]] = [
            span_ops.get_spans_simple(pred_tags) for pred_tags in pred_data
        ]

        # 2. Get tag space
        all_classes = {
            span[0]
            for span in itertools.chain(
                itertools.chain.from_iterable(true_spans_list),
                itertools.chain.from_iterable(pred_spans_list),
            )
        }
        tag_ids = {tag: i for i, tag in enumerate(all_classes)}

        # 3. Create the sufficient statistics
        stat_mult = 3
        n_data, n_classes = len(true_data), len(tag_ids)
        # This is a bit memory inefficient if there's a large number of classes
        stats = np.zeros((n_data, n_classes * stat_mult))

        for i, (true_spans,
                pred_spans) in enumerate(zip(true_spans_list,
                                             pred_spans_list)):
            matched_spans = set(true_spans).intersection(pred_spans)
            for offset, spans in enumerate(
                (true_spans, pred_spans, matched_spans)):
                for span in spans:
                    c = tag_ids[span[0]]
                    stats[i, c * stat_mult + offset] += 1
        return MetricStats(stats)
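To make the column layout concrete, here is a hand-worked miniature with made-up spans standing in for what BIOSpanOps/BMESSpanOps would extract:

    import numpy as np

    # (tag, start, end) spans for a single example.
    true_spans = [('PER', 0, 2), ('LOC', 3, 4)]
    pred_spans = [('PER', 0, 2), ('LOC', 5, 6)]

    tag_ids = {'PER': 0, 'LOC': 1}
    stat_mult = 3
    stats = np.zeros((1, len(tag_ids) * stat_mult))
    matched = set(true_spans) & set(pred_spans)
    for offset, spans in enumerate((true_spans, pred_spans, matched)):
        for tag, _, _ in spans:
            stats[0, tag_ids[tag] * stat_mult + offset] += 1
    print(stats)  # [[1. 1. 1. 1. 1. 0.]] -> PER matched, LOC did not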
Example 7
 def calc_stats_from_rank(
     self,
     rank_data: list,
     config: Optional[MetricConfig] = None
 ) -> MetricStats:  # TODO(Pengfei): why do we need the 3rd argument?
     config = cast(HitsConfig, self._get_config(config))
     # 1.0 when the correct answer is ranked within the top hits_k.
     return MetricStats(
         np.array([(1.0 if rank <= config.hits_k else 0.0)
                   for rank in rank_data]))
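A standalone sketch of the Hits@k statistic over gold ranks (hits_k stands in for config.hits_k):

    import numpy as np

    hits_k = 3
    rank_data = [1, 5, 3]

    # 1.0 when the correct answer was ranked within the top k.
    print(np.array([1.0 if rank <= hits_k else 0.0 for rank in rank_data]))
    # [1. 0. 1.]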
Example 8
 def calc_stats_from_data(
     self,
     true_data: list,
     pred_data: list,
     config: Optional[MetricConfig] = None
 ) -> MetricStats:  # TODO(Pengfei): why do we need the 3rd argument?
     config = cast(HitsConfig, self._get_config(config))
     # 1.0 when the true item appears among the top hits_k predictions.
     return MetricStats(
         np.array([(1.0 if t in p[:config.hits_k] else 0.0)
                   for t, p in zip(true_data, pred_data)]))
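The same statistic computed from prediction lists rather than precomputed ranks (again with hits_k standing in for config.hits_k):

    import numpy as np

    hits_k = 2
    true_data = ['a', 'b']
    pred_data = [['a', 'c', 'd'], ['c', 'd', 'b']]

    # 1.0 when the true item appears among the top-k predictions.
    print(np.array([1.0 if t in p[:hits_k] else 0.0
                    for t, p in zip(true_data, pred_data)]))
    # [1. 0.]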
Example 9
 def aggregate_stats(self, stats: MetricStats) -> np.ndarray:
     """
     Aggregate sufficient statistics from multiple examples into a single
     set of aggregate statistics
     :param stats: stats for every example
     :return: aggregated stats
     """
     data = stats.get_data()
     if data.size == 0:
         return np.array(0.0)
     else:
         return np.sum(data, axis=0)
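The empty-data guard can be exercised outside the class; a mirror of the method body, extracted for illustration:

    import numpy as np

    def aggregate(data):
        # Same logic as above: scalar 0.0 for the degenerate no-data case,
        # column-wise sums otherwise.
        return np.array(0.0) if data.size == 0 else np.sum(data, axis=0)

    print(aggregate(np.array([])))                    # 0.0
    print(aggregate(np.array([[1., 2.], [3., 4.]])))  # [4. 6.]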
Example 10
 def calc_stats_from_data(
     self,
     true_data: list[Union[str, list[str]]],
     pred_data: list[str],
     config: Optional[MetricConfig] = None,
 ) -> MetricStats:
     # Wrap single-string references so every example has a list of them.
     true_data = [[x] if isinstance(x, str) else x for x in true_data]
     config = self._get_config(config)
     preprocessor = ExtractiveQAPreprocessor(
         language=config.source_language)
     # Score each prediction against its best-matching reference.
     return MetricStats(
         np.array([
             max(self.sample_level_metric(t, p, preprocessor) for t in ts)
             for ts, p in zip(true_data, pred_data)
         ]))
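A standalone sketch of the multi-reference handling, with a trivial exact-match stand-in for self.sample_level_metric and the preprocessor omitted:

    import numpy as np

    def sample_level_metric(true, pred):
        # Hypothetical stand-in; the real method also applies a preprocessor.
        return 1.0 if true == pred else 0.0

    true_data = ['a', ['b', 'c']]   # single or multiple references
    pred_data = ['a', 'c']

    refs = [[x] if isinstance(x, str) else x for x in true_data]
    print(np.array([max(sample_level_metric(t, p) for t in ts)
                    for ts, p in zip(refs, pred_data)]))
    # [1. 1.]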
Example 11
    def calc_stats_from_data(
            self,
            true_data: list,
            pred_data: list,
            config: Optional[MetricConfig] = None) -> MetricStats:
        """
        Return sufficient statistics necessary to compute f-score.
        :param true_data: True outputs
        :param pred_data: Predicted outputs
        :param config: Configuration, if overriding the default for this object
        :return: Returns stats for each class (integer id c) in the following columns of
            MetricStats
            * c*stat_mult + 0: occurrences in the true output
            * c*stat_mult + 1: occurrences in the predicted output
            * c*stat_mult + 2: number of matches with the true output
            * c*stat_mult + 3: number of matches with the predicted output
                (when self.separate_match=True only)
        """
        config = cast(F1ScoreConfig, self._get_config(config))
        stat_mult: int = 4 if config.separate_match else 3

        id_map: dict[str, int] = {}
        if config.ignore_classes is not None:
            for ignore_class in config.ignore_classes:
                id_map[ignore_class] = -1

        for word in itertools.chain(true_data, pred_data):
            if word not in id_map:
                id_map[word] = len(id_map)
        n_data = len(true_data)
        n_classes = len(id_map)
        # This is a bit memory inefficient if there's a large number of classes
        stats = np.zeros((n_data, n_classes * stat_mult))
        for i, (t, p) in enumerate(zip(true_data, pred_data)):
            tid, pid = id_map[t], id_map[p]
            if tid != -1:
                stats[i, tid * stat_mult + 0] += 1
            if pid != -1:
                stats[i, pid * stat_mult + 1] += 1
                if tid == pid:
                    stats[i, tid * stat_mult + 2] += 1
                    if config.separate_match:
                        stats[i, tid * stat_mult + 3] += 1
        return MetricStats(stats)
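The column layout is chosen so that per-class precision, recall, and F1 fall out of the aggregated statistics; a worked example for one class with stat_mult == 3 and made-up counts:

    # Aggregated columns for one class: [in true, in pred, matches].
    true_cnt, pred_cnt, match_cnt = 10.0, 8.0, 6.0

    precision = match_cnt / pred_cnt   # 0.75
    recall = match_cnt / true_cnt      # 0.6
    f1 = 2 * precision * recall / (precision + recall)
    print(round(f1, 4))  # 0.6667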
Example 12
    def calc_stats_from_data(
        self,
        true_edits_ldl: list[dict[str, list]],
        pred_edits_ldl: list[dict[str, list]],
        config: Optional[MetricConfig] = None,
    ) -> MetricStats:
        def _get_flatten_edits(edits: list[dict]):
            flatten_edits = []
            for edit in edits:
                start_idx, end_idx, corrections = (
                    edit["start_idx"],
                    edit["end_idx"],
                    edit["corrections"],
                )
                for correction in corrections:
                    flatten_edits.append((start_idx, end_idx, correction))
            return flatten_edits

        # Naming: *_ldl = list of dict-of-lists, *_dl = dict of parallel
        # lists, *_ld = list of per-edit dicts.
        recall = []
        for true_edits_dl, pred_edits_dl in zip(true_edits_ldl,
                                                pred_edits_ldl):
            # Transpose each dict of parallel lists into per-edit dicts.
            true_edits_ld = [
                dict(zip(true_edits_dl, t))
                for t in zip(*true_edits_dl.values())
            ]
            pred_edits_ld = [
                dict(zip(pred_edits_dl, t))
                for t in zip(*pred_edits_dl.values())
            ]
            gold_flatten_edits = _get_flatten_edits(true_edits_ld)
            pred_flatten_edits = _get_flatten_edits(pred_edits_ld)
            # One 0/1 recall statistic per gold edit.
            for gold_flatten_edit in gold_flatten_edits:
                if gold_flatten_edit in pred_flatten_edits:
                    recall.append(1.0)
                else:
                    recall.append(0.0)
        return MetricStats(np.array(recall))
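The trickiest step is the dict-of-lists to list-of-dicts transposition; a standalone sketch with made-up edits:

    # One dict of parallel lists describing two edits.
    edits_dl = {
        'start_idx': [0, 4],
        'end_idx': [1, 5],
        'corrections': [['a'], ['b', 'c']],
    }
    # zip(*values) walks the parallel lists in lockstep; zip(dict, row)
    # pairs the keys (in insertion order) with one row of values.
    edits_ld = [dict(zip(edits_dl, t)) for t in zip(*edits_dl.values())]
    print(edits_ld)
    # [{'start_idx': 0, 'end_idx': 1, 'corrections': ['a']},
    #  {'start_idx': 4, 'end_idx': 5, 'corrections': ['b', 'c']}]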
Example 13
 def calc_stats_from_rank(
         self,
         rank_data: list,
         config: Optional[MetricConfig] = None) -> MetricStats:
     # Reciprocal rank per example; examples with no rank are skipped.
     return MetricStats(
         np.array([1.0 / rank for rank in rank_data if rank is not None]))
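A standalone sketch of the reciprocal-rank statistic; note that unranked examples are dropped rather than scored as 0.0, which shrinks the stats array:

    import numpy as np

    rank_data = [1, 2, None, 4]
    print(np.array([1.0 / rank for rank in rank_data if rank is not None]))
    # [1.   0.5  0.25]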