Example #1
    def _update_confusion_matrix(self, predictions: List[Optional[str]],
                                 labels: List[Optional[List[str]]]):
        """
        Update the confusion matrix given the batch and predictions.

        :param predictions: List of length batchsize. Each entry is the label string
            predicted by the classifier, or None if the corresponding observation is
            empty.
        :param labels: List of label fields from the observations. A field may be None
            if the corresponding observation is empty.
        """
        f1_dict = {}
        explode_labels = []
        for x in labels:
            if x is not None and len(x) > 0:
                assert len(x) == 1, 'Multiple labels are not currently supported!'
                explode_labels.append(x[0])
            else:
                explode_labels.append(None)

        # Check that predictions and labels have Nones in the same places, and then
        # filter the Nones out because we can't compute metrics with them
        assert len(predictions) == len(labels)
        assert all([(pred is None and label is None)
                    or (pred is not None and label is not None)
                    for pred, label in zip(predictions, explode_labels)])
        filtered_predictions = [
            pred for pred in predictions if pred is not None
        ]
        filtered_labels = [
            label for label in explode_labels if label is not None
        ]

        class_list = set(filtered_predictions + filtered_labels)
        for class_name in class_list:
            prec_str = f'class_{class_name}_prec'
            recall_str = f'class_{class_name}_recall'
            f1_str = f'class_{class_name}_f1'
            precision, recall, f1 = ConfusionMatrixMetric.compute_metrics(
                filtered_predictions, filtered_labels, class_name)
            f1_dict[class_name] = f1
            self.record_local_metric(prec_str, precision)
            self.record_local_metric(recall_str, recall)
            self.record_local_metric(f1_str, f1)
        self.record_local_metric('weighted_f1',
                                 WeightedF1Metric.compute_many(f1_dict))
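
A minimal, self-contained sketch of the kind of per-class precision/recall/F1 computation delegated to ConfusionMatrixMetric.compute_metrics above. The helper per_class_prf and its plain-float return values are assumptions made for illustration; the real ParlAI metric classes return Metric objects that are aggregated later.

from typing import List, Tuple


def per_class_prf(
    preds: List[str], golds: List[str], cls: str
) -> Tuple[float, float, float]:
    # Illustrative only: count the confusion-matrix cells for one class and
    # derive precision, recall, and F1 from them.
    tp = sum(1 for p, g in zip(preds, golds) if p == cls and g == cls)
    fp = sum(1 for p, g in zip(preds, golds) if p == cls and g != cls)
    fn = sum(1 for p, g in zip(preds, golds) if p != cls and g == cls)
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return precision, recall, f1


preds = ['class_ok', 'class_ok', 'class_notok', 'class_notok']
golds = ['class_ok', 'class_notok', 'class_notok', 'class_ok']
for cls in sorted(set(preds + golds)):
    print(cls, per_class_prf(preds, golds, cls))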
Example #2
    def test_classifier_metrics(self):
        # We assume a batch of 16 samples (8 per task), binary classification, from 2 tasks.
        # task 1
        # expected confusion matrix for class ok:
        # TP = 2, TN = 2, FP = 2, FN = 2
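        # so precision = recall = f1 = 0.5 (class notok is symmetric here, so its
        # metrics are also 0.5)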
        report1 = {}
        report2 = {}
        task1_f1s = {}
        task2_f1s = {}
        classes = ['class_ok', 'class_notok']
        task1_predictions = [
            'class_ok',
            'class_ok',
            'class_ok',
            'class_ok',
            'class_notok',
            'class_notok',
            'class_notok',
            'class_notok',
        ]
        task1_gold_labels = [
            'class_ok',
            'class_ok',
            'class_notok',
            'class_notok',
            'class_ok',
            'class_ok',
            'class_notok',
            'class_notok',
        ]
        for each in classes:
            precisions, recalls, f1s = ConfusionMatrixMetric.compute_metrics(
                task1_predictions, task1_gold_labels, each)
            report1.update({
                f'{each}_precision': sum(precisions, None),
                f'{each}_recall': sum(recalls, None),
                f'{each}_f1': sum(f1s, None),
            })
            task1_f1s[each] = f1s
        report1['weighted_f1'] = sum(WeightedF1Metric.compute_many(task1_f1s),
                                     None)
        # task 2, for class ok:
        # TP = 3, TN = 2, FP = 2, FN = 1
        # for class notok:
        # TP = 2, TN = 3, FP = 1, FN = 2
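        # so class ok: precision = 3 / 5, recall = 3 / 4, f1 = 2 / 3;
        # class notok: precision = 2 / 3, recall = 1 / 2, f1 = 4 / 7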
        task2_predictions = [
            'class_ok',
            'class_ok',
            'class_ok',
            'class_ok',
            'class_ok',
            'class_notok',
            'class_notok',
            'class_notok',
        ]
        task2_gold_labels = [
            'class_ok',
            'class_ok',
            'class_notok',
            'class_notok',
            'class_ok',
            'class_ok',
            'class_notok',
            'class_notok',
        ]
        for each in classes:
            precisions, recalls, f1s = ConfusionMatrixMetric.compute_metrics(
                task2_predictions, task2_gold_labels, each)
            report2.update({
                f'{each}_precision': sum(precisions, None),
                f'{each}_recall': sum(recalls, None),
                f'{each}_f1': sum(f1s, None),
            })
            task2_f1s[each] = f1s
        report2['weighted_f1'] = sum(WeightedF1Metric.compute_many(task2_f1s),
                                     None)

        agg = aggregate_named_reports(
            {'task1': report1, 'task2': report2}, micro_average=False
        )
        # task1
        assert agg['task1/class_ok_precision'] == 0.5
        assert agg['task1/class_ok_recall'] == 0.5
        assert agg['task1/class_ok_f1'] == 0.5
        # task2
        assert agg['task2/class_ok_precision'] == 3 / 5
        assert agg['task2/class_ok_recall'] == 3 / 4
        assert agg['task2/class_ok_f1'] == 2 / 3
        # task2 not ok
        assert agg['task2/class_notok_precision'] == 2 / 3
        assert agg['task2/class_notok_recall'] == 0.5
        assert agg['task2/class_notok_f1'] == 4 / 7
        # weighted f1
        assert agg['task1/weighted_f1'] == 0.5
        assert agg['task2/weighted_f1'] == (2 / 3) * 0.5 + (4 / 7) * 0.5
        # all
        assert agg['weighted_f1'] == (0.5 + (2 / 3) * 0.5 + (4 / 7) * 0.5) / 2
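
As a sanity check, the expected values in the assertions above follow directly from the stated confusion-matrix counts. The standalone sketch below uses plain floats rather than ParlAI Metric objects; the 0.5 per-class weights in the last check mirror the 0.5/0.5 split implied by the test's expected task2 weighted F1 (each class accounts for 4 of the 8 gold labels).

def f1_from_counts(tp: int, fp: int, fn: int) -> float:
    # Precision, recall, and their harmonic mean from confusion-matrix counts.
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return 2 * precision * recall / (precision + recall)


# task 1, class ok (class notok is symmetric): TP = 2, FP = 2, FN = 2
assert f1_from_counts(2, 2, 2) == 0.5
# task 2, class ok: TP = 3, FP = 2, FN = 1
assert abs(f1_from_counts(3, 2, 1) - 2 / 3) < 1e-12
# task 2, class notok: TP = 2, FP = 1, FN = 2
assert abs(f1_from_counts(2, 1, 2) - 4 / 7) < 1e-12
# task 2 weighted F1, weighting each class by its gold-label frequency (4 / 8 each)
task2_weighted = 0.5 * f1_from_counts(3, 2, 1) + 0.5 * f1_from_counts(2, 1, 2)
assert abs(task2_weighted - ((2 / 3) * 0.5 + (4 / 7) * 0.5)) < 1e-12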