def test_fscore_empty(test_mask_batch):
    num_classes = 6
    # shift every label by one so that no prediction matches its target
    pred_classes = (test_mask_batch - 1) % num_classes
    assert pred_classes.shape == (4, 4, 4)
    tp, fp, tn, fn = func.statistics_step(pred_classes,
                                          test_mask_batch,
                                          num_classes=num_classes,
                                          ignore_index=255,
                                          reduction=False)
    score = func.f1_score(tp, fp, fn, reduce=True)
    LOG.debug(score)
    # zero true positives everywhere, so the reduced score must be zero
    assert score == 0.0
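
# NOTE: func.statistics_step itself is not shown in these examples. Below is a
# minimal sketch of what a per-class confusion-count step could look like: a
# hypothetical reference, NOT the library implementation, assuming that
# reduction=False yields one TP/FP/TN/FN count per class over the valid pixels.
import torch

def per_class_stats(pred, target, num_classes, ignore_index=255):
    # drop pixels whose ground truth carries the ignore label
    valid = target != ignore_index
    pred, target = pred[valid], target[valid]
    tp = torch.zeros(num_classes, dtype=torch.long)
    fp = torch.zeros(num_classes, dtype=torch.long)
    tn = torch.zeros(num_classes, dtype=torch.long)
    fn = torch.zeros(num_classes, dtype=torch.long)
    for c in range(num_classes):
        tp[c] = ((pred == c) & (target == c)).sum()
        fp[c] = ((pred == c) & (target != c)).sum()
        tn[c] = ((pred != c) & (target != c)).sum()
        fn[c] = ((pred != c) & (target == c)).sum()
    return tp, fp, tn, fn
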
def test_fscore_best():
    num_classes = 6
    test_mask_batch = torch.randint(0, num_classes, size=(4, 4, 4))
    # predictions identical to the targets: the best possible case
    pred_classes = test_mask_batch.clone()
    assert pred_classes.shape == (4, 4, 4)
    tp, fp, tn, fn = func.statistics_step(pred_classes,
                                          test_mask_batch,
                                          num_classes=num_classes,
                                          ignore_index=255,
                                          reduction=False)
    score = func.f1_score(tp, fp, fn, reduce=False)
    LOG.debug(score)
    # per-class scores for a perfect prediction should be (near) 1.0
    assert torch.all(score[:4] >= 0.99)
Example #3
    def compute(self) -> torch.Tensor:
        """Computes the F1 score over every device, using the accumulated statistics.
        Same micro and macro-average considerations hold for this metric as well.

        :return: tensor with empty size when reduced, or (C,) where C in the number of classes
        :rtype: torch.Tensor
        """
        score = func.f1_score(tp=self.tp,
                              fp=self.fp,
                              fn=self.fn,
                              reduce=self.is_micro)
        if self.reduction == ReductionType.MACRO.value:
            score = score.mean()
        return score
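
# For reference, the micro/macro distinction mentioned in the docstring above,
# as a self-contained sketch (NOT the library's f1_score; assumes tp/fp/fn are
# (C,) tensors of per-class counts):
import torch

def f1_from_counts(tp, fp, fn, eps=1e-12):
    # F1 = 2*TP / (2*TP + FP + FN), guarded against empty classes
    return 2 * tp / (2 * tp + fp + fn + eps)

tp = torch.tensor([10.0, 0.0, 5.0])
fp = torch.tensor([2.0, 3.0, 0.0])
fn = torch.tensor([1.0, 4.0, 5.0])
macro_f1 = f1_from_counts(tp, fp, fn).mean()             # average the per-class scores
micro_f1 = f1_from_counts(tp.sum(), fp.sum(), fn.sum())  # pool the counts first
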
def test_fscore_batch_macro(test_pred_batch, test_mask_batch):
    # collapse the class dimension of the logits into hard class indices
    pred_classes = test_pred_batch.argmax(dim=1)
    assert pred_classes.shape == (4, 4, 4)
    tp, fp, tn, fn = func.statistics_step(pred_classes,
                                          test_mask_batch,
                                          num_classes=6,
                                          ignore_index=255,
                                          reduction=False)
    # macro average: mean of the per-class scores (reduce=True would instead
    # pool the counts into a micro score, which is what compute() above does)
    fscore = func.f1_score(tp, fp, fn, reduce=False).mean()
    y_true, y_pred = func.valid_samples(255, test_mask_batch, pred_classes)
    skl_f1 = f1_score(y_true.cpu().numpy(),
                      y_pred.cpu().numpy(),
                      average="macro")
    LOG.debug("sklearn: %s - custom: %s", str(skl_f1), str(fscore))
    # sklearn does not account for empty classes, hence the small tolerance
    diff = abs(skl_f1 - fscore.item())
    assert diff <= EPS
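
# func.valid_samples above filters out ignore_index pixels before handing the
# tensors to sklearn; a minimal equivalent (hypothetical sketch, mirroring the
# (ignore_index, target, pred) argument order used in the test):
def valid_samples_sketch(ignore_index, target, pred):
    # keep only the pixels whose ground truth is not the ignore label
    keep = target != ignore_index
    return target[keep], pred[keep]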