    def test_correct_score_calculation_multi_label_micro(self):
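        """Micro-averaged F1 for multi-label outputs should match the reference
        score computed over all label predictions thresholded at 0.5."""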
        evaluator = evaluators.F1Evaluator(model_output_key=None,
                                           batch_target_key='target',
                                           average='micro')

        output = torch.tensor(
            [[0.6, 0.2], [0.7, 0.2], [0.6, 0.6], [0.3, 0.55]],
            dtype=torch.float32)
        batch = {
            'target':
            torch.tensor([[1, 1], [0, 1], [1, 0], [0, 1]], dtype=torch.float32)
        }
        evaluator.step(output, batch)

        output = torch.tensor([[0.6, 0.4]], dtype=torch.float32)
        batch = {'target': torch.tensor([[1, 1]], dtype=torch.float32)}
        evaluator.step(output, batch)

        res = evaluator.calculate()

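        # Reference: micro F1 over the per-label predictions of both steps,
        # flattened and thresholded at 0.5.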
        correct = metrics.f1_score(y_pred=np.array(
            [0.6, 0.7, 0.6, 0.3, 0.6, 0.2, 0.2, 0.6, 0.55, 0.4]) > 0.5,
                                   y_true=np.array(
                                       [1, 0, 1, 0, 1, 1, 1, 0, 1, 1]))

        self.assertAlmostEqual(res.score, correct)

    def test_f1_multi_label_macro(self):
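        """Macro-averaged F1 for multi-label sequence outputs: the wrapper should
        drop end-padded timesteps (per the lengths in batch['input'][1]) before
        delegating to the wrapped F1Evaluator."""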
        bi_sequence_len_idx = 1
        batch_input_key = 'input'
        model_output_key = None
        batch_target_key = 'target'
        end_padded = True
        wrapped_evaluator = evaluators.F1Evaluator(
            model_output_key=model_output_key,
            batch_target_key=batch_target_key,
            average='macro')

        evaluator = evaluators.SequenceLabelingEvaluatorWrapper(
            evaluator=wrapped_evaluator,
            batch_input_sequence_length_idx=bi_sequence_len_idx,
            batch_input_key=batch_input_key,
            model_output_key=model_output_key,
            batch_target_key=batch_target_key,
            end_padded=end_padded)

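        # The third timestep of each sequence is padding (-2 in output, -1 in
        # target) and should be ignored, since batch['input'][1] gives lengths of 2.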
        output = torch.tensor([[[0.6, 0.2], [0.7, 0.2], [-2., -2.]],
                               [[0.6, 0.6], [0.3, 0.55], [-2., -2.]]],
                              dtype=torch.float32)
        batch = {
            'target':
            torch.tensor(
                [[[1, 1], [0, 1], [-1, -1]], [[1, 0], [0, 1], [-1, -1]]],
                dtype=torch.float32),
            'input': [None, torch.tensor([2, 2], dtype=torch.int)]
        }
        evaluator.step(output, batch)

        output = torch.tensor([[[0.6, 0.4]]], dtype=torch.float32)
        batch = {
            'target': torch.tensor([[[1, 1]]], dtype=torch.float32),
            'input': [None, torch.tensor([1], dtype=torch.int)]
        }
        evaluator.step(output, batch)

        res = evaluator.calculate()

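        # Reference: per-label F1 over the non-padded timesteps, then averaged.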
        label1_score = metrics.f1_score(
            y_pred=np.array([0.6, 0.7, 0.6, 0.3, 0.6]) > 0.5,
            y_true=np.array([1, 0, 1, 0, 1]))

        label2_score = metrics.f1_score(
            y_pred=np.array([0.2, 0.2, 0.6, 0.55, 0.4]) > 0.5,
            y_true=np.array([1, 1, 0, 1, 1]))

        correct = (label1_score + label2_score) / 2.

        self.assertAlmostEqual(res.score, correct)

    def test_correct_score_calculation_binary(self):
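        """Binary F1 should match the hand-computed value for predictions
        thresholded at 0.5."""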
        evaluator = evaluators.F1Evaluator(model_output_key=None,
                                           batch_target_key='target',
                                           average='binary')

        output = torch.tensor([0.9, 0.2, 0.8, 0.3], dtype=torch.float32)
        batch = {'target': torch.tensor([1, 1, 0, 0], dtype=torch.float32)}
        evaluator.step(output, batch)

        output = torch.tensor([0.2, 0.98, 0.76], dtype=torch.float32)
        batch = {'target': torch.tensor([1, 1, 0], dtype=torch.float32)}
        evaluator.step(output, batch)

        res = evaluator.calculate()

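        # Thresholded predictions [1, 0, 1, 0, 0, 1, 1] vs targets
        # [1, 1, 0, 0, 1, 1, 0]: TP=2, FP=2, FN=2, so F1 = 0.5.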
        self.assertAlmostEqual(res.score, 0.5)

    def test_f1_binary(self):
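        """Binary F1 through the sequence-labeling wrapper: padded positions must
        be excluded, leaving the same predictions and targets as the unwrapped
        binary test."""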
        bi_sequence_len_idx = 1
        batch_input_key = 'input'
        model_output_key = None
        batch_target_key = 'target'
        end_padded = True

        wrapped_evaluator = evaluators.F1Evaluator(
            model_output_key=model_output_key,
            batch_target_key=batch_target_key,
            average='binary')
        evaluator = evaluators.SequenceLabelingEvaluatorWrapper(
            evaluator=wrapped_evaluator,
            batch_input_sequence_length_idx=bi_sequence_len_idx,
            batch_input_key=batch_input_key,
            model_output_key=model_output_key,
            batch_target_key=batch_target_key,
            end_padded=end_padded)

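        # Sequences are end-padded (-2 in output, -1 in target); batch['input'][1]
        # holds the true lengths the wrapper uses to strip the padding.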
        output = torch.tensor([[0.9, 0.2, -2.], [0.8, 0.3, -2.]])
        batch = {
            'target': torch.tensor([[1., 1., -1.], [0., 0., -1.]]),
            'input': [None, torch.tensor([2, 2], dtype=torch.int)]
        }
        evaluator.step(output, batch)

        output = torch.tensor([[0.2, 0.98, -2.], [0.76, -2, -2.]])
        batch = {
            'target': torch.tensor([[1., 1., -1.], [0., -1, -1.]]),
            'input': [None, torch.tensor([2, 1], dtype=torch.int)]
        }
        evaluator.step(output, batch)

        res = evaluator.calculate()

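        # With padding stripped, the predictions and targets match the unwrapped
        # binary test above, so the expected F1 is again 0.5.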
        self.assertAlmostEqual(res.score, 0.5)