def test_f1_multi_class_macro(self):
    bi_sequence_len_idx = 1
    batch_input_key = 'input'
    model_output_key = None
    batch_target_key = 'target'
    end_padded = True

    wrapped_evaluator = evaluators.MultiClassF1Evaluator(
        model_output_key=model_output_key,
        batch_target_key=batch_target_key,
        average='macro'
    )
    evaluator = evaluators.SequenceLabelingEvaluatorWrapper(
        evaluator=wrapped_evaluator,
        batch_input_sequence_length_idx=bi_sequence_len_idx,
        batch_input_key=batch_input_key,
        model_output_key=model_output_key,
        batch_target_key=batch_target_key,
        end_padded=end_padded
    )

    # First batch: the second sequence has length 1, so its final timestep is end padding.
    output = torch.tensor(
        [[[0.5, 0.1, 0.4], [0.3, 0.3, 0.4]],
         [[0.6, 0.4, 0.0], [-2., -2., -2.]]],
        dtype=torch.float32
    )
    batch = {
        'target': torch.tensor([[0, 2], [2, -1]], dtype=torch.float32),
        'input': [None, torch.tensor([2, 1], dtype=torch.int)]
    }
    evaluator.step(output, batch)

    # Second batch: a single sequence of length 1.
    output = torch.tensor([[[0.1, 0.1, 0.8]]], dtype=torch.float32)
    batch = {
        'target': torch.tensor([[2]], dtype=torch.float32),
        'input': [None, torch.tensor([1], dtype=torch.int)]
    }
    evaluator.step(output, batch)

    res = evaluator.calculate()

    correct = metrics.f1_score(
        y_pred=np.array([0, 2, 0, 2]),
        y_true=np.array([0, 2, 2, 2]),
        average='macro'
    )

    self.assertAlmostEqual(res.score, correct)
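
# Illustrative sketch (not part of the test suite): one plausible way the padded
# timesteps above could be dropped before scoring. It assumes the wrapper argmaxes
# the class dimension and keeps only the first `length` steps of each sequence;
# this is an assumption for illustration, not the actual
# SequenceLabelingEvaluatorWrapper implementation.
def _flatten_unpadded_predictions(output, lengths):
    """Return flat argmax predictions, skipping end-padded positions."""
    preds = []
    for seq, length in zip(output.argmax(dim=-1), lengths):
        preds.extend(seq[:int(length)].tolist())
    return preds
# Applied to the two batches of test_f1_multi_class_macro this yields
# [0, 2, 0, 2], i.e. the y_pred compared against above.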
def test_correct_score_calculation_micro(self):
    evaluator = evaluators.MultiClassF1Evaluator(
        model_output_key=None,
        batch_target_key='target',
        average='micro'
    )

    output = torch.tensor(
        [[0.5, 0.1, 0.4], [0.3, 0.3, 0.4], [0.5, 0.5, 0.0]],
        dtype=torch.float32
    )
    batch = {'target': torch.tensor([0, 2, 2], dtype=torch.float32)}
    evaluator.step(output, batch)

    output = torch.tensor([[0.1, 0.1, 0.8]], dtype=torch.float32)
    batch = {'target': torch.tensor([2], dtype=torch.float32)}
    evaluator.step(output, batch)

    res = evaluator.calculate()

    correct = metrics.f1_score(
        y_pred=np.array([0, 2, 0, 2]),
        y_true=np.array([0, 2, 2, 2]),
        average='micro'
    )

    self.assertAlmostEqual(res.score, correct)
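
# Sanity check of the expected scores for y_true=[0, 2, 2, 2], y_pred=[0, 2, 0, 2]:
# micro: TP=3, FP=1, FN=1 -> precision = recall = F1 = 3/4 = 0.75
# macro: class 0 -> P=1/2, R=1, F1=2/3; class 2 -> P=1, R=2/3, F1=4/5;
#        mean over the two present classes = (2/3 + 4/5) / 2 ~= 0.7333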