def test_complete_accuracy_with_container_sources(self):
    annotations = [ContainerAnnotation({'a': ClassificationAnnotation('identifier', 3)})]
    predictions = [ContainerPrediction({'p': ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])})]
    config = [{'type': 'accuracy', 'top_k': 1, 'annotation_source': 'a', 'prediction_source': 'p'}]

    dispatcher = MetricsExecutor(config, None)
    dispatcher.update_metrics_on_batch(range(len(annotations)), annotations, predictions)

    for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
        assert evaluation_result.name == 'accuracy'
        assert evaluation_result.evaluated_value == pytest.approx(1.0)
        assert evaluation_result.reference_value is None
        assert evaluation_result.abs_threshold is None
        assert evaluation_result.rel_threshold is None
def test_classification_per_class_accuracy_prediction_top3(self):
    annotation = [
        ClassificationAnnotation('identifier_1', 1),
        ClassificationAnnotation('identifier_2', 1)
    ]
    prediction = [
        ClassificationPrediction('identifier_1', [1.0, 2.0, 3.0, 4.0]),
        ClassificationPrediction('identifier_2', [2.0, 1.0, 3.0, 4.0])
    ]
    dataset = DummyDataset(label_map={0: '0', 1: '1', 2: '2', 3: '3'})

    dispatcher = MetricsExecutor([{'type': 'accuracy_per_class', 'top_k': 3}], dataset)
    dispatcher.update_metrics_on_batch(range(len(annotation)), annotation, prediction)

    for _, evaluation_result in dispatcher.iterate_metrics(annotation, prediction):
        assert evaluation_result.name == 'accuracy_per_class'
        assert len(evaluation_result.evaluated_value) == 4
        # label 1 is among the top-3 scores only for identifier_1, so class 1 scores 1/2
        assert evaluation_result.evaluated_value[0] == pytest.approx(0.0)
        assert evaluation_result.evaluated_value[1] == pytest.approx(0.5)
        assert evaluation_result.evaluated_value[2] == pytest.approx(0.0)
        assert evaluation_result.evaluated_value[3] == pytest.approx(0.0)
        assert evaluation_result.reference_value is None
        assert evaluation_result.abs_threshold is None
        assert evaluation_result.rel_threshold is None
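# DummyDataset, used above and in the per-class tests below, only has to expose
# the labels that accuracy_per_class uses to size its result vector. A minimal
# sketch of such a stub, assuming a label_map-only interface (the real helper
# is defined elsewhere in the test suite):
#
#     class DummyDataset:
#         def __init__(self, label_map):
#             self.label_map = label_map
#
#         @property
#         def labels(self):
#             return self.label_map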
def test_mae_with_positive_diff_between_annotation_and_prediction(self):
    annotations = [RegressionAnnotation('identifier', 3), RegressionAnnotation('identifier2', 1)]
    predictions = [RegressionPrediction('identifier', 1), RegressionPrediction('identifier2', -3)]
    config = [{'type': 'mae'}]
    # absolute errors are |3 - 1| = 2 and |1 - (-3)| = 4: mean 3.0, std 1.0
    expected = EvaluationResult(
        pytest.approx([3.0, 1.0]),
        None,
        'mae',
        'mae',
        None,
        None,
        {'postfix': ' ', 'scale': 1, 'names': ['mean', 'std'], 'calculate_mean': False, 'target': 'higher-worse'},
        None
    )

    dispatcher = MetricsExecutor(config, None)
    dispatcher.update_metrics_on_batch(range(len(annotations)), annotations, predictions)

    for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
        assert evaluation_result == expected
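# The regression tests here construct EvaluationResult positionally. From these
# calls and the attribute assertions in the classification tests, the layout
# appears to be (inferred from this file, not authoritative):
#
#     EvaluationResult(evaluated_value, reference_value, name, metric_type,
#                      abs_threshold, rel_threshold, meta, profiling_file)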
def test_mae_on_interval_values_in_range(self):
    annotations = [RegressionAnnotation('identifier', 0.5), RegressionAnnotation('identifier', 0.5)]
    predictions = [RegressionPrediction('identifier', 1), RegressionPrediction('identifier', 0.25)]
    config = [{'type': 'mae_on_interval', 'end': 1}]
    # both annotation values fall into [0, 1): errors are 0.5 and 0.25, so mean 0.375, std 0.125
    expected = EvaluationResult(
        pytest.approx([0.375, 0.125]),
        None,
        'mae_on_interval',
        'mae_on_interval',
        None,
        None,
        {
            'postfix': ' ',
            'scale': 1,
            'names': ['mean: <= 0.0 < 1.0', 'std: <= 0.0 < 1.0'],
            'calculate_mean': False,
            'target': 'higher-worse',
            'orig_names': ['mean: <= 0.0 < 1.0', 'std: <= 0.0 < 1.0']
        },
        None
    )

    dispatcher = MetricsExecutor(config, None)
    dispatcher.update_metrics_on_batch(range(len(annotations)), annotations, predictions)

    for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
        assert evaluation_result == expected
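# The two segmentation tests below rely on fixtures defined elsewhere in the
# suite. A minimal sketch of what they could look like, assuming the mask-based
# representation interface used here (constructor details and label names are
# assumptions):
#
#     def make_segmentation_representation(mask, ground_truth):
#         if ground_truth:
#             representation = SegmentationAnnotation('identifier', None)
#             representation.mask = mask
#             return [representation]
#         return [SegmentationPrediction('identifier', mask)]
#
#     def multi_class_dataset():
#         # four labels matching the mask values 0..3, with 0 as background
#         return DummyDataset(label_map={0: 'background', 1: 'class_1', 2: 'class_2', 3: 'class_3'})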
def test_multi_class(self):
    annotations = make_segmentation_representation(np.array([[1, 0, 3, 0, 0], [0, 0, 0, 0, 0]]), True)
    predictions = make_segmentation_representation(np.array([[1, 2, 3, 2, 3], [0, 0, 0, 0, 0]]), False)
    dispatcher = MetricsExecutor(create_config(self.name), multi_class_dataset())
    dispatcher.update_metrics_on_batch(range(len(annotations)), annotations, predictions)
    # 5 of 8 background pixels, 1 of 1 class-1 pixel and 1 of 1 class-3 pixel
    # are predicted correctly: (5 + 1 + 1) / (8 + 1 + 1) = 0.7
    expected = generate_expected_result((5.0 + 1.0 + 1.0) / (8.0 + 1.0 + 1.0), self.name)

    for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
        assert evaluation_result == expected
def test_multi_class_not_matched(self):
    annotations = make_segmentation_representation(np.array([[1, 1, 1, 1, 1], [1, 1, 1, 1, 1]]), True)
    predictions = make_segmentation_representation(np.array([[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]), False)
    dataset = multi_class_dataset()
    dispatcher = MetricsExecutor(create_config(self.name), dataset)
    dispatcher.update_metrics_on_batch(range(len(annotations)), annotations, predictions)
    # every class-1 pixel is predicted as background, so the only class present
    # in the annotation scores 0.0
    expected = generate_expected_result([0.0], self.name, {1: dataset.labels[1]})

    for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
        assert evaluation_result == expected
def test_config_default_presenter(self):
    annotations = [ClassificationAnnotation('identifier', 3)]
    predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
    config = [{'type': 'accuracy', 'top_k': 1}]

    dispatcher = MetricsExecutor(config, None)
    dispatcher.update_metrics_on_batch(range(len(annotations)), annotations, predictions)

    for presenter, _ in dispatcher.iterate_metrics(annotations, predictions):
        assert isinstance(presenter, ScalarPrintPresenter)
def test_zero_accuracy_top_3(self):
    annotations = [ClassificationAnnotation('identifier', 3)]
    predictions = [ClassificationPrediction('identifier', [5.0, 3.0, 4.0, 1.0])]

    dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 3}], None)
    # label 3 has the lowest score, so it is outside the top 3 predictions
    dispatcher.update_metrics_on_batch(range(len(annotations)), annotations, predictions)

    for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
        assert evaluation_result.name == 'accuracy'
        assert evaluation_result.evaluated_value == 0.0
        assert evaluation_result.reference_value is None
        assert evaluation_result.abs_threshold is None
        assert evaluation_result.rel_threshold is None
def test_complete_accuracy_top_3(self):
    annotations = [ClassificationAnnotation('identifier', 3)]
    predictions = [ClassificationPrediction('identifier', [1.0, 3.0, 4.0, 2.0])]

    dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 3}], None)
    dispatcher.update_metrics_on_batch(range(len(annotations)), annotations, predictions)

    for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
        assert evaluation_result.name == 'accuracy'
        assert evaluation_result.evaluated_value == pytest.approx(1.0)
        assert evaluation_result.reference_value is None
        assert evaluation_result.abs_threshold is None
        assert evaluation_result.rel_threshold is None
def test_threshold_is_10_by_config(self):
    annotations = [ClassificationAnnotation('identifier', 3)]
    predictions = [ClassificationPrediction('identifier', [5.0, 3.0, 4.0, 1.0])]

    dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 3, 'abs_threshold': 10}], None)
    dispatcher.update_metrics_on_batch(range(len(annotations)), annotations, predictions)

    for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
        assert evaluation_result.name == 'accuracy'
        assert evaluation_result.evaluated_value == 0.0
        assert evaluation_result.reference_value is None
        assert evaluation_result.abs_threshold == 10
def test_classification_per_class_accuracy_fully_zero_prediction(self):
    annotation = ClassificationAnnotation('identifier', 0)
    prediction = ClassificationPrediction('identifier', [1.0, 2.0])
    dataset = DummyDataset(label_map={0: '0', 1: '1'})

    dispatcher = MetricsExecutor([{'type': 'accuracy_per_class', 'top_k': 1}], dataset)
    dispatcher.update_metrics_on_batch(range(1), [annotation], [prediction])

    for _, evaluation_result in dispatcher.iterate_metrics([annotation], [prediction]):
        assert evaluation_result.name == 'accuracy_per_class'
        assert len(evaluation_result.evaluated_value) == 2
        # the top-1 prediction is class 1 while the label is 0, so neither class scores
        assert evaluation_result.evaluated_value[0] == pytest.approx(0.0)
        assert evaluation_result.evaluated_value[1] == pytest.approx(0.0)
        assert evaluation_result.reference_value is None
        assert evaluation_result.abs_threshold is None
        assert evaluation_result.rel_threshold is None
def test_mae_on_interval_default_all_missed(self):
    annotations = [RegressionAnnotation('identifier', -2)]
    predictions = [RegressionPrediction('identifier', 1)]
    config = [{'type': 'mae_on_interval', 'end': 1}]
    # the annotation value -2 lies outside the default interval [0, 1),
    # so nothing is accumulated and a warning is emitted
    expected = EvaluationResult(
        pytest.approx([0.0]),
        None,
        'mae_on_interval',
        'mae_on_interval',
        None,
        None,
        {
            'postfix': ' ',
            'scale': 1,
            'names': [],
            'calculate_mean': False,
            'target': 'higher-worse',
            'orig_names': ['mean: <= 0.0 < 1.0', 'std: <= 0.0 < 1.0']
        },
        None
    )

    dispatcher = MetricsExecutor(config, None)
    dispatcher.update_metrics_on_batch(range(len(annotations)), annotations, predictions)

    with pytest.warns(UserWarning) as warnings:
        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
            assert len(warnings) == 1
            assert evaluation_result == expected