def evaluate(self,
              output_resultset: ResultSetEntity,
              evaluation_metric: Optional[str] = None):
     performance = MetricsHelper.compute_accuracy(
         output_resultset).get_performance()
     logger.info(f"Computes performance of {performance}")
     output_resultset.performance = performance
Example #2
    def test_evaluate_interface(self):
        """
        <b>Description:</b>
        Check IEvaluationTask class object initialization

        <b>Input data:</b>
        IEvaluationTask object

        <b>Expected results:</b>
        Test passes if IEvaluationTask object evaluate method raises NotImplementedError exception
        """
        dataset = DatasetEntity()
        configuration = ModelConfiguration(
            configurable_parameters=ConfigurableParameters(
                header="Test Header"),
            label_schema=LabelSchemaEntity(),
        )
        model_entity = ModelEntity(configuration=configuration,
                                   train_dataset=dataset)
        with pytest.raises(NotImplementedError):
            IEvaluationTask().evaluate(
                ResultSetEntity(
                    model=model_entity,
                    ground_truth_dataset=dataset,
                    prediction_dataset=dataset,
                ))
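The interface stub exercised above only raises NotImplementedError; concrete tasks are expected to override evaluate. As a minimal sketch (the class name AccuracyEvaluationTask is hypothetical, and IEvaluationTask, ResultSetEntity and MetricsHelper are assumed to be imported as in the surrounding snippets), such a subclass could simply reuse the accuracy helper from Example #1:

from typing import Optional

class AccuracyEvaluationTask(IEvaluationTask):
    """Hypothetical concrete task, shown only to illustrate the interface."""

    def evaluate(self,
                 output_resultset: ResultSetEntity,
                 evaluation_metric: Optional[str] = None):
        # Compute accuracy on the result set and attach it, as in Example #1.
        output_resultset.performance = MetricsHelper.compute_accuracy(
            output_resultset).get_performance()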
Example #3
 def evaluate(self,
              output_result_set: ResultSetEntity,
              evaluation_metric: Optional[str] = None):
     if evaluation_metric is not None:
         logger.warning(f'Requested to use {evaluation_metric} metric, '
                        'but this parameter is ignored. Using accuracy instead.')
     output_result_set.performance = MetricsHelper.compute_accuracy(
         output_result_set).get_performance()
Example #4
 def evaluate(self,
              output_resultset: ResultSetEntity,
              evaluation_metric: Optional[str] = None):
     """
     Evaluate the performance on a result set.
     """
     f_measure_metrics = MetricsHelper.compute_f_measure(output_resultset)
     output_resultset.performance = f_measure_metrics.get_performance()
     logger.info("F-measure after evaluation: %d",
                 f_measure_metrics.f_measure.value)
Example #5
    def evaluate(self,
                 output_resultset: ResultSetEntity,
                 evaluation_metric: Optional[str] = None):
        """Evaluate the performance of the model.

        Args:
            output_resultset (ResultSetEntity): Result set storing ground truth and predicted dataset.
            evaluation_metric (Optional[str], optional): Evaluation metric. Defaults to None.
        """
        output_resultset.performance = MetricsHelper.compute_f_measure(
            output_resultset).get_performance()
Example #6
def run_evaluation(dataset, task, model):
    logger.debug("Evaluation: Get predictions on the dataset")
    predicted_dataset = task.infer(dataset.with_empty_annotations(),
                                   InferenceParameters(is_evaluation=True))
    resultset = ResultSetEntity(
        model=model,
        ground_truth_dataset=dataset,
        prediction_dataset=predicted_dataset,
    )
    logger.debug("Evaluation: Estimate quality on dataset")
    task.evaluate(resultset)
    evaluation_performance = resultset.performance
    logger.info(f"Evaluation: performance={evaluation_performance}")
    score_name, score_value = performance_to_score_name_value(
        evaluation_performance)
    return score_name, score_value
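run_evaluation bundles the infer/evaluate pair and reduces the resulting performance to a single named score via performance_to_score_name_value. A brief usage sketch, assuming validation_dataset, task and model have been constructed as in the following examples:

score_name, score_value = run_evaluation(validation_dataset, task, model)
print(f"{score_name}: {score_value}")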
Example #7
def main():
    """
    Main function that is used for model evaluation.
    """

    # Dynamically create an argument parser based on override parameters.
    args, template, hyper_parameters = parse_args()
    # Get new values from user's input.
    updated_hyper_parameters = gen_params_dict_from_args(args)
    # Override the default parameter values with the user's input.
    override_parameters(updated_hyper_parameters, hyper_parameters)

    hyper_parameters = create(hyper_parameters)

    # Get classes for Task, ConfigurableParameters and Dataset.
    if args.load_weights.endswith(".bin") or args.load_weights.endswith(".xml"):
        task_class = get_impl_class(template.entrypoints.openvino)
    else:
        task_class = get_impl_class(template.entrypoints.base)

    dataset_class = get_dataset_class(template.task_type)

    dataset = dataset_class(
        test_subset={"ann_file": args.test_ann_files, "data_root": args.test_data_roots}
    )

    dataset_label_schema = generate_label_schema(dataset, template.task_type)
    check_label_schemas(
        read_label_schema(
            os.path.join(os.path.dirname(args.load_weights), "label_schema.json")
        ),
        dataset_label_schema,
    )

    environment = TaskEnvironment(
        model=None,
        hyper_parameters=hyper_parameters,
        label_schema=dataset_label_schema,
        model_template=template,
    )

    model = read_model(environment.get_model_configuration(), args.load_weights, None)
    environment.model = model

    task = task_class(task_environment=environment)

    validation_dataset = dataset.get_subset(Subset.TESTING)
    predicted_validation_dataset = task.infer(
        validation_dataset.with_empty_annotations(),
        InferenceParameters(is_evaluation=True),
    )

    resultset = ResultSetEntity(
        model=model,
        ground_truth_dataset=validation_dataset,
        prediction_dataset=predicted_validation_dataset,
    )
    task.evaluate(resultset)
    assert resultset.performance is not None
    print(resultset.performance)

    if args.save_performance:
        with open(args.save_performance, "w", encoding="UTF-8") as write_file:
            json.dump(
                {resultset.performance.score.name: resultset.performance.score.value},
                write_file,
            )
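The evaluation entry point above stores the final metric as a single-key JSON object ({score.name: score.value}). Reading it back is symmetric; the file name below is a placeholder for whatever path was passed as args.save_performance:

import json

with open("performance.json", encoding="UTF-8") as read_file:  # placeholder path
    scores = json.load(read_file)
score_name, score_value = next(iter(scores.items()))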
Example #8
def main():
    """
    Main function that is used for model training.
    """

    # Dynamically create an argument parser based on override parameters.
    args, template, hyper_parameters = parse_args()
    # Get new values from user's input.
    updated_hyper_parameters = gen_params_dict_from_args(args)
    # Override the default parameter values with the user's input.
    override_parameters(updated_hyper_parameters, hyper_parameters)

    hyper_parameters = create(hyper_parameters)

    # Get classes for Task, ConfigurableParameters and Dataset.
    task_class = get_impl_class(template.entrypoints.base)
    dataset_class = get_dataset_class(template.task_type)

    # Create instances of Task, ConfigurableParameters and Dataset.
    dataset = dataset_class(
        train_subset={
            "ann_file": args.train_ann_files,
            "data_root": args.train_data_roots,
        },
        val_subset={"ann_file": args.val_ann_files, "data_root": args.val_data_roots},
    )

    environment = TaskEnvironment(
        model=None,
        hyper_parameters=hyper_parameters,
        label_schema=generate_label_schema(dataset, template.task_type),
        model_template=template,
    )

    if args.load_weights:
        environment.model = ModelEntity(
            train_dataset=dataset,
            configuration=environment.get_model_configuration(),
            model_adapters={
                "weights.pth": ModelAdapter(read_binary(args.load_weights))
            },
        )

    if args.enable_hpo:
        run_hpo(args, environment, dataset, template.task_type)

    task = task_class(task_environment=environment)

    output_model = ModelEntity(
        dataset,
        environment.get_model_configuration(),
        model_status=ModelStatus.NOT_READY,
    )

    task.train(dataset, output_model, train_parameters=TrainParameters())

    save_model_data(output_model, args.save_model_to)

    validation_dataset = dataset.get_subset(Subset.VALIDATION)
    predicted_validation_dataset = task.infer(
        validation_dataset.with_empty_annotations(),
        InferenceParameters(is_evaluation=True),
    )

    resultset = ResultSetEntity(
        model=output_model,
        ground_truth_dataset=validation_dataset,
        prediction_dataset=predicted_validation_dataset,
    )
    task.evaluate(resultset)
    assert resultset.performance is not None
    print(resultset.performance)
Example #9
    def test_resultset_entity(self):
        """
        <b>Description:</b>
        Check the ResultSetEntity can correctly return the value

        <b>Input data:</b>
        Mock data

        <b>Expected results:</b>
        Test passes if incoming data is processed correctly

        <b>Steps</b>
        1. Create dummy data
        2. Check the processing of default values
        3. Check the processing of changed values
        """

        test_data = {
            "model": None,
            "ground_truth_dataset": None,
            "prediction_dataset": None,
            "purpose": None,
            "performance": None,
            "creation_date": None,
            "id": None,
        }

        result_set = ResultSetEntity(**test_data)

        for name, value in test_data.items():
            set_attr_name = f"test_{name}"
            if name in [
                "model",
                "ground_truth_dataset",
                "prediction_dataset",
                "purpose",
            ]:
                assert getattr(result_set, name) == value
                setattr(result_set, name, set_attr_name)
                assert getattr(result_set, name) == set_attr_name

        assert result_set.performance == NullPerformance()
        assert isinstance(result_set.creation_date, datetime.datetime)
        assert result_set.id == ID()

        assert result_set.has_score_metric() is False
        result_set.performance = "test_performance"
        assert result_set.performance != NullPerformance()
        assert result_set.has_score_metric() is True

        creation_date = self.creation_date
        result_set.creation_date = creation_date
        assert result_set.creation_date == creation_date

        set_attr_id = ID(123456789)
        result_set.id = set_attr_id
        assert result_set.id == set_attr_id

        test_result_set_repr = [
            f"model={result_set.model}",
            f"ground_truth_dataset={result_set.ground_truth_dataset}",
            f"prediction_dataset={result_set.prediction_dataset}",
            f"purpose={result_set.purpose}",
            f"performance={result_set.performance}",
            f"creation_date={result_set.creation_date}",
            f"id={result_set.id}",
        ]

        for i in test_result_set_repr:
            assert i in repr(result_set)