def evaluate(self, output_resultset: ResultSetEntity, evaluation_metric: Optional[str] = None):
    """Compute accuracy on the result set and store it as the result set's performance."""
    performance = MetricsHelper.compute_accuracy(output_resultset).get_performance()
    logger.info(f"Computed performance: {performance}")
    output_resultset.performance = performance
def test_evaluate_interface(self):
    """
    <b>Description:</b>
    Check IEvaluationTask class object initialization

    <b>Input data:</b>
    IEvaluationTask object

    <b>Expected results:</b>
    Test passes if IEvaluationTask object evaluate method raises NotImplementedError exception
    """
    dataset = DatasetEntity()
    configuration = ModelConfiguration(
        configurable_parameters=ConfigurableParameters(header="Test Header"),
        label_schema=LabelSchemaEntity(),
    )
    model_entity = ModelEntity(configuration=configuration, train_dataset=dataset)
    with pytest.raises(NotImplementedError):
        IEvaluationTask().evaluate(
            ResultSetEntity(
                model=model_entity,
                ground_truth_dataset=dataset,
                prediction_dataset=dataset,
            )
        )
def evaluate(self, output_result_set: ResultSetEntity, evaluation_metric: Optional[str] = None):
    """Compute accuracy on the result set; other metrics are not supported."""
    if evaluation_metric is not None:
        logger.warning(
            f"Requested to use {evaluation_metric} metric, "
            "but the parameter is ignored. Accuracy is used instead."
        )
    output_result_set.performance = MetricsHelper.compute_accuracy(
        output_result_set
    ).get_performance()
def evaluate(self, output_resultset: ResultSetEntity, evaluation_metric: Optional[str] = None):
    """
    Evaluate the performance on a result set.
    """
    f_measure_metrics = MetricsHelper.compute_f_measure(output_resultset)
    output_resultset.performance = f_measure_metrics.get_performance()
    # The F-measure score is a float, so format it as such when logging.
    logger.info("F-measure after evaluation: %f", f_measure_metrics.f_measure.value)
def evaluate(self, output_resultset: ResultSetEntity, evaluation_metric: Optional[str] = None):
    """Evaluate the performance of the model.

    Args:
        output_resultset (ResultSetEntity): Result set storing ground truth and predicted dataset.
        evaluation_metric (Optional[str], optional): Evaluation metric. Defaults to None.
    """
    output_resultset.performance = MetricsHelper.compute_f_measure(
        output_resultset
    ).get_performance()
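# Illustrative sketch, not code from the source: the evaluate() implementations above
# accept `evaluation_metric` but each hard-codes a single metric (accuracy or F-measure).
# Assuming only the MetricsHelper calls already seen in these snippets
# (compute_accuracy and compute_f_measure), a task could honor the parameter like this.
def evaluate(self, output_resultset: ResultSetEntity, evaluation_metric: Optional[str] = None):
    """Compute the requested metric on the result set; default to F-measure."""
    if evaluation_metric == "accuracy":
        metrics = MetricsHelper.compute_accuracy(output_resultset)
    else:
        # Fall back to F-measure, matching the default behaviour of the tasks above.
        metrics = MetricsHelper.compute_f_measure(output_resultset)
    output_resultset.performance = metrics.get_performance()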
def run_evaluation(dataset, task, model):
    """Infer on the dataset, evaluate the predictions, and return (score_name, score_value)."""
    logger.debug("Evaluation: Get predictions on the dataset")
    predicted_dataset = task.infer(
        dataset.with_empty_annotations(), InferenceParameters(is_evaluation=True)
    )
    resultset = ResultSetEntity(
        model=model,
        ground_truth_dataset=dataset,
        prediction_dataset=predicted_dataset,
    )
    logger.debug("Evaluation: Estimate quality on dataset")
    task.evaluate(resultset)
    evaluation_performance = resultset.performance
    logger.info(f"Evaluation: performance={evaluation_performance}")
    score_name, score_value = performance_to_score_name_value(evaluation_performance)
    return score_name, score_value
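# Hypothetical call site (an assumption, not taken from the source): run_evaluation()
# wraps the infer -> ResultSetEntity -> evaluate sequence that the main() functions
# below perform inline. The names `dataset`, `task`, and `output_model` are assumed
# to come from a surrounding training script such as the training main() below.
validation_dataset = dataset.get_subset(Subset.VALIDATION)
score_name, score_value = run_evaluation(validation_dataset, task, output_model)
logger.info(f"Validation {score_name}: {score_value}")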
def main():
    """
    Main function that is used for model evaluation.
    """

    # Dynamically create an argument parser based on override parameters.
    args, template, hyper_parameters = parse_args()
    # Get new values from user's input.
    updated_hyper_parameters = gen_params_dict_from_args(args)
    # Override default parameters with the user's values.
    override_parameters(updated_hyper_parameters, hyper_parameters)

    hyper_parameters = create(hyper_parameters)

    # Get classes for Task, ConfigurableParameters and Dataset.
    if args.load_weights.endswith(".bin") or args.load_weights.endswith(".xml"):
        task_class = get_impl_class(template.entrypoints.openvino)
    else:
        task_class = get_impl_class(template.entrypoints.base)

    dataset_class = get_dataset_class(template.task_type)

    dataset = dataset_class(
        test_subset={"ann_file": args.test_ann_files, "data_root": args.test_data_roots}
    )

    dataset_label_schema = generate_label_schema(dataset, template.task_type)
    check_label_schemas(
        read_label_schema(
            os.path.join(os.path.dirname(args.load_weights), "label_schema.json")
        ),
        dataset_label_schema,
    )

    environment = TaskEnvironment(
        model=None,
        hyper_parameters=hyper_parameters,
        label_schema=dataset_label_schema,
        model_template=template,
    )

    model = read_model(environment.get_model_configuration(), args.load_weights, None)
    environment.model = model

    task = task_class(task_environment=environment)

    validation_dataset = dataset.get_subset(Subset.TESTING)
    predicted_validation_dataset = task.infer(
        validation_dataset.with_empty_annotations(),
        InferenceParameters(is_evaluation=True),
    )

    resultset = ResultSetEntity(
        model=model,
        ground_truth_dataset=validation_dataset,
        prediction_dataset=predicted_validation_dataset,
    )
    task.evaluate(resultset)
    assert resultset.performance is not None
    print(resultset.performance)

    if args.save_performance:
        with open(args.save_performance, "w", encoding="UTF-8") as write_file:
            json.dump(
                {resultset.performance.score.name: resultset.performance.score.value},
                write_file,
            )
def main():
    """
    Main function that is used for model training.
    """

    # Dynamically create an argument parser based on override parameters.
    args, template, hyper_parameters = parse_args()
    # Get new values from user's input.
    updated_hyper_parameters = gen_params_dict_from_args(args)
    # Override default parameters with the user's values.
    override_parameters(updated_hyper_parameters, hyper_parameters)

    hyper_parameters = create(hyper_parameters)

    # Get classes for Task, ConfigurableParameters and Dataset.
    task_class = get_impl_class(template.entrypoints.base)
    dataset_class = get_dataset_class(template.task_type)

    # Create instances of Task, ConfigurableParameters and Dataset.
    dataset = dataset_class(
        train_subset={
            "ann_file": args.train_ann_files,
            "data_root": args.train_data_roots,
        },
        val_subset={"ann_file": args.val_ann_files, "data_root": args.val_data_roots},
    )

    environment = TaskEnvironment(
        model=None,
        hyper_parameters=hyper_parameters,
        label_schema=generate_label_schema(dataset, template.task_type),
        model_template=template,
    )

    if args.load_weights:
        environment.model = ModelEntity(
            train_dataset=dataset,
            configuration=environment.get_model_configuration(),
            model_adapters={
                "weights.pth": ModelAdapter(read_binary(args.load_weights))
            },
        )

    if args.enable_hpo:
        run_hpo(args, environment, dataset, template.task_type)

    task = task_class(task_environment=environment)

    output_model = ModelEntity(
        dataset,
        environment.get_model_configuration(),
        model_status=ModelStatus.NOT_READY,
    )

    task.train(dataset, output_model, train_parameters=TrainParameters())

    save_model_data(output_model, args.save_model_to)

    validation_dataset = dataset.get_subset(Subset.VALIDATION)
    predicted_validation_dataset = task.infer(
        validation_dataset.with_empty_annotations(),
        InferenceParameters(is_evaluation=True),
    )

    resultset = ResultSetEntity(
        model=output_model,
        ground_truth_dataset=validation_dataset,
        prediction_dataset=predicted_validation_dataset,
    )
    task.evaluate(resultset)
    assert resultset.performance is not None
    print(resultset.performance)
def test_resultset_entity(self):
    """
    <b>Description:</b>
    Check the ResultSetEntity can correctly return the value

    <b>Input data:</b>
    Mock data

    <b>Expected results:</b>
    Test passes if incoming data is processed correctly

    <b>Steps</b>
    1. Create dummy data
    2. Check the processing of default values
    3. Check the processing of changed values
    """
    test_data = {
        "model": None,
        "ground_truth_dataset": None,
        "prediction_dataset": None,
        "purpose": None,
        "performance": None,
        "creation_date": None,
        "id": None,
    }

    result_set = ResultSetEntity(**test_data)

    for name, value in test_data.items():
        set_attr_name = f"test_{name}"
        if name in [
            "model",
            "ground_truth_dataset",
            "prediction_dataset",
            "purpose",
        ]:
            assert getattr(result_set, name) == value
            setattr(result_set, name, set_attr_name)
            assert getattr(result_set, name) == set_attr_name

    assert result_set.performance == NullPerformance()
    assert type(result_set.creation_date) == datetime.datetime
    assert result_set.id == ID()
    assert result_set.has_score_metric() is False

    result_set.performance = "test_performance"
    assert result_set.performance != NullPerformance()
    assert result_set.has_score_metric() is True

    creation_date = self.creation_date
    result_set.creation_date = creation_date
    assert result_set.creation_date == creation_date

    set_attr_id = ID(123456789)
    result_set.id = set_attr_id
    assert result_set.id == set_attr_id

    test_result_set_repr = [
        f"model={result_set.model}",
        f"ground_truth_dataset={result_set.ground_truth_dataset}",
        f"prediction_dataset={result_set.prediction_dataset}",
        f"purpose={result_set.purpose}",
        f"performance={result_set.performance}",
        f"creation_date={result_set.creation_date}",
        f"id={result_set.id}",
    ]
    for i in test_result_set_repr:
        assert i in repr(result_set)