def test_greater_is_better_distinction(self):
    """
    Test for method greater_is_better_distinction.
    Should return a Boolean for implemented metrics or raise a NameError
    for unknown ones.
    """
    for implemented_metric in self.all_implemented_metrics:
        self.assertIn(
            Scorer.greater_is_better_distinction(implemented_metric),
            [True, False])

    for not_implemented_metric in self.some_not_implemented_metrics:
        with self.assertRaises(NameError):
            Scorer.greater_is_better_distinction(not_implemented_metric)
def test_calculate_metrics(self):
    """
    Test for method calculate_metrics.
    Handle all given metrics with a scorer call.
    """
    for implemented_metric in self.all_implemented_metrics:
        self.assertIsInstance(
            Scorer.calculate_metrics(
                [1, 1, 0, 1], [0, 1, 0, 1],
                [implemented_metric])[implemented_metric],
            float)

    for not_implemented_metric in self.some_not_implemented_metrics:
        np.testing.assert_equal(
            Scorer.calculate_metrics(
                [1, 1, 0, 1], [0, 1, 0, 1],
                [not_implemented_metric])[not_implemented_metric],
            np.nan)
def get_minimum_config_evaluations(self):
    config_evaluations = self.get_config_evaluations()
    minimum_config_evaluations = dict()

    for metric, evaluations in config_evaluations.items():
        minimum_config_evaluations[metric] = list()
        greater_is_better = Scorer.greater_is_better_distinction(metric)

        for fold in evaluations:
            # track the running best performance after each evaluated config
            fold_evaluations = list()
            last_config = None
            for i, config in enumerate(fold):
                if i == 0:
                    last_config = config
                elif greater_is_better:
                    if config > last_config:
                        last_config = config
                elif config < last_config:
                    last_config = config
                fold_evaluations.append(last_config)
            minimum_config_evaluations[metric].append(fold_evaluations)

    return minimum_config_evaluations
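# To make the running-best bookkeeping above easier to follow, here is a minimal
# standalone sketch of the same idea on a toy sequence. `running_best` is a
# hypothetical helper written only for illustration; it is not part of the
# Scorer or results API.
def running_best(values, greater_is_better):
    """Return, after each entry, the best value seen so far (illustration only)."""
    best = None
    trace = []
    for i, value in enumerate(values):
        if i == 0:
            best = value
        elif greater_is_better and value > best:
            best = value
        elif not greater_is_better and value < best:
            best = value
        trace.append(best)
    return trace


# an error-like metric (smaller is better)
assert running_best([0.8, 0.6, 0.7, 0.5], greater_is_better=False) == [0.8, 0.6, 0.6, 0.5]
# an accuracy-like metric (larger is better)
assert running_best([0.6, 0.8, 0.7, 0.9], greater_is_better=True) == [0.6, 0.8, 0.8, 0.9]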
def metric(self, value):
    """
    Setter for attribute metric.

    :param value: name of the metric
    :return:
    """
    try:
        self._metric = value
        self._greater_is_better = Scorer.greater_is_better_distinction(self._metric)
    except NameError:
        self._metric = "unknown"
        logger.warning("Your metric is not supported. "
                       "Performance constraints will always evaluate to False.")
def test_doubled_custom_metric(self):
    def custom_metric(y_true, y_pred):
        return 99.9

    Scorer.register_custom_metric(('a_custom_metric', custom_metric))
    with self.assertRaises(Warning):
        Scorer.register_custom_metric(('a_custom_metric', custom_metric))

    with self.assertRaises(ValueError):
        Scorer.register_custom_metric(None)
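# For context, the registration pattern exercised by this test looks roughly like
# the sketch below. The metric name and metric body are placeholders, and it is an
# assumption (not guaranteed by the test above) that the registered name can
# afterwards be passed to Scorer.calculate_metrics like a built-in metric.
def exact_match_ratio(y_true, y_pred):
    # illustrative custom metric: fraction of exactly matching predictions
    return sum(t == p for t, p in zip(y_true, y_pred)) / len(y_true)


# register once under a unique name; registering the same name twice raises
# a Warning and passing None raises a ValueError, as asserted above
Scorer.register_custom_metric(('exact_match_ratio', exact_match_ratio))

# assumed follow-up usage: request the custom metric like any other one
scores = Scorer.calculate_metrics([1, 1, 0, 1], [0, 1, 0, 1], ['exact_match_ratio'])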
def process_fit_results(config_item,
                        calculate_metrics_across_folds,
                        calculate_metrics_per_fold,
                        metrics):

    overall_y_pred_test = []
    overall_y_true_test = []
    overall_y_pred_train = []
    overall_y_true_train = []

    for fold in config_item.inner_folds:
        curr_test_fold = fold.validation
        curr_train_fold = fold.training

        if calculate_metrics_across_folds:
            # if we have one hot encoded values -> concat horizontally
            if isinstance(curr_test_fold.y_pred, np.ndarray):
                if len(curr_test_fold.y_pred.shape) > 1:
                    axis = 1
                else:
                    axis = 0
            else:
                # if we have lists, concat along the first axis
                axis = 0

            overall_y_true_test = np.concatenate(
                (overall_y_true_test, curr_test_fold.y_true), axis=axis)
            overall_y_pred_test = np.concatenate(
                (overall_y_pred_test, curr_test_fold.y_pred), axis=axis)

            # we assume y_pred from the training set comes in the same shape
            # as y_pred from the test set
            overall_y_true_train = np.concatenate(
                (overall_y_true_train, curr_train_fold.y_true), axis=axis)
            overall_y_pred_train = np.concatenate(
                (overall_y_pred_train, curr_train_fold.y_pred), axis=axis)

    if calculate_metrics_across_folds:
        # metrics across folds
        metrics_to_calculate = list(metrics)
        if "score" in metrics_to_calculate:
            metrics_to_calculate.remove("score")

        metrics_train = Scorer.calculate_metrics(
            overall_y_true_train, overall_y_pred_train, metrics_to_calculate)
        metrics_test = Scorer.calculate_metrics(
            overall_y_true_test, overall_y_pred_test, metrics_to_calculate)

        def metric_to_db_class(metric_list):
            db_metrics = []
            for metric_name, metric_value in metric_list.items():
                new_metric = MDBFoldMetric(operation=FoldOperations.RAW,
                                           metric_name=metric_name,
                                           value=metric_value)
                db_metrics.append(new_metric)
            return db_metrics

        db_metrics_train = metric_to_db_class(metrics_train)
        db_metrics_test = metric_to_db_class(metrics_test)

        # if we want to have metrics for each fold as well, calculate mean and std.
        if calculate_metrics_per_fold:
            db_metrics_fold_train, db_metrics_fold_test = \
                MDBHelper.aggregate_metrics_for_inner_folds(config_item.inner_folds, metrics)
            config_item.metrics_train = db_metrics_train + db_metrics_fold_train
            config_item.metrics_test = db_metrics_test + db_metrics_fold_test
        else:
            config_item.metrics_train = db_metrics_train
            config_item.metrics_test = db_metrics_test

    elif calculate_metrics_per_fold:
        # calculate mean and std over all fold metrics
        config_item.metrics_train, config_item.metrics_test = \
            MDBHelper.aggregate_metrics_for_inner_folds(config_item.inner_folds, metrics)
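# The only subtle step above is the axis choice for np.concatenate. The toy snippet
# below only demonstrates what the two axis values do to arrays of different
# dimensionality; which one applies in process_fit_results is decided by the
# dimensionality check on y_pred, and the arrays here are made up for illustration.
import numpy as np

# 2-D, e.g. one-hot encoded predictions from two folds (2 samples x 2 classes each)
fold_a = np.array([[1, 0], [0, 1]])
fold_b = np.array([[0, 1], [1, 0]])
np.concatenate((fold_a, fold_b), axis=0).shape  # (4, 2): stacked sample-wise
np.concatenate((fold_a, fold_b), axis=1).shape  # (2, 4): stacked horizontally

# 1-D label vectors only allow concatenation along axis 0
labels_a = np.array([1, 0, 1])
labels_b = np.array([0, 1])
np.concatenate((labels_a, labels_b), axis=0)    # array([1, 0, 1, 0, 1])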
def test_METRIC_SIGN_ZERO():
    assert Scorer.metric_sign("SC") == 0
def test_METRIC_SIGN_POS():
    assert Scorer.metric_sign("FM") == 1
def test_is_is_element_type_Transformer():
    assert Scorer.is_element_type("Transformer")
def test_is_machine_learning_type():
    assert Scorer.is_machine_learning_type("Clustering")
def test_metric_is_metric_classif():
    assert Scorer.is_metric('recall')
def test_metric_is_element_type_error():
    with pytest.raises(PhotonaiError):
        assert Scorer.is_element_type('fred')
def test_metric_is_element_type_estimator():
    assert Scorer.is_element_type(Scorer.ELEMENT_TYPES[Scorer.ESTID])
def test_metric_is_element_type():
    assert Scorer.is_element_type(Scorer.ELEMENT_TYPES[Scorer.TRANID])
def test_create_metric_FM():
    assert type(Scorer.create("FM")) == type(fowlkes_mallows_score)
def test_calculate_metric_HCV():
    yt = [1, 1, 1, 1]
    yp = [1, 1, 1, 0]
    metrics = ["HCV"]
    s = Scorer()
    assert s.calculate_metrics(yt, yp, metrics) == {"HCV": 1.0}
def test_is_machine_learning_type_bad():
    with pytest.raises(PhotonaiError):
        assert Scorer.is_machine_learning_type("fred")
def test_metric_is_metric_linear():
    assert Scorer.is_metric('mean_absolute_error')
def test_is_is_element_type_bad():
    with pytest.raises(PhotonaiError):
        assert Scorer.is_element_type("Clustering")
def test_metric_is_metric_cluster():
    assert Scorer.is_metric('CH')
def test_is_is_element_type_Estimator():
    assert Scorer.is_element_type("Estimator")
def test_metric_is_metricerror():
    with pytest.raises(PhotonaiError):
        assert Scorer.is_metric('fred')
def test_greater_is_better_distinction_POS():
    assert Scorer.greater_is_better_distinction("FM")
def test_metric_sign_FM():
    assert Scorer.metric_sign("FM") == Scorer.SCORE_SIGN[Scorer.SCORE_POSID]
def test_greater_is_better_distinction_ZERO():
    with pytest.raises(PhotonaiError):
        assert Scorer.greater_is_better_distinction("SC")
def test_metric_sign_SC():
    assert Scorer.metric_sign("SC") == Scorer.SCORE_SIGN[Scorer.SCORE_ZEROID]
def score(estimator, X, y_true, metrics, indices=[],
          calculate_metrics: bool = True, training: bool = False, **kwargs):
    """
    Uses the pipeline to predict the given data, compares the predictions to
    the truth values and calculates metrics.

    :param estimator: the pipeline or pipeline element used for prediction
    :param X: the data to predict
    :param y_true: the truth values for the data
    :param metrics: the metrics to be calculated
    :param indices: the indices of the given data and targets that are logged into the result tree
    :param calculate_metrics: if True, calculates metrics for the given data
    :param training: if True, all training_only pipeline elements are executed; if False, they are skipped
    :return: MDBScoreInformation object
    """
    scoring_time_start = time.time()

    output_metrics = {}
    non_default_score_metrics = list(metrics)

    # a plain 'if "score" in metrics' is not used here because the previous check
    # was not an exact match and also reacted to e.g. f1_score, so an exact set
    # intersection is used instead
    checklist = ["score"]
    matches = set(checklist).intersection(set(non_default_score_metrics))
    if len(matches) > 0:
        # Todo: this call is potentially slowing things down
        default_score = estimator.score(X, y_true)
        output_metrics["score"] = default_score
        non_default_score_metrics.remove("score")

    if not training:
        y_pred = estimator.predict(X, **kwargs)
    else:
        X, y_true_new, kwargs_new = estimator.transform(X, y_true, **kwargs)
        if y_true_new is not None:
            y_true = y_true_new
        if kwargs_new is not None and len(kwargs_new) > 0:
            kwargs = kwargs_new
        y_pred = estimator.predict(X, training=True, **kwargs)

    # Nice to have
    # InnerFoldManager.plot_some_data(y_true, y_pred)

    if calculate_metrics:
        score_metrics = Scorer.calculate_metrics(y_true, y_pred,
                                                 non_default_score_metrics)
        # merge the default score metric with the remaining metrics
        if output_metrics:
            output_metrics = {**output_metrics, **score_metrics}
        else:
            output_metrics = score_metrics
    else:
        output_metrics = {}

    final_scoring_time = time.time() - scoring_time_start

    probabilities = []
    if hasattr(estimator, "_final_estimator"):
        if hasattr(estimator._final_estimator.base_element, "predict_proba"):
            probabilities = estimator.predict_proba(X, training=training, **kwargs)

            try:
                if probabilities is not None:
                    if not len(probabilities) == 0:
                        probabilities = probabilities.tolist()
            except Exception:
                warnings.warn("No probabilities available.")

    if not isinstance(y_pred, list):
        y_pred = np.asarray(y_pred).tolist()
    if not isinstance(y_true, list):
        y_true = np.asarray(y_true).tolist()

    score_result_object = MDBScoreInformation(
        metrics=output_metrics,
        score_duration=final_scoring_time,
        y_pred=y_pred,
        y_true=y_true,
        indices=np.asarray(indices).tolist(),
        probabilities=probabilities,
    )
    return score_result_object
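# The exact-match handling of the default "score" entry above is easiest to see on
# a toy list; the snippet below is illustrative only and simply mirrors the
# set-intersection check used in score().
metrics = ["f1_score", "accuracy"]

"score" in "f1_score"                       # True  -> substring-style matching is too loose
set(["score"]).intersection(set(metrics))   # set() -> default score is not requested

metrics = ["f1_score", "accuracy", "score"]
set(["score"]).intersection(set(metrics))   # {'score'} -> call estimator.score(X, y_true)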
def test_metric_sign_bad():
    with pytest.raises(PhotonaiError):
        assert Scorer.metric_sign("fred") == Scorer.SCORE_SIGN[Scorer.SCORE_ZEROID]
def plot_optimizer_history(self, metric,
                           title: str = 'Optimizer History',
                           type: str = 'plot',
                           reduce_scatter_by: Union[int, str] = 'auto',
                           file: str = None):
    """
    :param metric: specify a metric that has been stored within the PHOTON results tree
    :param title: title of the plot
    :param type: 'plot' or 'scatter'
    :param reduce_scatter_by: integer or 'auto', reduces the number of points plotted by scatter
    :param file: specify a filename if you want to save the plot
    :return:
    """
    if metric not in self.results.hyperpipe_info.metrics:
        raise ValueError('Metric "{}" not stored in results tree'.format(metric))

    config_evaluations = self.get_config_evaluations()
    minimum_config_evaluations = self.get_minimum_config_evaluations()

    # handle different lengths
    min_corresponding = len(min(config_evaluations[metric], key=len))
    config_evaluations_corres = [configs[:min_corresponding]
                                 for configs in config_evaluations[metric]]
    minimum_config_evaluations_corres = [configs[:min_corresponding]
                                         for configs in minimum_config_evaluations[metric]]

    mean = np.nanmean(np.asarray(config_evaluations_corres), axis=0)
    mean_min = np.nanmean(np.asarray(minimum_config_evaluations_corres), axis=0)

    greater_is_better = Scorer.greater_is_better_distinction(metric)
    if greater_is_better:
        caption = 'Maximum'
    else:
        caption = 'Minimum'

    plt.figure()
    if type == 'plot':
        plt.plot(np.arange(0, len(mean)), mean, '-', color='gray',
                 label='Mean Performance')

    elif type == 'scatter':
        # now do smoothing
        if isinstance(reduce_scatter_by, str):
            if reduce_scatter_by != 'auto':
                logger.warning('{} is not a valid smoothing_kernel specifier. '
                               'Falling back to "auto".'.format(reduce_scatter_by))

            # if 'auto', choose reduce_scatter_by so that about 75 points remain
            # on the x-axis; the smallest possible reduce_scatter_by is 1
            reduce_scatter_by = max([np.floor(min_corresponding / 75).astype(int), 1])

        if reduce_scatter_by > 1:
            plt.plot([], [], ' ',
                     label="scatter reduced by factor {}".format(reduce_scatter_by))

        for i, fold in enumerate(config_evaluations[metric]):
            # pad with NaN so that the list can be divided by the smoothing kernel
            remaining = len(fold) % reduce_scatter_by
            if remaining:
                fold.extend([np.nan] * (reduce_scatter_by - remaining))
            # average over every n evaluations so that the plot is less cluttered
            reduced_fold = np.nanmean(np.asarray(fold).reshape(-1, reduce_scatter_by), axis=1)
            reduced_xfit = np.arange(reduce_scatter_by / 2, len(fold), step=reduce_scatter_by)

            if i == len(config_evaluations[metric]) - 1:
                plt.scatter(reduced_xfit, np.asarray(reduced_fold), color='gray',
                            alpha=0.5, label='Performance', marker='.')
            else:
                plt.scatter(reduced_xfit, np.asarray(reduced_fold), color='gray',
                            alpha=0.5, marker='.')
    else:
        raise ValueError('Please specify either "plot" or "scatter".')

    plt.plot(np.arange(0, len(mean_min)), mean_min, '-', color='black',
             label='Mean {} Performance'.format(caption))

    for i, fold in enumerate(minimum_config_evaluations[metric]):
        xfit = np.arange(0, len(fold))
        plt.plot(xfit, fold, '-', color='black', alpha=0.5)

    plt.ylabel(metric.replace('_', ' '))
    plt.xlabel('No of Evaluations')

    plt.legend()
    plt.title(title)
    if file:
        plt.savefig(file)
    else:
        if self.output_settings:
            file = os.path.join(self.output_settings.results_folder, "optimizer_history.png")
            plt.savefig(file)
    plt.close()
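# A hedged usage sketch of the call above: `handler` stands in for whichever
# results object exposes this method, and the metric name and file path are
# placeholders chosen only for illustration.
handler.plot_optimizer_history(metric='accuracy',
                               title='Optimizer History',
                               type='scatter',
                               reduce_scatter_by='auto',
                               file='optimizer_history.png')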
def test_calculate_metric_accuracy():
    yt = [1, 1, 1, 1]
    yp = [1, 1, 1, 0]
    metrics = ["accuracy"]
    s = Scorer()
    assert s.calculate_metrics(yt, yp, metrics) == {"accuracy": 0.75}