Beispiel #1
0
def test_accuracy_string():
    """Exercise the accuracy metric with string-typed class labels.

    The prediction differs from the true labels in one of five positions;
    0.8 is handed to ``_test_metric`` as the expected prediction score and
    1.0 as the optimal score.
    """
    metric = Metric.from_string('accuracy')
    truth = np.asarray(["1", "0", "0", "0", "1"])
    # Identical to `truth` except index 1, which is flipped to "1".
    one_off = np.asarray(["1", "1", "0", "0", "1"])
    _test_metric(
        metric,
        truth,
        one_off,
        optimal_score=1.0,
        prediction_score=0.8,
    )
Beispiel #2
0
def test_accuracy_numeric():
    """Exercise the accuracy metric with integer class labels.

    The prediction differs from the true labels in one of five positions;
    0.8 is handed to ``_test_metric`` as the expected prediction score and
    1.0 as the maximum score.
    """
    metric = Metric.from_string("accuracy")
    truth = np.asarray([1, 0, 0, 0, 1])
    # Identical to `truth` except index 1, which is flipped to 1.
    one_off = np.asarray([1, 1, 0, 0, 1])
    _test_metric(
        metric,
        truth,
        one_off,
        max_score=1.0,
        prediction_score=0.8,
    )
Beispiel #3
0
def test_accuracy_string():
    """Exercise the accuracy metric with string-typed class labels.

    The prediction differs from the true labels in one of five positions;
    0.8 is handed to ``_test_metric`` as the expected prediction score and
    1.0 as the maximum score.
    """
    metric = Metric("accuracy")
    truth = np.asarray(["1", "0", "0", "0", "1"])
    # Identical to `truth` except index 1, which is flipped to "1".
    one_off = np.asarray(["1", "1", "0", "0", "1"])
    _test_metric(metric, truth, one_off, max_score=1.0, prediction_score=0.8)
Beispiel #4
0
def test_logloss_numeric():
    """Exercise the log loss metric on hard and on soft predictions.

    Two scenarios are passed to ``_test_metric``, both with an optimal
    score of 0:

    1. integer labels against one-hot predictions with a single wrong row;
    2. one-hot labels against class-probability predictions.
    """
    metric = Metric.from_string('log_loss')

    # Scenario 1: hard (one-hot) predictions; row 1 predicts class 1 while
    # the true label is 0.
    labels = np.asarray([1, 0, 0, 0, 1])
    hard_predictions = np.asarray(
        [[0, 1], [0, 1], [1, 0], [1, 0], [0, 1]]
    )
    # NOTE(review): numerically this equals -ln(1e-15) / 5, so it presumably
    # stems from the metric clipping a zero probability -- confirm.
    expected_hard = 6.907755278982137
    _test_metric(
        metric,
        labels,
        hard_predictions,
        optimal_score=0,
        prediction_score=expected_hard,
    )

    # Scenario 2: the same labels in one-hot form, scored against
    # probabilistic predictions.
    labels_one_hot = np.asarray([[0, 1], [1, 0], [1, 0], [1, 0], [0, 1]])
    soft_predictions = np.asarray([
        [0.1, 0.9],
        [0.2, 0.8],
        [0.7, 0.3],
        [0.95, 0.05],
        [0.1, 0.9],
    ])
    expected_soft = 0.44562543641520713
    _test_metric(
        metric,
        labels_one_hot,
        soft_predictions,
        optimal_score=0,
        prediction_score=expected_soft,
    )
Beispiel #5
0
def test_logloss_numeric():
    """Exercise the negated log loss metric on hard and soft predictions.

    Two scenarios are passed to ``_test_metric``, both with a maximum
    score of 0 and negative expected scores:

    1. integer labels against one-hot predictions with a single wrong row;
    2. one-hot labels against class-probability predictions.
    """
    metric = Metric("neg_log_loss")

    # Scenario 1: hard (one-hot) predictions; row 1 predicts class 1 while
    # the true label is 0.
    labels = np.asarray([1, 0, 0, 0, 1])
    hard_predictions = np.asarray(
        [[0, 1], [0, 1], [1, 0], [1, 0], [0, 1]]
    )
    # NOTE(review): numerically this equals ln(1e-15) / 5, so it presumably
    # stems from the metric clipping a zero probability -- confirm.
    expected_hard = -6.907755278982137
    _test_metric(
        metric, labels, hard_predictions,
        max_score=0, prediction_score=expected_hard,
    )

    # Scenario 2: the same labels in one-hot form, scored against
    # probabilistic predictions.
    labels_one_hot = np.asarray([[0, 1], [1, 0], [1, 0], [1, 0], [0, 1]])
    soft_predictions = np.asarray([
        [0.1, 0.9],
        [0.2, 0.8],
        [0.7, 0.3],
        [0.95, 0.05],
        [0.1, 0.9],
    ])
    expected_soft = -0.44562543641520713
    _test_metric(
        metric, labels_one_hot, soft_predictions,
        max_score=0, prediction_score=expected_soft,
    )
Beispiel #6
0
        If model_library is specified, model_library_directory is ignored.

        Parameters
        ----------
        metric: string or Metric
            Metric to optimize the ensemble towards.
        y: pandas.DataFrame or pandas.Series
            True labels for the predictions made by the models in the library.
        evaluation_library: `gama.utilities.evaluation_library.EvaluationLibrary`
            A list of models from which an ensemble can be built.
        shrink_on_pickle: bool (default=True)
            If True, remove memory-intensive attributes that are required before pickling.
            When unpickled, the model can be used to create predictions, but the ensemble can't be changed.
        """
        if isinstance(metric, str):
            metric = Metric(metric)
        elif not isinstance(metric, Metric):
            raise ValueError("metric must be specified as string or `gama.ea.metrics.Metric`.")

        if evaluation_library is None:
            raise ValueError("`evaluation_library` is None but must be EvaluationLibrary.")
        elif not isinstance(evaluation_library, EvaluationLibrary):
            raise TypeError("`evaluation_library` must be of type gama.utilities.evaluation_library.EvaluationLibrary.")

        if not isinstance(y, (pd.Series, pd.DataFrame)):
            raise TypeError(f"`y_true` must be of type pandas.DataFrame or pandas.Series but is {type(y)}.")

        self._metric = metric
        self.evaluation_library = evaluation_library
        self._model_library = []
        self._use_top_n_only = use_top_n_only
Beispiel #7
0
def test_scoring_to_metric_mixed():
    """Call ``scoring_to_metric`` with a list mixing Metric objects and raw entries.

    The first two entries of ``all_metrics`` are converted with
    ``Metric.from_string``; the remaining entries are passed through as-is.
    """
    entries = list(all_metrics)
    head, tail = entries[:2], entries[2:]
    mixed = [Metric.from_string(name) for name in head]
    mixed.extend(tail)
    scoring_to_metric(mixed)
Beispiel #8
0
def test_all_metrics_instantiate():
    """Build a Metric via ``Metric.from_string`` for every entry of ``all_metrics``."""
    for metric_name in all_metrics:
        # The result is discarded; the call itself is what is exercised.
        Metric.from_string(metric_name)
Beispiel #9
0
def test_all_metrics_instantiate():
    """Construct a ``Metric`` from every entry of ``all_metrics``."""
    for metric_name in all_metrics:
        # The result is discarded; the constructor call is what is exercised.
        Metric(metric_name)
Beispiel #10
0
        Parameters
        ----------
        metric: string or Metric
            Metric to optimize the ensemble towards.
        y: pandas.DataFrame or pandas.Series
            True labels for the predictions made by the models in the library.
        evaluation_library: `gama.utilities.evaluation_library.EvaluationLibrary`
            A list of models from which an ensemble can be built.
        shrink_on_pickle: bool (default=True)
            If True, remove memory-intensive attributes that are required before pickle.
            When unpickled, the model can be used to create predictions,
            but the ensemble can't be changed.
        """
        if isinstance(metric, str):
            metric = Metric.from_string(metric)
        elif not isinstance(metric, Metric):
            raise ValueError(
                "metric must be specified as string or `gama.ea.metrics.Metric`."
            )

        if evaluation_library is None:
            raise ValueError(
                "`evaluation_library` is None but must be EvaluationLibrary.")
        elif not isinstance(evaluation_library, EvaluationLibrary):
            raise TypeError(
                "`evaluation_library` must be of type "
                "gama.utilities.evaluation_library.EvaluationLibrary.")

        if not isinstance(y, (pd.Series, pd.DataFrame)):
            raise TypeError(