Example #1
def test_check_scoring_and_check_multimetric_scoring_errors(scoring, msg):
    # Make sure it raises errors when scoring parameter is not valid.
    # More weird corner cases are tested at test_validation.py
    estimator = EstimatorWithFitAndPredict()
    estimator.fit([[1]], [1])

    with pytest.raises(ValueError, match=msg):
        _check_multimetric_scoring(estimator, scoring=scoring)
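
The test above (and the variant in Example #2 below) receives its arguments from a pytest.mark.parametrize decorator that is not shown. A minimal sketch of such a parametrization, using illustrative invalid inputs and the generic "unique strings" wording scikit-learn uses for bad lists/tuples (the upstream suite has more cases with case-specific messages):

import pytest

@pytest.mark.parametrize(
    "scoring, msg",
    [
        ([5], "must be unique strings"),           # non-string element
        ((), "must be unique strings"),            # empty tuple
        (("f1", "f1"), "must be unique strings"),  # duplicate scorer names
    ],
)
def test_check_scoring_and_check_multimetric_scoring_errors(scoring, msg):
    ...  # body as in Example #1 above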
Example #2
def test_check_scoring_and_check_multimetric_scoring_errors(scoring):
    # Make sure it raises errors when scoring parameter is not valid.
    # More weird corner cases are tested at test_validation.py
    estimator = EstimatorWithFitAndPredict()
    estimator.fit([[1]], [1])

    error_message_regexp = ".*must be unique strings.*"
    with pytest.raises(ValueError, match=error_message_regexp):
        _check_multimetric_scoring(estimator, scoring=scoring)
Example #3
def test_check_scoring_and_check_multimetric_scoring():
    check_scoring_validator_for_single_metric_usecases(check_scoring)
    # To make sure the check_scoring is correctly applied to the constituent
    # scorers
    check_scoring_validator_for_single_metric_usecases(
        check_multimetric_scoring_single_metric_wrapper)

    # For multiple metric use cases
    # Make sure it works for the valid cases
    for scoring in (('accuracy', ), ['precision'], {
            'acc': 'accuracy',
            'precision': 'precision'
    }, ('accuracy', 'precision'), ['precision', 'accuracy'], {
            'accuracy': make_scorer(accuracy_score),
            'precision': make_scorer(precision_score)
    }):
        estimator = LinearSVC(random_state=0)
        estimator.fit([[1], [2], [3]], [1, 1, 0])

        scorers, is_multi = _check_multimetric_scoring(estimator, scoring)
        assert is_multi
        assert isinstance(scorers, dict)
        assert sorted(scorers.keys()) == sorted(list(scoring))
        assert all([
            isinstance(scorer, _PredictScorer)
            for scorer in list(scorers.values())
        ])

        if 'acc' in scoring:
            assert_almost_equal(
                scorers['acc'](estimator, [[1], [2], [3]], [1, 0, 0]), 2. / 3.)
        if 'accuracy' in scoring:
            assert_almost_equal(
                scorers['accuracy'](estimator, [[1], [2], [3]], [1, 0, 0]),
                2. / 3.)
        if 'precision' in scoring:
            assert_almost_equal(
                scorers['precision'](estimator, [[1], [2], [3]], [1, 0, 0]),
                0.5)

    estimator = EstimatorWithFitAndPredict()
    estimator.fit([[1]], [1])

    # Make sure it raises errors when scoring parameter is not valid.
    # More weird corner cases are tested at test_validation.py
    error_message_regexp = ".*must be unique strings.*"
    for scoring in (
        (
            make_scorer(precision_score),  # Tuple of callables
            make_scorer(accuracy_score)),
        [5],
        (make_scorer(precision_score), ),
        (),
        ('f1', 'f1')):
        with pytest.raises(ValueError, match=error_message_regexp):
            _check_multimetric_scoring(estimator, scoring=scoring)
Example #4
def _check_multimetric_scoring(estimator, scoring=None):
    # TODO: See if scikit-learn 0.24 solves the need for using
    # a private method
    from sklearn.metrics._scorer import _check_multimetric_scoring
    from sklearn.metrics import check_scoring

    if SK_024:
        if callable(scoring) or isinstance(scoring, (type(None), str)):
            scorers = {"score": check_scoring(estimator, scoring=scoring)}
            return scorers, False
        return _check_multimetric_scoring(estimator, scoring), True
    return _check_multimetric_scoring(estimator, scoring)
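
A brief usage sketch of the wrapper above, assuming the SK_024 flag it relies on is defined elsewhere in the module; the assertions reflect the pre-/post-0.24 behaviour it normalises into a (scorers, is_multimetric) pair:

from sklearn.svm import LinearSVC

est = LinearSVC(random_state=0).fit([[1], [2], [3]], [1, 1, 0])

# Single metric: both code paths yield ({"score": scorer}, False).
scorers, is_multi = _check_multimetric_scoring(est, scoring="accuracy")
assert not is_multi and list(scorers) == ["score"]

# Multiple metrics: both code paths yield ({name: scorer, ...}, True).
scorers, is_multi = _check_multimetric_scoring(est, scoring=["accuracy", "precision"])
assert is_multi and sorted(scorers) == ["accuracy", "precision"]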
Example #5
    def test_fit_and_score_return_dict(self):

        # Scoring
        accuracy_scorer = make_scorer(accuracy_score, normalize='weighted')

        # Test estimator
        dumb = DummyClassifier(strategy='constant', constant=1)

        # Test custom scorer
        bagAccScorer = BagScorer(accuracy_scorer, sparse=True)

        # Rename for easier parameters
        X = self.train_bags
        y = self.train_labels
        scoring = {'bag-scorer': bagAccScorer}
        estimator = dumb
        groups = None
        cv = 3
        n_jobs = 3
        verbose = 0
        pre_dispatch = 6
        fit_params = None
        return_estimator = True
        error_score = 'raise'
        return_train_score = True
        parameters = None

        # Test _fit_and_score method
        X, y, groups = indexable(X, y, groups)
        cv = check_cv(cv, y, classifier=is_classifier(estimator))
        scorers = _check_multimetric_scoring(estimator, scoring=scoring)

        # Use one cross-validation split
        generator = cv.split(X, y, groups)
        # Get training and test split of training data
        train, test = next(generator)
        # Generate scores using BagScorer
        scores = _fit_and_score(clone(estimator),
                                X,
                                y,
                                scorers,
                                train,
                                test,
                                verbose,
                                parameters,
                                fit_params,
                                return_train_score=return_train_score,
                                return_times=True,
                                return_estimator=return_estimator,
                                return_n_test_samples=False,
                                error_score=error_score)

        # Returned dictionary contains keys
        self.assertIn('train_scores', scores.keys())
        self.assertIn('test_scores', scores.keys())
        self.assertIn('fit_time', scores.keys())
        self.assertIn('score_time', scores.keys())
        self.assertIn('estimator', scores.keys())

        return None
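
The call above evaluates a single split. A sketch of how the same per-fold result dicts are typically merged across all splits, reusing the names defined in the test and the private _aggregate_score_dicts helper that other examples on this page import (a sketch, not part of the original test):

from sklearn.model_selection._validation import _aggregate_score_dicts

fold_results = []
for train, test in cv.split(X, y, groups):
    fold_results.append(
        _fit_and_score(clone(estimator), X, y, scorers, train, test,
                       verbose, parameters, fit_params,
                       return_train_score=return_train_score,
                       return_times=True,
                       return_estimator=return_estimator,
                       return_n_test_samples=False,
                       error_score=error_score))

cv_results = _aggregate_score_dicts(fold_results)  # e.g. cv_results['fit_time']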
Example #6
def test_multimetric_scorer_sanity_check():
    # scoring dictionary returned is the same as calling each scorer separately
    scorers = {
        'a1': 'accuracy',
        'a2': 'accuracy',
        'll1': 'neg_log_loss',
        'll2': 'neg_log_loss',
        'ra1': 'roc_auc',
        'ra2': 'roc_auc'
    }

    X, y = make_classification(random_state=0)

    clf = DecisionTreeClassifier()
    clf.fit(X, y)

    scorer_dict, _ = _check_multimetric_scoring(clf, scorers)
    multi_scorer = _MultimetricScorer(**scorer_dict)

    result = multi_scorer(clf, X, y)

    separate_scores = {
        name: get_scorer(name)(clf, X, y)
        for name in ['accuracy', 'neg_log_loss', 'roc_auc']
    }

    for key, value in result.items():
        score_name = scorers[key]
        assert_allclose(value, separate_scores[score_name])
Example #7
def test_check_scoring_and_check_multimetric_scoring(scoring):
    check_scoring_validator_for_single_metric_usecases(check_scoring)
    # To make sure the check_scoring is correctly applied to the constituent
    # scorers

    estimator = LinearSVC(random_state=0)
    estimator.fit([[1], [2], [3]], [1, 1, 0])

    scorers = _check_multimetric_scoring(estimator, scoring)
    assert isinstance(scorers, dict)
    assert sorted(scorers.keys()) == sorted(list(scoring))
    assert all([
        isinstance(scorer, _PredictScorer) for scorer in list(scorers.values())
    ])

    if "acc" in scoring:
        assert_almost_equal(
            scorers["acc"](estimator, [[1], [2], [3]], [1, 0, 0]), 2.0 / 3.0)
    if "accuracy" in scoring:
        assert_almost_equal(
            scorers["accuracy"](estimator, [[1], [2], [3]], [1, 0, 0]),
            2.0 / 3.0)
    if "precision" in scoring:
        assert_almost_equal(
            scorers["precision"](estimator, [[1], [2], [3]], [1, 0, 0]), 0.5)
Example #8
def _check_multimetric_scoring(estimator, scoring=None):
    if SK_022:
        from sklearn.metrics._scorer import _check_multimetric_scoring
    else:
        from sklearn.metrics.scorer import _check_multimetric_scoring

    return _check_multimetric_scoring(estimator, scoring)
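
The SK_022 flag is not defined in this snippet; it is presumably a module-level version check along these lines (a sketch inferred from the flag's name):

import sklearn

# crude "major.minor" comparison; enough to pick the right import path
SK_022 = tuple(int(p) for p in sklearn.__version__.split(".")[:2]) >= (0, 22)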
Example #9
def test_multimetric_scorer_sanity_check():
    # scoring dictionary returned is the same as calling each scorer separately
    scorers = {
        "a1": "accuracy",
        "a2": "accuracy",
        "ll1": "neg_log_loss",
        "ll2": "neg_log_loss",
        "ra1": "roc_auc",
        "ra2": "roc_auc",
    }

    X, y = make_classification(random_state=0)

    clf = DecisionTreeClassifier()
    clf.fit(X, y)

    scorer_dict = _check_multimetric_scoring(clf, scorers)
    multi_scorer = _MultimetricScorer(**scorer_dict)

    result = multi_scorer(clf, X, y)

    separate_scores = {
        name: get_scorer(name)(clf, X, y)
        for name in ["accuracy", "neg_log_loss", "roc_auc"]
    }

    for key, value in result.items():
        score_name = scorers[key]
        assert_allclose(value, separate_scores[score_name])
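
Examples #6 and #9 are the same sanity check before and after the scikit-learn 0.24 change: the private helper used to return a (scorers, is_multimetric) tuple and now returns only the dict. Code that has to run on both sides of that change can normalise the result, as several examples below do (sketch, reusing the names from the test above):

result = _check_multimetric_scoring(clf, scorers)
scorer_dict = result[0] if isinstance(result, tuple) else result  # old vs. new API
multi_scorer = _MultimetricScorer(**scorer_dict)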
Example #10
def test_multimetric_scorer_calls_method_once(scorers, expected_predict_count,
                                              expected_predict_proba_count,
                                              expected_decision_func_count):
    X, y = np.array([[1], [1], [0], [0], [0]]), np.array([0, 1, 1, 1, 0])

    mock_est = Mock()
    fit_func = Mock(return_value=mock_est)
    predict_func = Mock(return_value=y)

    pos_proba = np.random.rand(X.shape[0])
    proba = np.c_[1 - pos_proba, pos_proba]
    predict_proba_func = Mock(return_value=proba)
    decision_function_func = Mock(return_value=pos_proba)

    mock_est.fit = fit_func
    mock_est.predict = predict_func
    mock_est.predict_proba = predict_proba_func
    mock_est.decision_function = decision_function_func

    scorer_dict, _ = _check_multimetric_scoring(LogisticRegression(), scorers)
    multi_scorer = _MultimetricScorer(**scorer_dict)
    results = multi_scorer(mock_est, X, y)

    assert set(scorers) == set(results)  # compare dict keys

    assert predict_func.call_count == expected_predict_count
    assert predict_proba_func.call_count == expected_predict_proba_count
    assert decision_function_func.call_count == expected_decision_func_count
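
Example #10 also depends on a pytest.mark.parametrize decorator that is not shown. A plausible parametrization (illustrative values, not necessarily the upstream cases): accuracy needs predict, neg_log_loss needs predict_proba, roc_auc falls back to decision_function, and _MultimetricScorer caches each method so it is called at most once:

import pytest

@pytest.mark.parametrize(
    "scorers, expected_predict_count, expected_predict_proba_count,"
    " expected_decision_func_count",
    [
        (["accuracy"], 1, 0, 0),
        (["neg_log_loss"], 0, 1, 0),
        (["roc_auc"], 0, 0, 1),
        (["accuracy", "neg_log_loss", "roc_auc"], 1, 1, 1),
    ],
)
def test_multimetric_scorer_calls_method_once(scorers, expected_predict_count,
                                              expected_predict_proba_count,
                                              expected_decision_func_count):
    ...  # body as in Example #10 above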
Example #11
    def fit(self, X, y=None, *, groups=None, **fit_params):
        """Run fit with all sets of parameters.
        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.
        y : array-like of shape (n_samples, n_output) \
            or (n_samples,), default=None
            Target relative to X for classification or regression;
            None for unsupervised learning.
        groups : array-like of shape (n_samples,), default=None
            Group labels for the samples used while splitting the dataset into
            train/test set. Only used in conjunction with a "Group" :term:`cv`
            instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).
        **fit_params : dict of str -> object
            Parameters passed to the ``fit`` method of the estimator
        """
        estimator = self.estimator
        cv = check_cv(self.cv, y, classifier=is_classifier(estimator))

        scorers, self.multimetric_ = _check_multimetric_scoring(
            self.estimator, scoring=self.scoring)

        X, y, groups = indexable(X, y, groups)

        self._run_search(X, y, cv)

        return self
Example #12
def main(inputs, infile_estimator, outfile_eval, infile1=None, infile2=None):
    """
    Parameter
    ---------
    inputs : str
        File path to galaxy tool parameter

    infile_estimator : str
        File path to trained estimator input

    outfile_eval : str
        File path to save the evaluation results, tabular

    infile1 : str
        File path to dataset containing features

    infile2 : str
        File path to dataset containing target values
    """
    warnings.filterwarnings('ignore')

    with open(inputs, 'r') as param_handler:
        params = json.load(param_handler)

    X_test, y_test = _get_X_y(params, infile1, infile2)

    # load model
    estimator = load_model_from_h5(infile_estimator)
    estimator = clean_params(estimator)

    # handle scorer, convert to scorer dict
    scoring = params['scoring']
    scorer = get_scoring(scoring)
    if not isinstance(scorer, (dict, list)):
        scorer = [scoring['primary_scoring']]
    scorer = _check_multimetric_scoring(estimator, scoring=scorer)

    if hasattr(estimator, 'evaluate'):
        scores = estimator.evaluate(X_test, y_test=y_test, scorer=scorer)
    else:
        scores = _score(estimator, X_test, y_test, scorer)

    # handle output
    for name, score in scores.items():
        scores[name] = [score]
    df = pd.DataFrame(scores)
    df = df[sorted(df.columns)]
    df.to_csv(path_or_buf=outfile_eval, sep='\t', header=True, index=False)
Example #13
def check_multimetric_scoring_single_metric_wrapper(*args, **kwargs):
    # This wraps _check_multimetric_scoring so it accepts a single-metric
    # scoring parameter, letting the tests written for check_scoring also
    # exercise check_multimetric_scoring in single-metric use cases.

    scorers, is_multi = _check_multimetric_scoring(*args, **kwargs)
    # For all single metric use cases, it should register as not multimetric
    assert not is_multi
    if args[0] is not None:
        assert scorers is not None
        names, scorers = zip(*scorers.items())
        assert len(scorers) == 1
        assert names[0] == 'score'
        scorers = scorers[0]
    return scorers
Example #14
    def scoring(self, scoring):

        # Scorer
        scoring = _check_multimetric_scoring(self.estimator, scoring)

        # If scoring is a tuple (older versions of scikit-learn), we take only the first element
        if isinstance(scoring, tuple):
            scoring = scoring[0]  # This is a dict of scorers

        self._scoring_dict = scoring

        # Make it efficient
        scoring = _MultimetricScorer(
            **scoring
        )  # This is a single function returning a dict (with the metrics)
        self._scoring = scoring
Example #15
def cross_val_score_weighted(estimator,
                             x_data,
                             y_data=None,
                             groups=None,
                             scoring=None,
                             cv=None,
                             n_jobs=None,
                             verbose=0,
                             fit_params=None,
                             pre_dispatch='2*n_jobs',
                             error_score=np.nan,
                             sample_weights=None):
    """Expand :func:`sklearn.model_selection.cross_val_score`."""
    scorer = check_scoring(estimator, scoring=scoring)
    scorer_name = 'score'
    scoring = {scorer_name: scorer}
    x_data, y_data, groups = indexable(x_data, y_data, groups)

    cv = check_cv(cv, y_data, classifier=is_classifier(estimator))
    scorers, _ = _check_multimetric_scoring(estimator, scoring=scoring)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs,
                        verbose=verbose,
                        pre_dispatch=pre_dispatch)
    scores = parallel(
        delayed(_fit_and_score_weighted)(clone(estimator),
                                         x_data,
                                         y_data,
                                         scorers,
                                         train,
                                         test,
                                         verbose,
                                         None,
                                         fit_params,
                                         error_score=error_score,
                                         sample_weights=sample_weights)
        for train, test in cv.split(x_data, y_data, groups))

    test_scores = list(zip(*scores))[0]
    test_scores = _aggregate_score_dicts(test_scores)

    return np.array(test_scores[scorer_name])
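
A usage sketch for the helper above, assuming _fit_and_score_weighted and the rest of its module's imports are available (the weights are illustrative):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(random_state=0)
weights = np.linspace(0.5, 1.5, len(y))  # illustrative per-sample weights

fold_scores = cross_val_score_weighted(LogisticRegression(max_iter=1000), X, y,
                                       scoring="accuracy", cv=3,
                                       sample_weights=weights)
print(fold_scores)  # one test score per fold, shape (3,)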
Example #16
def test_multimetric_scorer_calls_method_once_classifier_no_decision():
    predict_proba_call_cnt = 0

    class MockKNeighborsClassifier(KNeighborsClassifier):
        def predict_proba(self, X):
            nonlocal predict_proba_call_cnt
            predict_proba_call_cnt += 1
            return super().predict_proba(X)

    X, y = np.array([[1], [1], [0], [0], [0]]), np.array([0, 1, 1, 1, 0])

    # no decision function
    clf = MockKNeighborsClassifier(n_neighbors=1)
    clf.fit(X, y)

    scorers = ['roc_auc', 'neg_log_loss']
    scorer_dict, _ = _check_multimetric_scoring(clf, scorers)
    scorer = _MultimetricScorer(**scorer_dict)
    scorer(clf, X, y)

    assert predict_proba_call_cnt == 1
Example #17
def _skl_check_scorers(scoring, refit):

    scorers, multimetric_ = _check_multimetric_scoring(GenSVM(),
                                                       scoring=scoring)
    if multimetric_:
        if refit is not False and (not isinstance(refit, str)
                                   or refit not in scorers):
            raise ValueError("For multi-metric scoring, the parameter "
                             "refit must be set to a scorer key "
                             "to refit an estimator with the best "
                             "parameter setting on the whole data and "
                             "make the best_* attributes "
                             "available for that metric. kjIf this is not "
                             "needed, refit should be set to False "
                             "explicitly. %r was passed." % refit)
        else:
            refit_metric = refit
    else:
        refit_metric = "score"

    return scorers, multimetric_, refit_metric
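
Usage sketch for the helper above (GenSVM presumably comes from the gensvm package this code wraps; the tuple unpacking implies a pre-0.24 scikit-learn):

# Multi-metric scoring with a valid refit key resolves to that key.
scorers, multimetric, refit_metric = _skl_check_scorers(
    scoring={"acc": "accuracy", "f1": "f1_macro"}, refit="acc")
assert multimetric is True and refit_metric == "acc"

# A single string falls back to the default "score" key.
scorers, multimetric, refit_metric = _skl_check_scorers("accuracy", refit=True)
assert multimetric is False and refit_metric == "score"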
Example #18
def test_multimetric_scorer_calls_method_once_regressor_threshold():
    predict_called_cnt = 0

    class MockDecisionTreeRegressor(DecisionTreeRegressor):
        def predict(self, X):
            nonlocal predict_called_cnt
            predict_called_cnt += 1
            return super().predict(X)

    X, y = np.array([[1], [1], [0], [0], [0]]), np.array([0, 1, 1, 1, 0])

    # no decision function
    clf = MockDecisionTreeRegressor()
    clf.fit(X, y)

    scorers = {'neg_mse': 'neg_mean_squared_error', 'r2': 'roc_auc'}
    scorer_dict, _ = _check_multimetric_scoring(clf, scorers)
    scorer = _MultimetricScorer(**scorer_dict)
    scorer(clf, X, y)

    assert predict_called_cnt == 1
Example #19
    def fit(self, X, y=None, *, groups=None, **fit_params):
        """Run fit with all sets of parameters.
        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.
        y : array-like of shape (n_samples, n_output) \
            or (n_samples,), default=None
            Target relative to X for classification or regression;
            None for unsupervised learning.
        groups : array-like of shape (n_samples,), default=None
            Group labels for the samples used while splitting the dataset into
            train/test set. Only used in conjunction with a "Group" :term:`cv`
            instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).
        **fit_params : dict of str -> object
            Parameters passed to the ``fit`` method of the estimator
        """
        estimator = self.estimator
        refit_metric = "score"

        if callable(self.scoring):
            scorers = self.scoring
        elif self.scoring is None or isinstance(self.scoring, str):
            scorers = check_scoring(self.estimator, self.scoring)
        else:
            scorers = _check_multimetric_scoring(self.estimator, self.scoring)
            self._check_refit_for_multimetric(scorers)
            refit_metric = self.refit

        #X, y, groups = indexable(X, y, groups) # todo debug
        fit_params = _check_fit_params(X, fit_params)

        cv_orig = check_cv(self.cv, y, classifier=is_classifier(estimator))
        n_splits = cv_orig.get_n_splits(X, y, groups)

        base_estimator = clone(self.estimator)

        parallel = Parallel(n_jobs=self.n_jobs, pre_dispatch=self.pre_dispatch)

        fit_and_score_kwargs = dict(scorer=scorers,
                                    fit_params=fit_params,
                                    return_train_score=self.return_train_score,
                                    return_n_test_samples=True,
                                    return_times=True,
                                    return_parameters=False,
                                    error_score=self.error_score,
                                    verbose=self.verbose)
        results = {}
        with parallel:
            all_candidate_params = []
            all_out = []
            all_more_results = defaultdict(list)

            def evaluate_candidates(candidate_params,
                                    cv=None,
                                    more_results=None):
                cv = cv or cv_orig
                candidate_params = list(candidate_params)
                n_candidates = len(candidate_params)

                if self.verbose > 0:
                    print("Fitting {0} folds for each of {1} candidates,"
                          " totalling {2} fits".format(
                              n_splits, n_candidates, n_candidates * n_splits))

                if self.online_train_val_split:
                    can = enumerate(candidate_params)
                    spl = enumerate(cv.split(X, None, groups))
                    lst = []
                    for (cand_idx, parameters), (split_idx,
                                                 (train,
                                                  test)) in product(can, spl):
                        lst.append(
                            delayed(_fit_and_score)(
                                clone(base_estimator),
                                X,
                                y,
                                train=train,
                                test=test,
                                parameters=parameters,
                                online_train_val_split=True,
                                **fit_and_score_kwargs))
                    out = parallel(lst)
                else:
                    can = enumerate(candidate_params)
                    spl = enumerate(cv.split(X, y, groups))
                    lst = []
                    for (cand_idx, parameters), (split_idx,
                                                 (train,
                                                  test)) in product(can, spl):
                        lst.append(
                            delayed(_fit_and_score)(
                                clone(base_estimator),
                                X,
                                y,
                                train=train,
                                test=test,
                                parameters=parameters,
                                split_progress=(split_idx, n_splits),
                                candidate_progress=(cand_idx, n_candidates),
                                online_train_val_split=False,
                                **fit_and_score_kwargs))
                    out = parallel(lst)


#                    out = parallel(delayed(_fit_and_score)(clone(base_estimator),
#                                                           X, y,
#                                                           train=train, test=test,
#                                                           parameters=parameters,
#                                                           split_progress=(
#                                                               split_idx,
#                                                               n_splits),
#                                                           candidate_progress=(
#                                                               cand_idx,
#                                                               n_candidates),
#                                                           **fit_and_score_kwargs)
#                                   for (cand_idx, parameters),
#                                       (split_idx, (train, test)) in product(
#                                       enumerate(candidate_params),
#                                       enumerate(cv.split(X, y, groups)))
#                                   )

                if len(out) < 1:
                    raise ValueError('No fits were performed. '
                                     'Was the CV iterator empty? '
                                     'Were there no candidates?')
                elif len(out) != n_candidates * n_splits:
                    raise ValueError('cv.split and cv.get_n_splits returned '
                                     'inconsistent results. Expected {} '
                                     'splits, got {}'.format(
                                         n_splits,
                                         len(out) // n_candidates))

                # For callable self.scoring, the return type is only known after
                # calling. If the return type is a dictionary, the error scores
                # can now be inserted with the correct key. The type checking
                # of out will be done in `_insert_error_scores`.
                if callable(self.scoring):
                    _insert_error_scores(out, self.error_score)
                all_candidate_params.extend(candidate_params)
                all_out.extend(out)
                if more_results is not None:
                    for key, value in more_results.items():
                        all_more_results[key].extend(value)

                nonlocal results
                results = self._format_results(all_candidate_params, n_splits,
                                               all_out, all_more_results)

                return results

            self._run_search(evaluate_candidates)

            # multimetric is determined here because in the case of a callable
            # self.scoring the return type is only known after calling
            first_test_score = all_out[0]['test_scores']
            self.multimetric_ = isinstance(first_test_score, dict)

            # check refit_metric now for a callable scorer that is multimetric
            if callable(self.scoring) and self.multimetric_:
                self._check_refit_for_multimetric(first_test_score)
                refit_metric = self.refit

        # For multi-metric evaluation, store the best_index_, best_params_ and
        # best_score_ iff refit is one of the scorer names
        # In single metric evaluation, refit_metric is "score"
        if self.refit or not self.multimetric_:
            # If callable, refit is expected to return the index of the best
            # parameter set.
            if callable(self.refit):
                self.best_index_ = self.refit(results)
                if not isinstance(self.best_index_, numbers.Integral):
                    raise TypeError('best_index_ returned is not an integer')
                if (self.best_index_ < 0
                        or self.best_index_ >= len(results["params"])):
                    raise IndexError('best_index_ index out of range')
            else:
                self.best_index_ = results["rank_test_%s" %
                                           refit_metric].argmin()
                self.best_score_ = results["mean_test_%s" %
                                           refit_metric][self.best_index_]
            self.best_params_ = results["params"][self.best_index_]

        if self.refit:
            # we clone again after setting params in case some
            # of the params are estimators as well.
            self.best_estimator_ = clone(
                clone(base_estimator).set_params(**self.best_params_))

            refit_start_time = time.time()
            if isinstance(self.best_estimator_, Pipeline):
                self.best_estimator_.train()
                # todo set train interval to whole dataset
            if y is not None:
                self.best_estimator_.fit(X, y, **fit_params)
            else:
                self.best_estimator_.fit(X, **fit_params)

            if isinstance(self.best_estimator_, Pipeline):
                self.best_estimator_.prod()
            refit_end_time = time.time()
            self.refit_time_ = refit_end_time - refit_start_time

        # Store the only scorer not as a dict for single metric evaluation
        self.scorer_ = scorers

        self.cv_results_ = results
        self.n_splits_ = n_splits

        return self
Example #20
# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('tpot_data_train.csv', sep=',')
tpot_data.columns = [c.lower() for c in tpot_data.columns.values]
tpot_data = tpot_data[features + labels]
tpot_data = tpot_data.rename(columns={'micro_confirmed': 'target'})

features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \
            train_test_split(features, tpot_data['target'], random_state=None)

# Average CV score on the training set was: 0.739462953567469
exported_pipeline = make_pipeline(
    SelectPercentile(score_func=f_classif, percentile=69),
    ExtraTreesClassifier(bootstrap=True,
                         criterion="gini",
                         max_features=0.6000000000000001,
                         min_samples_leaf=12,
                         min_samples_split=14,
                         n_estimators=100))

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)

print(_DEFAULT_METRICS)

scorers, _ = _check_multimetric_scoring(exported_pipeline,
                                        scoring=_DEFAULT_METRICS)
scores = _score(exported_pipeline, testing_features, testing_target, scorers)
print(scores)
scores = _aggregate_score_dicts(scores)
Example #21
    def fit(self, X, y, groups=None, **fit_params):
        # sklearn prep
        cv = check_cv(self.cv, y, classifier=is_classifier(self.estimator))
        refit_metric = "score"

        if callable(self.scoring):
            scorers = self.scoring
        elif self.scoring is None or isinstance(self.scoring, str):
            scorers = check_scoring(self.estimator, self.scoring)
        else:
            scorers = _check_multimetric_scoring(self.estimator, self.scoring)
            # sklearn < 0.24.0 compatibility
            if isinstance(scorers, tuple):
                scorers = scorers[0]

            self._check_refit_for_multimetric(scorers)
            refit_metric = self.refit

        X, y, groups = indexable(X, y, groups)
        fit_params = _check_fit_params(X, fit_params)
        n_splits = cv.get_n_splits(X, y, groups)
        base_estimator = clone(self.estimator)
        rng = check_random_state(self.random_state)
        np.random.set_state(rng.get_state(legacy=True))
        np_random_seed = rng.get_state(legacy=True)[1][0]

        n_jobs, actual_iterations = self._calculate_n_jobs_and_actual_iters()

        # the default port is 9090; a name server is required because this is how BOHB workers communicate (even locally)
        run_id = f"HpBandSterSearchCV_{time.time()}"
        _nameserver = hpns.NameServer(run_id=run_id,
                                      host=self.nameserver_host,
                                      port=self.nameserver_port)

        gc.collect()

        if self.verbose > 1:
            _logger.setLevel(logging.DEBUG)
        elif self.verbose > 0:
            _logger.setLevel(logging.INFO)
        else:
            _logger.setLevel(logging.ERROR)

        if "logger" in self.bohb_kwargs:
            self.bohb_kwargs.pop("logger")

        with NameServerContext(_nameserver):
            workers = []
            # each worker is a separate thread
            for i in range(n_jobs):
                # SklearnWorker clones the estimator
                w = SklearnWorker(
                    min_budget=self.min_budget,
                    max_budget=self.max_budget,
                    base_estimator=self.estimator,
                    X=X,
                    y=y,
                    cv=cv,
                    cv_n_splits=n_splits,
                    groups=groups,
                    scoring=scorers,
                    metric=refit_metric,
                    fit_params=fit_params,
                    nameserver=self.nameserver_host,
                    nameserver_port=self.nameserver_port,
                    run_id=run_id,
                    id=i,
                    return_train_score=self.return_train_score,
                    error_score=self.error_score,
                    resource_name=self.resource_name,
                    resource_type=self.resource_type,
                    random_state=rng,
                    logger=_logger,
                )
                w.run(background=True)
                workers.append(w)

            converted_min_budget = float(workers[0].min_budget)
            converted_max_budget = float(workers[0].max_budget)
            self.resource_name_ = workers[0].resource_name

            if (self.resource_name_
                    in self.param_distributions.get_hyperparameter_names()):
                _logger.warning(
                    f"Found hyperparameter with name '{self.resource_name_}', same as resource_name_. Removing it from ConfigurationSpace."
                )
                param_distributions = CS.ConfigurationSpace(
                    name=self.param_distributions.name,
                    meta=self.param_distributions.meta,
                )
                param_distributions.add_hyperparameters([
                    x for x in self.param_distributions.get_hyperparameters()
                    if x.name != self.resource_name_
                ])
            else:
                param_distributions = deepcopy(self.param_distributions)
            param_distributions.seed = np_random_seed

            # sleep for a moment to make sure all workers are initialized
            sleep(0.2)

            # BOHB by default
            if isinstance(self.optimizer, str):
                optimizer = self._optimizer_dict[self.optimizer.lower()](
                    configspace=param_distributions,
                    run_id=run_id,
                    min_budget=converted_min_budget,
                    max_budget=converted_max_budget,
                    logger=_logger,
                    **self.bohb_kwargs,
                )
            else:
                optimizer = self.optimizer(
                    configspace=param_distributions,
                    run_id=run_id,
                    min_budget=converted_min_budget,
                    max_budget=converted_max_budget,
                    logger=_logger,
                    **self.bohb_kwargs,
                )
            with OptimizerContext(
                    optimizer,
                    n_iterations=actual_iterations,
            ) as res:
                self._res = res

        id2config = self._res.get_id2config_mapping()
        incumbent = self._res.get_incumbent_id()
        runs_all = self._res.get_all_runs()
        self.best_params_ = id2config[incumbent]["config"]

        resource_type = workers[0].resource_type
        self.n_resources_ = [resource_type(x) for x in optimizer.budgets]
        self.min_resources_ = self.n_resources_[0]
        self.max_resources_ = self.n_resources_[-1]

        results, new_refit_metric = self._runs_to_results(
            runs_all, id2config, scorers, n_splits, self.n_resources_)

        if new_refit_metric is not None:
            refit_metric = new_refit_metric

        iter_counter = sorted(Counter(results["iter"]).items())
        self.n_candidates_ = [x[1] for x in iter_counter]
        self.n_remaining_candidates_ = iter_counter[-1][1]
        self.n_iterations_ = iter_counter[-1][0] + 1

        # For multi-metric evaluation, store the best_index_, best_params_ and
        # best_score_ iff refit is one of the scorer names
        # In single metric evaluation, refit_metric is "score"
        if self.refit or not self.multimetric_:
            # If callable, refit is expected to return the index of the best
            # parameter set.
            if callable(self.refit):
                self.best_index_ = self.refit(results)
                if not isinstance(self.best_index_, numbers.Integral):
                    raise TypeError("best_index_ returned is not an integer")
                if self.best_index_ < 0 or self.best_index_ >= len(
                        results["params"]):
                    raise IndexError("best_index_ index out of range")
            else:
                self.best_index_ = results["rank_test_%s" %
                                           refit_metric].argmin()
                self.best_score_ = results["mean_test_%s" %
                                           refit_metric][self.best_index_]
            self.best_params_ = results["params"][self.best_index_]

        _logger.info(
            f"\nBest {refit_metric}: {self._res.get_runs_by_id(incumbent)[-1].info['test_score_mean']}"
        )
        _logger.info(f"Best found configuration: {self.best_params_}")
        _logger.info(
            f"A total of {len(id2config.keys())} unique configurations where sampled."
        )
        _logger.info(f"A total of {len(runs_all)} runs where executed.")
        _logger.info(
            f"Total budget of resource '{self.resource_name_}' corresponds to {sum([r.budget for r in runs_all]) / converted_max_budget} full function evaluations."
        )

        gc.collect()

        if self.refit:
            # we clone again after setting params in case some
            # of the params are estimators as well.
            refit_params = self.best_params_.copy()
            if self.resource_name_ != "n_samples":
                refit_params[self.resource_name_] = self.max_resources_
            self.best_estimator_ = clone(
                clone(base_estimator).set_params(**refit_params))
            refit_start_time = time.time()
            if y is not None:
                self.best_estimator_.fit(X, y, **fit_params)
            else:
                self.best_estimator_.fit(X, **fit_params)
            refit_end_time = time.time()
            self.refit_time_ = refit_end_time - refit_start_time

        # Store the only scorer not as a dict for single metric evaluation
        self.scorer_ = scorers

        self.cv_results_ = results
        self.n_splits_ = n_splits

        return self
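
Usage sketch for the searcher whose fit() is shown above. The class name is inferred from the run_id prefix and the constructor keywords from the attributes fit() reads, so treat both as assumptions; the search space is a ConfigSpace.ConfigurationSpace:

import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH
from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier

space = CS.ConfigurationSpace()
space.add_hyperparameter(
    CSH.UniformFloatHyperparameter("alpha", lower=1e-6, upper=1e-1, log=True))

X, y = make_classification(random_state=0)
search = HpBandSterSearchCV(SGDClassifier(random_state=0), space,
                            scoring={"acc": "accuracy"}, refit="acc",
                            random_state=0)
search.fit(X, y)
print(search.best_params_, search.best_score_)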
Example #22
def _cross_validate_with_warm_start(
    estimators,
    X,
    y=None,
    *,
    groups=None,
    scoring=None,
    cv=None,
    n_jobs=None,
    verbose=0,
    fit_params=None,
    pre_dispatch="2*n_jobs",
    return_train_score=False,
    return_estimator=False,
    error_score=np.nan,
):
    """Evaluate metric(s) by cross-validation and also record fit/score times.

    Read more in the :ref:`User Guide <multimetric_cross_validation>`.

    Parameters
    ----------
    estimators : list of estimator objects implementing 'fit'
        The objects used to fit the data, one per cross-validation split.

    X : array-like of shape (n_samples, n_features)
        The data to fit. Can be for example a list, or an array.

    y : array-like of shape (n_samples,) or (n_samples, n_outputs), \
            default=None
        The target variable to try to predict in the case of
        supervised learning.

    groups : array-like of shape (n_samples,), default=None
        Group labels for the samples used while splitting the dataset into
        train/test set. Only used in conjunction with a "Group" :term:`cv`
        instance (e.g., :class:`GroupKFold`).

    scoring : str, callable, list/tuple, or dict, default=None
        A single str (see :ref:`scoring_parameter`) or a callable
        (see :ref:`scoring`) to evaluate the predictions on the test set.

        For evaluating multiple metrics, either give a list of (unique) strings
        or a dict with names as keys and callables as values.

        NOTE that when using custom scorers, each scorer should return a single
        value. Metric functions returning a list/array of values can be wrapped
        into multiple scorers that return one value each.

        See :ref:`multimetric_grid_search` for an example.

        If None, the estimator's score method is used.

    cv : int, cross-validation generator or an iterable, default=None
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 5-fold cross validation,
        - int, to specify the number of folds in a `(Stratified)KFold`,
        - :term:`CV splitter`,
        - An iterable yielding (train, test) splits as arrays of indices.

        For int/None inputs, if the estimator is a classifier and ``y`` is
        either binary or multiclass, :class:`StratifiedKFold` is used. In all
        other cases, :class:`KFold` is used.

        Refer :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

        .. versionchanged:: 0.22
            ``cv`` default value if None changed from 3-fold to 5-fold.

    n_jobs : int, default=None
        The number of CPUs to use to do the computation.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    verbose : int, default=0
        The verbosity level.

    fit_params : dict, default=None
        Parameters to pass to the fit method of the estimator.

    pre_dispatch : int or str, default='2*n_jobs'
        Controls the number of jobs that get dispatched during parallel
        execution. Reducing this number can be useful to avoid an
        explosion of memory consumption when more jobs get dispatched
        than CPUs can process. This parameter can be:

            - None, in which case all the jobs are immediately
              created and spawned. Use this for lightweight and
              fast-running jobs, to avoid delays due to on-demand
              spawning of the jobs

            - An int, giving the exact number of total jobs that are
              spawned

            - A str, giving an expression as a function of n_jobs,
              as in '2*n_jobs'

    return_train_score : bool, default=False
        Whether to include train scores.
        Computing training scores is used to get insights on how different
        parameter settings impact the overfitting/underfitting trade-off.
        However computing the scores on the training set can be computationally
        expensive and is not strictly required to select the parameters that
        yield the best generalization performance.

        .. versionadded:: 0.19

        .. versionchanged:: 0.21
            Default value was changed from ``True`` to ``False``

    return_estimator : bool, default=False
        Whether to return the estimators fitted on each split.

        .. versionadded:: 0.20

    error_score : 'raise' or numeric
        Value to assign to the score if an error occurs in estimator fitting.
        If set to 'raise', the error is raised.
        If a numeric value is given, FitFailedWarning is raised. This parameter
        does not affect the refit step, which will always raise the error.

        .. versionadded:: 0.20

    Returns
    -------
    scores : dict of float arrays of shape (n_splits,)
        Array of scores of the estimator for each run of the cross validation.

        A dict of arrays containing the score/time arrays for each scorer is
        returned. The possible keys for this ``dict`` are:

            ``test_score``
                The score array for test scores on each cv split.
                Suffix ``_score`` in ``test_score`` changes to a specific
                metric like ``test_r2`` or ``test_auc`` if there are
                multiple scoring metrics in the scoring parameter.
            ``train_score``
                The score array for train scores on each cv split.
                Suffix ``_score`` in ``train_score`` changes to a specific
                metric like ``train_r2`` or ``train_auc`` if there are
                multiple scoring metrics in the scoring parameter.
                This is available only if ``return_train_score`` parameter
                is ``True``.
            ``fit_time``
                The time for fitting the estimator on the train
                set for each cv split.
            ``score_time``
                The time for scoring the estimator on the test set for each
                cv split. (Note that time for scoring on the train set is not
                included even if ``return_train_score`` is set to ``True``.)
            ``estimator``
                The estimator objects for each cv split.
                This is available only if ``return_estimator`` parameter
                is set to ``True``.

    Examples
    --------
    >>> from sklearn import datasets, linear_model
    >>> from sklearn.model_selection import cross_validate
    >>> from sklearn.metrics import make_scorer
    >>> from sklearn.metrics import confusion_matrix
    >>> from sklearn.svm import LinearSVC
    >>> diabetes = datasets.load_diabetes()
    >>> X = diabetes.data[:150]
    >>> y = diabetes.target[:150]
    >>> lasso = linear_model.Lasso()

    Single metric evaluation using ``cross_validate``

    >>> cv_results = cross_validate(lasso, X, y, cv=3)
    >>> sorted(cv_results.keys())
    ['fit_time', 'score_time', 'test_score']
    >>> cv_results['test_score']
    array([0.33150734, 0.08022311, 0.03531764])

    Multiple metric evaluation using ``cross_validate``
    (please refer the ``scoring`` parameter doc for more information)

    >>> scores = cross_validate(lasso, X, y, cv=3,
    ...                         scoring=('r2', 'neg_mean_squared_error'),
    ...                         return_train_score=True)
    >>> print(scores['test_neg_mean_squared_error'])
    [-3635.5... -3573.3... -6114.7...]
    >>> print(scores['train_r2'])
    [0.28010158 0.39088426 0.22784852]

    See Also
    ---------
    :func:`sklearn.model_selection.cross_val_score`:
        Run cross-validation for single metric evaluation.

    :func:`sklearn.model_selection.cross_val_predict`:
        Get predictions from each split of cross-validation for diagnostic
        purposes.

    :func:`sklearn.metrics.make_scorer`:
        Make a scorer from a performance metric or loss function.

    """
    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimators[0]))
    if callable(scoring):
        scorers = {"score": scoring}
    elif scoring is None or isinstance(scoring, str):
        scorers = {"score": check_scoring(estimators[0], scoring=scoring)}
    else:
        try:
            scorers = _check_multimetric_scoring(estimators[0], scoring=scoring)
            # sklearn < 0.24.0 compatibility
            if isinstance(scorers, tuple):
                scorers = scorers[0]
        except KeyError:
            pass

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.

    parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
    results_org = parallel(
        delayed(_fit_and_score)(
            estimators[i],
            X,
            y,
            scorers,
            train_test_tuple[0],
            train_test_tuple[1],
            verbose,
            None,
            fit_params[i] if isinstance(fit_params, list) else fit_params,
            return_train_score=return_train_score,
            return_times=True,
            return_n_test_samples=True,
            return_estimator=return_estimator,
            error_score=error_score,
        )
        for i, train_test_tuple in enumerate(cv.split(X, y, groups))
    )

    results = _aggregate_score_dicts(results_org)

    ret = {}
    ret["fit_time"] = results["fit_time"]
    ret["score_time"] = results["score_time"]

    if return_estimator:
        ret["estimator"] = results["estimator"]

    test_scores_dict = _normalize_score_results(results["test_scores"])
    if return_train_score:
        train_scores_dict = _normalize_score_results(results["train_scores"])

    for name in test_scores_dict:
        ret["test_%s" % name] = test_scores_dict[name]
        if return_train_score:
            key = "train_%s" % name
            ret[key] = train_scores_dict[name]

    return (ret, results_org)
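
Usage sketch for the private helper above, assuming it and its module's imports are available; it expects one pre-built estimator per CV split, e.g. to reuse warm-started state across repeated calls:

from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier

X, y = make_classification(random_state=0)
estimators = [SGDClassifier(warm_start=True, random_state=i) for i in range(5)]

ret, raw_results = _cross_validate_with_warm_start(
    estimators, X, y, cv=5, scoring=["accuracy"], return_train_score=True)
print(sorted(ret))  # expected: fit_time, score_time, test_accuracy, train_accuracy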
Example #23
    def _fit(self, X, y=None, target_col=None):
        """Fit estimator.

        Requires the target to be given either as a separate 1d array or Series
        y (in scikit-learn fashion) or as a column of the dataframe X named by
        target_col.
        If y is specified, X is assumed not to contain the target.

        Parameters
        ----------
        X : DataFrame
            Input features. If target_col is specified, X also includes the
            target.
        y : Series or numpy array, optional.
            Target. You need to specify either y or target_col.
        target_col : string or int, optional
            Column name of target if included in X.
        """
        X, y = _validate_Xyt(X, y, target_col, do_clean=False)
        if not isinstance(X, pd.DataFrame):
            X = pd.DataFrame(X)
        types = detect_types(X, type_hints=self.type_hints)
        self.feature_names_ = X.columns
        self.types_ = types

        y, self.scoring_ = self._preprocess_target(y)
        self.log_ = []

        # reimplement cross-validation so we only do preprocessing once
        # This could/should be solved with dask?
        if isinstance(self, RegressorMixin):
            # this is how inheritance works, right?
            cv = KFold(n_splits=5,
                       shuffle=self.shuffle,
                       random_state=self.random_state)
        elif isinstance(self, ClassifierMixin):
            cv = StratifiedKFold(n_splits=5,
                                 shuffle=self.shuffle,
                                 random_state=self.random_state)
        data_preproc = []
        for i, (train, test) in enumerate(cv.split(X, y)):
            # maybe do two levels of preprocessing
            # to search over treatment of categorical variables etc
            # Also filter?
            verbose = self.verbose if i == 0 else 0
            sp = EasyPreprocessor(verbose=verbose, types=types)
            X_train = sp.fit_transform(X.iloc[train], y.iloc[train])
            X_test = sp.transform(X.iloc[test])
            data_preproc.append((X_train, X_test, y.iloc[train], y.iloc[test]))

        estimators = self._get_estimators()
        rank_scoring = self._rank_scoring
        self.current_best_ = {rank_scoring: -np.inf}
        for est in estimators:
            set_random_state(est, self.random_state)
            scorers, _ = _check_multimetric_scoring(est, self.scoring_)
            scores = self._evaluate_one(est, data_preproc, scorers)
            # make scoring configurable
            if scores[rank_scoring] > self.current_best_[rank_scoring]:
                if self.verbose:
                    print("=== new best {} (using {}):".format(
                        scores.name, rank_scoring))
                    print(_format_scores(scores))
                    print()

                self.current_best_ = scores
                best_est = est
        if self.verbose:
            print("\nBest model:\n{}\nBest Scores:\n{}".format(
                nice_repr(best_est), _format_scores(self.current_best_)))
        if self.refit:
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', UserWarning)
                self.est_ = make_pipeline(EasyPreprocessor(types=types),
                                          best_est)
                self.est_.fit(X, y)
        return self
Example #24
def _evaluate_keras_and_sklearn_scores(estimator,
                                       data_generator,
                                       X,
                                       y=None,
                                       sk_scoring=None,
                                       steps=None,
                                       batch_size=32,
                                       return_predictions=False):
    """output scores for bother keras and sklearn metrics

    Parameters
    -----------
    estimator : object
        Fitted `galaxy_ml.keras_galaxy_models.KerasGBatchClassifier`.
    data_generator : object
        From `galaxy_ml.preprocessors.ImageDataFrameBatchGenerator`.
    X : 2-D array
        Contains indices of images that need to be evaluated.
    y : None
        Target value.
    sk_scoring : dict
        Galaxy tool input parameters.
    steps : integer or None
        Evaluation/prediction steps before stop.
    batch_size : integer
        Number of samples in a batch
    return_predictions : bool, default is False
        Whether to return predictions and true labels.
    """
    scores = {}

    generator = data_generator.flow(X, y=y, batch_size=batch_size)
    # keras metrics evaluation
    # handle scorer, convert to scorer dict
    generator.reset()
    score_results = estimator.model_.evaluate_generator(generator, steps=steps)
    metrics_names = estimator.model_.metrics_names
    if not isinstance(metrics_names, list):
        scores[metrics_names] = score_results
    else:
        scores = dict(zip(metrics_names, score_results))

    if sk_scoring['primary_scoring'] == 'default' and\
            not return_predictions:
        return scores

    generator.reset()
    predictions, y_true = _predict_generator(estimator.model_,
                                             generator,
                                             steps=steps)

    # for sklearn metrics
    if sk_scoring['primary_scoring'] != 'default':
        scorer = get_scoring(sk_scoring)
        if not isinstance(scorer, (dict, list)):
            scorer = [sk_scoring['primary_scoring']]
        scorer = _check_multimetric_scoring(estimator, scoring=scorer)
        sk_scores = gen_compute_scores(y_true, predictions, scorer)
        scores.update(sk_scores)

    if return_predictions:
        return scores, predictions, y_true
    else:
        return scores, None, None
Example #25
def main(inputs,
         infile_estimator,
         infile1,
         infile2,
         outfile_result,
         outfile_object=None,
         outfile_y_true=None,
         outfile_y_preds=None,
         groups=None,
         ref_seq=None,
         intervals=None,
         targets=None,
         fasta_path=None):
    """
    Parameter
    ---------
    inputs : str
        File path to galaxy tool parameter.

    infile_estimator : str
        File path to estimator.

    infile1 : str
        File path to dataset containing features.

    infile2 : str
        File path to dataset containing target values.

    outfile_result : str
        File path to save the results, either cv_results or test result.

    outfile_object : str, optional
        File path to save searchCV object.

    outfile_y_true : str, optional
        File path to target values for prediction.

    outfile_y_preds : str, optional
        File path to save predictions.

    groups : str
        File path to dataset containing groups labels.

    ref_seq : str
        File path to dataset containing genome sequence file.

    intervals : str
        File path to dataset containing interval file.

    targets : str
        File path to dataset compressed target bed file.

    fasta_path : str
        File path to dataset containing fasta file.
    """
    warnings.simplefilter('ignore')

    with open(inputs, 'r') as param_handler:
        params = json.load(param_handler)

    #  load estimator
    estimator = load_model_from_h5(infile_estimator)

    estimator = clean_params(estimator)

    # swap hyperparameter
    swapping = params['experiment_schemes']['hyperparams_swapping']
    swap_params = _eval_swap_params(swapping)
    estimator.set_params(**swap_params)

    estimator_params = estimator.get_params()

    # store read dataframe object
    loaded_df = {}

    input_type = params['input_options']['selected_input']
    # tabular input
    if input_type == 'tabular':
        header = 'infer' if params['input_options']['header1'] else None
        column_option = (params['input_options']['column_selector_options_1']
                         ['selected_column_selector_option'])
        if column_option in [
                'by_index_number', 'all_but_by_index_number', 'by_header_name',
                'all_but_by_header_name'
        ]:
            c = params['input_options']['column_selector_options_1']['col1']
        else:
            c = None

        df_key = infile1 + repr(header)
        df = pd.read_csv(infile1, sep='\t', header=header, parse_dates=True)
        loaded_df[df_key] = df

        X = read_columns(df, c=c, c_option=column_option).astype(float)
    # sparse input
    elif input_type == 'sparse':
        with open(infile1, 'r') as f:
            X = mmread(f)

    # fasta_file input
    elif input_type == 'seq_fasta':
        pyfaidx = get_module('pyfaidx')
        sequences = pyfaidx.Fasta(fasta_path)
        n_seqs = len(sequences.keys())
        X = np.arange(n_seqs)[:, np.newaxis]
        for param in estimator_params.keys():
            if param.endswith('fasta_path'):
                estimator.set_params(**{param: fasta_path})
                break
        else:
            raise ValueError(
                "The selected estimator doesn't support "
                "fasta file input! Please consider using "
                "KerasGBatchClassifier with "
                "FastaDNABatchGenerator/FastaProteinBatchGenerator "
                "or having GenomeOneHotEncoder/ProteinOneHotEncoder "
                "in pipeline!")

    elif input_type == 'refseq_and_interval':
        path_params = {
            'data_batch_generator__ref_genome_path': ref_seq,
            'data_batch_generator__intervals_path': intervals,
            'data_batch_generator__target_path': targets
        }
        estimator.set_params(**path_params)
        with open(intervals) as f:
            n_intervals = sum(1 for line in f)
        X = np.arange(n_intervals)[:, np.newaxis]

    # Get target y
    header = 'infer' if params['input_options']['header2'] else None
    column_option = (params['input_options']['column_selector_options_2']
                     ['selected_column_selector_option2'])
    if column_option in [
            'by_index_number', 'all_but_by_index_number', 'by_header_name',
            'all_but_by_header_name'
    ]:
        c = params['input_options']['column_selector_options_2']['col2']
    else:
        c = None

    df_key = infile2 + repr(header)
    if df_key in loaded_df:
        infile2 = loaded_df[df_key]
    else:
        infile2 = pd.read_csv(infile2,
                              sep='\t',
                              header=header,
                              parse_dates=True)
        loaded_df[df_key] = infile2

    y = read_columns(infile2,
                     c=c,
                     c_option=column_option,
                     sep='\t',
                     header=header,
                     parse_dates=True)
    if len(y.shape) == 2 and y.shape[1] == 1:
        y = y.ravel()
    if input_type == 'refseq_and_interval':
        estimator.set_params(data_batch_generator__features=y.ravel().tolist())
        y = None
    # end y

    # load groups
    if groups:
        groups_selector = (params['experiment_schemes']['test_split']
                           ['split_algos']).pop('groups_selector')

        header = 'infer' if groups_selector['header_g'] else None
        column_option = \
            (groups_selector['column_selector_options_g']
                            ['selected_column_selector_option_g'])
        if column_option in [
                'by_index_number', 'all_but_by_index_number', 'by_header_name',
                'all_but_by_header_name'
        ]:
            c = groups_selector['column_selector_options_g']['col_g']
        else:
            c = None

        df_key = groups + repr(header)
        if df_key in loaded_df:
            groups = loaded_df[df_key]

        groups = read_columns(groups,
                              c=c,
                              c_option=column_option,
                              sep='\t',
                              header=header,
                              parse_dates=True)
        groups = groups.ravel()

    # free memory held by the cached dataframes
    del loaded_df

    # caching IRAPSClassifier fits can speed up the search significantly
    memory = joblib.Memory(location=CACHE_DIR, verbose=0)
    main_est = get_main_estimator(estimator)
    if main_est.__class__.__name__ == 'IRAPSClassifier':
        main_est.set_params(memory=memory)

    # handle scorer, convert to scorer dict
    scoring = params['experiment_schemes']['metrics']['scoring']
    scorer = get_scoring(scoring)
    if not isinstance(scorer, (dict, list)):
        scorer = [scoring['primary_scoring']]
    scorer = _check_multimetric_scoring(estimator, scoring=scorer)

    # handle test (first) split
    test_split_options = (
        params['experiment_schemes']['test_split']['split_algos'])

    if test_split_options['shuffle'] == 'group':
        test_split_options['labels'] = groups
    if test_split_options['shuffle'] == 'stratified':
        if y is not None:
            test_split_options['labels'] = y
        else:
            raise ValueError("Stratified shuffle split is not "
                             "applicable on empty target values!")

    X_train, X_test, y_train, y_test, groups_train, groups_test = \
        train_test_split_none(X, y, groups, **test_split_options)

    exp_scheme = params['experiment_schemes']['selected_exp_scheme']

    # handle validation (second) split
    if exp_scheme == 'train_val_test':
        val_split_options = (
            params['experiment_schemes']['val_split']['split_algos'])

        if val_split_options['shuffle'] == 'group':
            val_split_options['labels'] = groups_train
        if val_split_options['shuffle'] == 'stratified':
            if y_train is not None:
                val_split_options['labels'] = y_train
            else:
                raise ValueError("Stratified shuffle split is not "
                                 "applicable on empty target values!")

        X_train, X_val, y_train, y_val, groups_train, groups_val = \
            train_test_split_none(X_train, y_train, groups_train,
                                  **val_split_options)

    # train and eval
    if hasattr(estimator, 'config') and hasattr(estimator, 'model_type'):
        if exp_scheme == 'train_val_test':
            estimator.fit(X_train, y_train, validation_data=(X_val, y_val))
        else:
            estimator.fit(X_train, y_train, validation_data=(X_test, y_test))
    else:
        estimator.fit(X_train, y_train)

    if isinstance(estimator, KerasGBatchClassifier):
        scores = {}
        steps = estimator.prediction_steps
        batch_size = estimator.batch_size
        data_generator = estimator.data_generator_

        scores, predictions, y_true = _evaluate_keras_and_sklearn_scores(
            estimator,
            data_generator,
            X_test,
            y=y_test,
            sk_scoring=scoring,
            steps=steps,
            batch_size=batch_size,
            return_predictions=bool(outfile_y_true))

    else:
        scores = {}
        if hasattr(estimator, 'model_') \
                and hasattr(estimator.model_, 'metrics_names'):
            batch_size = estimator.batch_size
            score_results = estimator.model_.evaluate(X_test,
                                                      y=y_test,
                                                      batch_size=batch_size,
                                                      verbose=0)
            metrics_names = estimator.model_.metrics_names
            if not isinstance(metrics_names, list):
                scores[metrics_names] = score_results
            else:
                scores = dict(zip(metrics_names, score_results))

        if hasattr(estimator, 'predict_proba'):
            predictions = estimator.predict_proba(X_test)
        else:
            predictions = estimator.predict(X_test)

        y_true = y_test
        sk_scores = _score(estimator, X_test, y_test, scorer)
        scores.update(sk_scores)

    # handle output
    if outfile_y_true:
        try:
            pd.DataFrame(y_true).to_csv(outfile_y_true, sep='\t', index=False)
            pd.DataFrame(predictions).astype(np.float32).to_csv(
                outfile_y_preds,
                sep='\t',
                index=False,
                float_format='%g',
                chunksize=10000)
        except Exception as e:
            print("Error in saving predictions: %s" % e)
    # write scores to the output table
    for name, score in scores.items():
        scores[name] = [score]
    df = pd.DataFrame(scores)
    df = df[sorted(df.columns)]
    df.to_csv(path_or_buf=outfile_result, sep='\t', header=True, index=False)

    memory.clear(warn=False)

    if outfile_object:
        dump_model_to_h5(estimator, outfile_object)
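The test/validation split logic above dispatches on the 'shuffle' option: stratified
shuffling uses the target values as labels, while group shuffling uses the groups array.
A minimal sketch with plain NumPy arrays (not train_test_split_none or the Galaxy tool
inputs) illustrating the two cases:

import numpy as np
from sklearn.model_selection import GroupShuffleSplit, train_test_split

X = np.arange(40).reshape(-1, 2)
y = np.tile([0, 1], 10)
groups = np.repeat(np.arange(5), 4)

# 'stratified' shuffle: preserve the class balance of y in both halves.
X_tr, X_te, y_tr, y_te = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=0)

# 'group' shuffle: whole groups stay on one side of the split.
gss = GroupShuffleSplit(n_splits=1, test_size=0.25, random_state=0)
train_idx, test_idx = next(gss.split(X, y, groups))
print(len(train_idx), len(test_idx))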
Ejemplo n.º 26
0
def fit_and_score_te_oracle(estimator,
                            X,
                            y,
                            w,
                            p,
                            t,
                            scorer,
                            train,
                            test,
                            parameters=None,
                            fit_params=None,
                            return_train_score=False,
                            return_parameters=False,
                            return_times=False,
                            return_estimator=False,
                            error_score=np.nan,
                            return_test_score_only=False):
    """Fit estimator and compute scores for a given dataset split, using oracle knowledge of
    treatment effects. Based on sklearn.model_selection._validation _fit_and_score, adapted to
    allow more inputs (treatments and treatment effects)

    Parameters
    ----------
    estimator : estimator object implementing 'fit'
        The object to use to fit the data.
    X : array-like of shape (n_samples, n_features)
        The features to fit on.
    y : array-like of shape (n_samples,)
        The outcome variable.
    w : array-like of shape (n_samples,)
        The treatment indicator.
    p : array-like of shape (n_samples,)
        The treatment propensity.
    t : array-like of shape (n_samples,)
        The true treatment effect to evaluate against.
    scorer : A single callable or dict mapping scorer name to the callable
        If it is a single callable, the return value for ``train_scores`` and
        ``test_scores`` is a single float.
        For a dict, it should be one mapping the scorer name to the scorer
        callable object / function.
        The callable object / function should have the signature
        ``scorer(estimator, X, y)``.
    train : array-like of shape (n_train_samples,)
        Indices of training samples.
    test : array-like of shape (n_test_samples,)
        Indices of test samples.
    error_score : 'raise' or numeric, default=np.nan
        Value to assign to the score if an error occurs in estimator fitting.
        If set to 'raise', the error is raised.
        If a numeric value is given, FitFailedWarning is raised. This parameter
        does not affect the refit step, which will always raise the error.
    parameters : dict or None
        Parameters to be set on the estimator.
    fit_params : dict or None
        Parameters that will be passed to ``estimator.fit``.
    return_train_score : bool, default=False
        Compute and return score on training set.
    return_parameters : bool, default=False
        Return parameters that has been used for the estimator.
    return_times : bool, default=False
        Whether to return the fit/score times.
    return_estimator : bool, default=False
        Whether to return the fitted estimator.
    return_test_score_only : bool, default=False
        Whether to only return the test score.

    Returns
    -------
    train_scores : dict of scorer name -> float
        Score on training set (for all the scorers),
        returned only if `return_train_score` is `True`.
    test_scores : float or dict of scorer name -> float
        If ``return_test_score_only`` is True and ``scorer`` is a single string, only the test
        score is returned. Otherwise, scores on the test set (for all the scorers).
    n_test_samples : int
        Number of test samples.
    fit_time : float
        Time spent for fitting in seconds.
    score_time : float
        Time spent for scoring in seconds.
    parameters : dict or None
        The parameters that have been evaluated.
    estimator : estimator object
        The fitted estimator
    """
    if not isinstance(estimator, BaseTEModel):
        raise ValueError("This method works only for BaseTEModel")

    scorers, _ = _check_multimetric_scoring(estimator, scoring=scorer)

    # Adjust length of sample weights (if any)
    fit_params = fit_params if fit_params is not None else {}
    fit_params = _check_fit_params(X, fit_params, train)

    train_scores = {}
    if parameters is not None:
        # clone after setting parameters in case any parameters
        # are estimators (like pipeline steps)
        # because pipeline doesn't clone steps in fit
        cloned_parameters = {}
        for k, v in parameters.items():
            cloned_parameters[k] = clone(v, safe=False)

        estimator = estimator.set_params(**cloned_parameters)

    start_time = time.time()

    X_train, y_train, w_train, p_train, t_train = _safe_split_te(
        X, y, w, p, t, train)
    X_test, y_test, w_test, p_test, t_test = _safe_split_te(
        X, y, w, p, t, test)

    try:
        estimator.fit(X_train, y_train, w_train, p_train, **fit_params)

    except Exception as e:
        if return_test_score_only:
            if error_score == 'raise':
                raise
            else:
                return np.nan
        # Note fit time as time until error
        fit_time = time.time() - start_time
        score_time = 0.0
        if error_score == 'raise':
            raise
        elif isinstance(error_score, numbers.Number):
            if isinstance(scorer, dict):
                test_scores = {name: error_score for name in scorer}
                if return_train_score:
                    train_scores = test_scores.copy()
            else:
                test_scores = error_score
                if return_train_score:
                    train_scores = error_score
            warnings.warn(
                "Estimator fit failed. The score on this train-test"
                " partition for these parameters will be set to %f. "
                "Details: \n%s" % (error_score, format_exc()),
                FitFailedWarning)
        else:
            raise ValueError("error_score must be the string 'raise' or a"
                             " numeric value. (Hint: if using 'raise', please"
                             " make sure that it has been spelled correctly.)")

    else:
        fit_time = time.time() - start_time

        try:
            test_scores = _score(estimator, X_test, t_test, scorers)
        except Exception:
            if return_test_score_only:
                if error_score == 'raise':
                    raise
                else:
                    return np.nan
            # without a test score there is nothing meaningful to return below
            raise

        score_time = time.time() - start_time - fit_time

        if return_test_score_only:
            if isinstance(scorer, str):
                return test_scores['score']
            else:
                return test_scores

        if return_train_score:
            train_scores = _score(estimator, X_train, t_train, scorers)

    ret = [train_scores, test_scores] if return_train_score else [test_scores]

    if return_times:
        ret.extend([fit_time, score_time])
    if return_parameters:
        ret.append(parameters)
    if return_estimator:
        ret.append(estimator)
    return ret
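The scorer argument above expects callables with the signature scorer(estimator, X, y).
A minimal sketch (using a plain regression metric, not BaseTEModel or treatment-effect
data) of how make_scorer produces such callables:

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import make_scorer, mean_squared_error

def rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

# greater_is_better=False negates the value so "higher is better" holds in searches.
scorer = make_scorer(rmse, greater_is_better=False)

X = np.arange(10, dtype=float).reshape(-1, 1)
y = 2.0 * X.ravel() + 1.0
est = LinearRegression().fit(X, y)
print(scorer(est, X, y))  # close to -0.0 on this exactly linear toy data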
Ejemplo n.º 27
0
    def fit(self, X, y=None, *, groups=None, **fit_params):
        self.initialize_fitting(X, y)

        estimator = self.estimator
        cv = check_cv(self.cv, y, classifier=is_classifier(estimator))

        scorers, self.multimetric_ = _check_multimetric_scoring(
            self.estimator, scoring=self.scoring)

        if self.multimetric_:
            if self.refit is not False and (
                    not isinstance(self.refit, str) or
                    # This will work for both dict / list (tuple)
                    self.refit not in scorers) and not callable(self.refit):
                raise ValueError("For multi-metric scoring, the parameter "
                                 "refit must be set to a scorer key or a "
                                 "callable to refit an estimator with the "
                                 "best parameter setting on the whole "
                                 "data and make the best_* attributes "
                                 "available for that metric. If this is "
                                 "not needed, refit should be set to "
                                 "False explicitly. %r was passed." %
                                 self.refit)
            else:
                refit_metric = self.refit
        else:
            refit_metric = 'score'

        X, y, groups = indexable(X, y, groups)
        fit_params = _check_fit_params(X, fit_params)

        n_splits = cv.get_n_splits(X, y, groups)

        base_estimator = clone(self.estimator)

        parallel = Parallel(n_jobs=self.n_jobs,
                            verbose=self.verbose,
                            pre_dispatch=self.pre_dispatch)

        fit_and_score_kwargs = dict(scorer=scorers,
                                    fit_params=fit_params,
                                    return_train_score=self.return_train_score,
                                    return_n_test_samples=True,
                                    return_times=True,
                                    return_parameters=False,
                                    error_score=self.error_score,
                                    verbose=self.verbose)
        results = {}
        with parallel:
            all_candidate_params = []
            all_out = []

            def evaluate_candidates(candidate_params):
                candidate_params = list(candidate_params)
                n_candidates = len(candidate_params)

                if self.verbose > 0:
                    print("Fitting {0} folds for each of {1} candidates,"
                          " totalling {2} fits".format(
                              n_splits, n_candidates, n_candidates * n_splits))

                out = parallel(
                    delayed(self._fit_score_and_log)(clone(base_estimator),
                                                     X,
                                                     y,
                                                     train=train,
                                                     test=test,
                                                     parameters=parameters,
                                                     **fit_and_score_kwargs)
                    for parameters, (train, test) in product(
                        candidate_params, cv.split(X, y, groups)))

                if len(out) < 1:
                    raise ValueError('No fits were performed. '
                                     'Was the CV iterator empty? '
                                     'Were there no candidates?')
                elif len(out) != n_candidates * n_splits:
                    raise ValueError('cv.split and cv.get_n_splits returned '
                                     'inconsistent results. Expected {} '
                                     'splits, got {}'.format(
                                         n_splits,
                                         len(out) // n_candidates))

                all_candidate_params.extend(candidate_params)
                all_out.extend(out)

                nonlocal results
                results = self._format_results(all_candidate_params, scorers,
                                               n_splits, all_out)
                return results

            self._run_search(evaluate_candidates)

        # For multi-metric evaluation, store the best_index_, best_params_ and
        # best_score_ iff refit is one of the scorer names
        # In single metric evaluation, refit_metric is "score"
        if self.refit or not self.multimetric_:
            # If callable, refit is expected to return the index of the best
            # parameter set.
            if callable(self.refit):
                self.best_index_ = self.refit(results)
                if not isinstance(self.best_index_, numbers.Integral):
                    raise TypeError('best_index_ returned is not an integer')
                if (self.best_index_ < 0
                        or self.best_index_ >= len(results["params"])):
                    raise IndexError('best_index_ index out of range')
            else:
                self.best_index_ = results["rank_test_%s" %
                                           refit_metric].argmin()
                self.best_score_ = results["mean_test_%s" %
                                           refit_metric][self.best_index_]
            self.best_params_ = results["params"][self.best_index_]

        if self.refit:
            # we clone again after setting params in case some
            # of the params are estimators as well.
            self.best_estimator_ = clone(
                clone(base_estimator).set_params(**self.best_params_))
            refit_start_time = time.time()
            if y is not None:
                self.best_estimator_.fit(X, y, **fit_params)
            else:
                self.best_estimator_.fit(X, **fit_params)
            refit_end_time = time.time()
            self.refit_time_ = refit_end_time - refit_start_time

        # Store the only scorer not as a dict for single metric evaluation
        self.scorer_ = scorers if self.multimetric_ else scorers['score']

        self.cv_results_ = results
        self.n_splits_ = n_splits

        return self
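The refit check above mirrors scikit-learn's rule for multi-metric search: with a dict of
scorers, refit must name one of the scorer keys (or be False, or a callable). A minimal
sketch of the accepted form using the public GridSearchCV:

from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
search = GridSearchCV(
    SVC(),
    param_grid={"C": [0.1, 1.0]},
    scoring={"acc": "accuracy", "bacc": "balanced_accuracy"},
    refit="acc",   # must be a scorer key; refit=True would raise the ValueError above
    cv=3,
)
search.fit(X, y)
print(search.best_params_, round(search.best_score_, 3))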
Ejemplo n.º 28
0
    def fit(self, X, y=None, groups=None, **fit_params):
        """Run fit with all sets of parameters.

        Parameters
        ----------

        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y : array-like, shape = [n_samples] or [n_samples, n_output], optional
            Target relative to X for classification or regression;
            None for unsupervised learning.

        groups : array-like, with shape (n_samples,), optional
            Group labels for the samples used while splitting the dataset into
            train/test set.

        **fit_params : dict of string -> object
            Parameters passed to the ``fit`` method of the estimator
        """
        estimator = self.estimator
        cv = check_cv(self.cv, y, classifier=is_classifier(estimator))

        scorers, self.multimetric_ = _check_multimetric_scoring(
            self.estimator, scoring=self.scoring)

        if self.multimetric_:
            if self.refit is not False and (
                    not isinstance(self.refit, str) or
                    # This will work for both dict / list (tuple)
                    self.refit not in scorers) and not callable(self.refit):
                raise ValueError("For multi-metric scoring, the parameter "
                                 "refit must be set to a scorer key or a "
                                 "callable to refit an estimator with the "
                                 "best parameter setting on the whole "
                                 "data and make the best_* attributes "
                                 "available for that metric. If this is "
                                 "not needed, refit should be set to "
                                 "False explicitly. %r was passed."
                                 % self.refit)
            else:
                refit_metric = self.refit
        else:
            refit_metric = 'score'
        self.refit_metric = refit_metric

        X, y, groups = indexable(X, y, groups)
        n_splits = cv.get_n_splits(X, y, groups)

        base_estimator = clone(self.estimator)

        parallel = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
                            pre_dispatch=self.pre_dispatch)

        fit_and_score_kwargs = dict(scorer=scorers,
                                    fit_params=fit_params,
                                    return_train_score=self.return_train_score,
                                    return_n_test_samples=True,
                                    return_times=True,
                                    return_parameters=False,
                                    error_score=self.error_score,
                                    verbose=self.verbose)
        results = {}
        with parallel:
            all_candidate_params = []
            all_out = []
            all_more_results = defaultdict(list)

            def evaluate_candidates(candidate_params, X, y, groups,
                                    more_results=None):
                candidate_params = list(candidate_params)
                n_candidates = len(candidate_params)

                if self.verbose > 0:
                    print("Fitting {0} folds for each of {1} candidates,"
                          " totalling {2} fits".format(
                              n_splits, n_candidates, n_candidates * n_splits))

                out = parallel(delayed(_fit_and_score)(clone(base_estimator),
                                                       X, y,
                                                       train=train, test=test,
                                                       parameters=parameters,
                                                       **fit_and_score_kwargs)
                               for parameters, (train, test)
                               in product(candidate_params,
                                          cv.split(X, y, groups)))

                if len(out) < 1:
                    raise ValueError('No fits were performed. '
                                     'Was the CV iterator empty? '
                                     'Were there no candidates?')
                elif len(out) != n_candidates * n_splits:
                    raise ValueError('cv.split and cv.get_n_splits returned '
                                     'inconsistent results. Expected {} '
                                     'splits, got {}'
                                     .format(n_splits,
                                             len(out) // n_candidates))

                all_candidate_params.extend(candidate_params)
                all_out.extend(out)
                if more_results is not None:
                    for key, value in more_results.items():
                        all_more_results[key].extend(value)

                nonlocal results
                results = self._format_results(
                    all_candidate_params, scorers, n_splits, all_out,
                    all_more_results)

                return results

            self._run_search(evaluate_candidates, X, y, groups)

        # For multi-metric evaluation, store the best_index_, best_params_ and
        # best_score_ iff refit is one of the scorer names
        # In single metric evaluation, refit_metric is "score"
        if self.refit or not self.multimetric_:
            # If callable, refit is expected to return the index of the best
            # parameter set.
            if callable(self.refit):
                self.best_index_ = self.refit(results)
                if not isinstance(self.best_index_, (int, np.integer)):
                    raise TypeError('best_index_ returned is not an integer')
                if (self.best_index_ < 0 or
                   self.best_index_ >= len(results["params"])):
                    raise IndexError('best_index_ index out of range')
            else:
                self.best_index_ = results["rank_test_%s"
                                           % refit_metric].argmin()
                self.best_score_ = results["mean_test_%s" % refit_metric][
                                           self.best_index_]
            self.best_params_ = results["params"][self.best_index_]

        if self.refit:
            self.best_estimator_ = clone(base_estimator).set_params(
                **self.best_params_)
            refit_start_time = time.time()
            if y is not None:
                self.best_estimator_.fit(X, y, **fit_params)
            else:
                self.best_estimator_.fit(X, **fit_params)
            refit_end_time = time.time()
            self.refit_time_ = refit_end_time - refit_start_time

        # Store the only scorer not as a dict for single metric evaluation
        self.scorer_ = scorers if self.multimetric_ else scorers['score']

        self.cv_results_ = results
        self.n_splits_ = n_splits

        return self
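The groups argument above is forwarded to cv.split, so a group-aware splitter keeps whole
groups on one side of every fold. A minimal sketch with the public GridSearchCV and
GroupKFold:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV, GroupKFold
from sklearn.svm import SVC

X, y = make_classification(n_samples=40, random_state=0)
groups = np.repeat(np.arange(8), 5)   # 8 groups of 5 samples each

search = GridSearchCV(SVC(), {"C": [0.1, 1.0]}, cv=GroupKFold(n_splits=4))
search.fit(X, y, groups=groups)       # groups are passed through to cv.split
print(search.best_params_)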
Ejemplo n.º 29
0
def cross_validate_checkpoint(
    estimator,
    X,
    y=None,
    *,
    groups=None,
    scoring=None,
    cv=None,
    n_jobs=None,
    verbose=0,
    fit_params=None,
    pre_dispatch="2*n_jobs",
    return_train_score=False,
    return_estimator=False,
    error_score=np.nan,
    workdir=None,
    checkpoint=True,
    force_refresh=False,
    serialize_cv=False,
):
    """Evaluate metric(s) by cross-validation and also record fit/score times.

    This is a copy of :func:`sklearn:sklearn.model_selection.cross_validate`
    that uses :func:`_fit_and_score_ckpt` to checkpoint scores and estimators
    for each CV split.
    Read more in the :ref:`sklearn user guide <sklearn:multimetric_cross_validation>`.

    Parameters
    ----------
    estimator : estimator object implementing 'fit'
        The object to use to fit the data.

    X : array-like of shape (n_samples, n_features)
        The data to fit. Can be for example a list, or an array.

    y : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None
        The target variable to try to predict in the case of
        supervised learning.

    groups : array-like of shape (n_samples,), default=None
        Group labels for the samples used while splitting the dataset into
        train/test set. Only used in conjunction with a "Group" :term:`cv`
        instance (e.g., :class:`sklearn:GroupKFold`).

    scoring : str, callable, list/tuple, or dict, default=None
        A single str (see :ref:`sklearn:scoring_parameter`) or a callable
        (see :ref:`sklearn:scoring`) to evaluate the predictions on the test set.

        For evaluating multiple metrics, either give a list of (unique) strings
        or a dict with names as keys and callables as values.

        NOTE that when using custom scorers, each scorer should return a single
        value. Metric functions returning a list/array of values can be wrapped
        into multiple scorers that return one value each.

        See :ref:`sklearn:multimetric_grid_search` for an example.

        If None, the estimator's score method is used.

    cv : int, cross-validation generator or an iterable, default=None
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 5-fold cross validation,
        - int, to specify the number of folds in a `(Stratified)KFold`,
        - an sklearn `CV splitter <https://scikit-learn.org/stable/glossary.html#term-cv-splitter>`_,
        - An iterable yielding (train, test) splits as arrays of indices.

        For int/None inputs, if the estimator is a classifier and ``y`` is
        either binary or multiclass,
        :class:`sklearn.model_selection.StratifiedKFold` is used. In all
        other cases, :class:`sklearn.model_selection.KFold` is used.
        Refer :ref:`sklearn user guide <sklearn:cross_validation>` for the
        various cross-validation strategies that can be used here.

    n_jobs : int, default=None
        The number of CPUs to use to do the computation.
        ``None`` means 1 unless in a :obj:`joblib:joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`sklearn Glossary <sklearn:n_jobs>`
        for more details.

    verbose : int, default=0
        The verbosity level.

    fit_params : dict, default=None
        Parameters to pass to the fit method of the estimator.

    pre_dispatch : int or str, default='2*n_jobs'
        Controls the number of jobs that get dispatched during parallel
        execution. Reducing this number can be useful to avoid an
        explosion of memory consumption when more jobs get dispatched
        than CPUs can process. This parameter can be:

            - None, in which case all the jobs are immediately
              created and spawned. Use this for lightweight and
              fast-running jobs, to avoid delays due to on-demand
              spawning of the jobs

            - An int, giving the exact number of total jobs that are
              spawned

            - A str, giving an expression as a function of n_jobs,
              as in '2*n_jobs'

    return_train_score : bool, default=False
        Whether to include train scores.
        Computing training scores is used to get insights on how different
        parameter settings impact the overfitting/underfitting trade-off.
        However computing the scores on the training set can be computationally
        expensive and is not strictly required to select the parameters that
        yield the best generalization performance.

    return_estimator : bool, default=False
        Whether to return the estimators fitted on each split.

    error_score : 'raise' or numeric
        Value to assign to the score if an error occurs in estimator fitting.
        If set to 'raise', the error is raised.
        If a numeric value is given, FitFailedWarning is raised. This parameter
        does not affect the refit step, which will always raise the error.

    workdir : path-like object, default=None
        A string or :term:`python:path-like-object` indicating the directory
        in which to store checkpoint files

    checkpoint : bool, default=True
        If True, checkpoint the parameters, estimators, and scores.

    force_refresh : bool, default=False
        If True, recompute scores even if the checkpoint file already exists.
        Otherwise, load scores from checkpoint files and return.

    serialize_cv : bool, default=False
        If True, do not use joblib.Parallel to evaluate each CV split.

    Returns
    -------
    scores : dict of float arrays of shape (n_splits,)
        Array of scores of the estimator for each run of the cross validation.

        A dict of arrays containing the score/time arrays for each scorer is
        returned. The possible keys for this ``dict`` are:

            ``test_score``
                The score array for test scores on each cv split.
                Suffix ``_score`` in ``test_score`` changes to a specific
                metric like ``test_r2`` or ``test_auc`` if there are
                multiple scoring metrics in the scoring parameter.
            ``train_score``
                The score array for train scores on each cv split.
                Suffix ``_score`` in ``train_score`` changes to a specific
                metric like ``train_r2`` or ``train_auc`` if there are
                multiple scoring metrics in the scoring parameter.
                This is available only if ``return_train_score`` parameter
                is ``True``.
            ``fit_time``
                The time for fitting the estimator on the train
                set for each cv split.
            ``score_time``
                The time for scoring the estimator on the test set for each
                cv split. (Note: time for scoring on the train set is not
                included even if ``return_train_score`` is set to ``True``.)
            ``estimator``
                The estimator objects for each cv split.
                This is available only if ``return_estimator`` parameter
                is set to ``True``.

    Examples
    --------
    >>> import shutil
    >>> import tempfile
    >>> from sklearn import datasets, linear_model
    >>> from afqinsight import cross_validate_checkpoint
    >>> from sklearn.pipeline import make_pipeline
    >>> from sklearn.preprocessing import StandardScaler
    >>> diabetes = datasets.load_diabetes()
    >>> X = diabetes.data[:150]
    >>> y = diabetes.target[:150]
    >>> lasso = linear_model.Lasso()

    Single metric evaluation using ``cross_validate``

    >>> cv_results = cross_validate_checkpoint(lasso, X, y, cv=3, checkpoint=False)
    >>> sorted(cv_results.keys())
    ['fit_time', 'score_time', 'test_score']
    >>> cv_results['test_score']
    array([0.33150734, 0.08022311, 0.03531764])

    Multiple metric evaluation using ``cross_validate``, an estimator
    pipeline, and checkpointing (please refer the ``scoring`` parameter doc
    for more information)

    >>> tempdir = tempfile.mkdtemp()
    >>> scaler = StandardScaler()
    >>> pipeline = make_pipeline(scaler, lasso)
    >>> scores = cross_validate_checkpoint(pipeline, X, y, cv=3,
    ...                         scoring=('r2', 'neg_mean_squared_error'),
    ...                         return_train_score=True, checkpoint=True,
    ...                         workdir=tempdir, return_estimator=True)
    >>> shutil.rmtree(tempdir)
    >>> print(scores['test_neg_mean_squared_error'])
    [-2479.2... -3281.2... -3466.7...]
    >>> print(scores['train_r2'])
    [0.507... 0.602... 0.478...]

    See Also
    --------
    sklearn.model_selection.cross_val_score:
        Run cross-validation for single metric evaluation.
    sklearn.model_selection.cross_val_predict:
        Get predictions from each split of cross-validation for diagnostic
        purposes.
    sklearn.metrics.make_scorer:
        Make a scorer from a performance metric or loss function.
    """
    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorers, _ = _check_multimetric_scoring(estimator, scoring=scoring)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    if serialize_cv:
        scores = [
            _fit_and_score_ckpt(
                workdir=workdir,
                checkpoint=checkpoint,
                force_refresh=force_refresh,
                estimator=clone(estimator),
                X=X,
                y=y,
                scorer=scorers,
                train=train,
                test=test,
                verbose=verbose,
                parameters=None,
                fit_params=fit_params,
                return_train_score=return_train_score,
                return_times=True,
                return_estimator=return_estimator,
                error_score=error_score,
            ) for train, test in cv.split(X, y, groups)
        ]
    else:
        parallel = Parallel(n_jobs=n_jobs,
                            verbose=verbose,
                            pre_dispatch=pre_dispatch)
        scores = parallel(
            delayed(_fit_and_score_ckpt)(
                workdir=workdir,
                checkpoint=checkpoint,
                force_refresh=force_refresh,
                estimator=clone(estimator),
                X=X,
                y=y,
                scorer=scorers,
                train=train,
                test=test,
                verbose=verbose,
                parameters=None,
                fit_params=fit_params,
                return_train_score=return_train_score,
                return_times=True,
                return_estimator=return_estimator,
                error_score=error_score,
            ) for train, test in cv.split(X, y, groups))

    zipped_scores = list(zip(*scores))
    if return_train_score:
        train_scores = zipped_scores.pop(0)
        train_scores = _aggregate_score_dicts(train_scores)
    if return_estimator:
        fitted_estimators = zipped_scores.pop()
    test_scores, fit_times, score_times = zipped_scores
    test_scores = _aggregate_score_dicts(test_scores)

    ret = {}
    ret["fit_time"] = np.array(fit_times)
    ret["score_time"] = np.array(score_times)

    if return_estimator:
        ret["estimator"] = fitted_estimators

    for name in scorers:
        ret["test_%s" % name] = np.array(test_scores[name])
        if return_train_score:
            key = "train_%s" % name
            ret[key] = np.array(train_scores[name])

    return ret
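The final loop above depends on turning per-split score dicts into one dict of arrays,
which is what _aggregate_score_dicts does for it. A minimal sketch of that aggregation
step with illustrative values:

import numpy as np

per_split = [
    {"r2": 0.51, "neg_mean_squared_error": -2479.2},
    {"r2": 0.60, "neg_mean_squared_error": -3281.2},
    {"r2": 0.48, "neg_mean_squared_error": -3466.7},
]
aggregated = {key: np.array([d[key] for d in per_split]) for key in per_split[0]}
print(aggregated["r2"])                      # array([0.51, 0.6 , 0.48])
print(aggregated["neg_mean_squared_error"])  # one entry per CV split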
Ejemplo n.º 30
0
    def fit(self, Xs, y=None, *, groups=None, **fit_params):
        """Run fit with all sets of parameters.
        Parameters
        ----------
        Xs : array-like of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples and
            `n_features` is the number of features.
        y : array-like of shape (n_samples, n_output) \
            or (n_samples,), default=None
            Target relative to X for classification or regression;
            None for unsupervised learning.
        groups : array-like of shape (n_samples,), default=None
            Group labels for the samples used while splitting the dataset into
            train/test set. Only used in conjunction with a "Group" :term:`cv`
            instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).
        **fit_params : dict of str -> object
            Parameters passed to the ``fit`` method of the estimator.
        Returns
        -------
        self : object
            Instance of fitted estimator.
        """
        estimator = self.estimator
        refit_metric = "score"

        if callable(self.scoring):
            scorers = self.scoring
        elif self.scoring is None or isinstance(self.scoring, str):
            scorers = check_scoring(self.estimator, self.scoring)
        else:
            scorers = _check_multimetric_scoring(self.estimator, self.scoring)
            self._check_refit_for_multimetric(scorers)
            refit_metric = self.refit

        fit_params = _check_fit_params(Xs[0], fit_params)

        cv_orig = check_cv(self.cv, y, classifier=is_classifier(estimator))
        n_splits = cv_orig.get_n_splits(Xs[0], y, groups)

        base_estimator = clone(self.estimator)

        parallel = Parallel(n_jobs=self.n_jobs, pre_dispatch=self.pre_dispatch)

        fit_and_score_kwargs = dict(
            scorer=scorers,
            fit_params=fit_params,
            return_train_score=self.return_train_score,
            return_n_test_samples=True,
            return_times=True,
            return_parameters=False,
            error_score=self.error_score,
            verbose=self.verbose,
        )
        results = {}
        with parallel:
            all_candidate_params = []
            all_out = []
            all_more_results = defaultdict(list)

            def evaluate_candidates(candidate_params,
                                    cv=None,
                                    more_results=None):
                cv = cv or cv_orig
                candidate_params = list(candidate_params)
                n_candidates = len(candidate_params)

                if self.verbose > 0:
                    print("Fitting {0} folds for each of {1} candidates,"
                          " totalling {2} fits".format(
                              n_splits, n_candidates, n_candidates * n_splits))

                X_transformed, _, _, n_features = check_Xs(
                    Xs, copy=True, return_dimensions=True)
                pipeline = Pipeline([
                    ("splitter", SimpleSplitter(n_features)),
                    ("estimator", clone(base_estimator)),
                ])
                pipeline.fit(np.hstack(Xs))
                out = parallel(
                    delayed(_fit_and_score)(
                        pipeline,
                        np.hstack(Xs),
                        y,
                        train=train,
                        test=test,
                        parameters={
                            f"estimator__{k}": v
                            for k, v in parameters.items()
                        },
                        split_progress=(split_idx, n_splits),
                        candidate_progress=(cand_idx, n_candidates),
                        **fit_and_score_kwargs,
                    ) for (cand_idx,
                           parameters), (split_idx, (train, test)) in product(
                               enumerate(candidate_params),
                               enumerate(cv.split(Xs[0], y, groups)),
                           ))

                if len(out) < 1:
                    raise ValueError("No fits were performed. "
                                     "Was the CV iterator empty? "
                                     "Were there no candidates?")
                elif len(out) != n_candidates * n_splits:
                    raise ValueError("cv.split and cv.get_n_splits returned "
                                     "inconsistent results. Expected {} "
                                     "splits, got {}".format(
                                         n_splits,
                                         len(out) // n_candidates))

                # For callable self.scoring, the return type is only known after
                # calling. If the return type is a dictionary, the error scores
                # can now be inserted with the correct key. The type checking
                # of out will be done in `_insert_error_scores`.
                if callable(self.scoring):
                    _insert_error_scores(out, self.error_score)

                all_candidate_params.extend(candidate_params)
                all_out.extend(out)

                if more_results is not None:
                    for key, value in more_results.items():
                        all_more_results[key].extend(value)

                nonlocal results
                results = self._format_results(all_candidate_params, n_splits,
                                               all_out, all_more_results)

                return results

            self._run_search(evaluate_candidates)

            # multimetric is determined here because in the case of a callable
            # self.scoring the return type is only known after calling
            first_test_score = all_out[0]["test_scores"]
            self.multimetric_ = isinstance(first_test_score, dict)

            # check refit_metric now for a callable scorer that is multimetric
            if callable(self.scoring) and self.multimetric_:
                self._check_refit_for_multimetric(first_test_score)
                refit_metric = self.refit

        # For multi-metric evaluation, store the best_index_, best_params_ and
        # best_score_ iff refit is one of the scorer names
        # In single metric evaluation, refit_metric is "score"
        if self.refit or not self.multimetric_:
            self.best_index_ = self._select_best_index(self.refit,
                                                       refit_metric, results)
            if not callable(self.refit):
                # With a non-custom callable, we can select the best score
                # based on the best index
                self.best_score_ = results[f"mean_test_{refit_metric}"][
                    self.best_index_]
            self.best_params_ = results["params"][self.best_index_]

        if self.refit:
            # we clone again after setting params in case some
            # of the params are estimators as well.
            self.best_estimator_ = clone(
                clone(base_estimator).set_params(**self.best_params_))
            refit_start_time = time.time()
            if y is not None:
                self.best_estimator_.fit(Xs, y, **fit_params)
            else:
                self.best_estimator_.fit(Xs, **fit_params)
            refit_end_time = time.time()
            self.refit_time_ = refit_end_time - refit_start_time

            if hasattr(self.best_estimator_, "feature_names_in_"):
                self.feature_names_in_ = self.best_estimator_.feature_names_in_

        # Store the only scorer not as a dict for single metric evaluation
        self.scorer_ = scorers

        self.cv_results_ = results
        self.n_splits_ = n_splits
        return self
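The scoring dispatch at the top of this fit follows the 0.24+ convention: a callable is
used as-is, a string or None goes through check_scoring, and anything else is treated as
a multi-metric specification. A minimal sketch of the resulting scorer objects using only
public helpers:

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import check_scoring, get_scorer

est = LogisticRegression()

single = check_scoring(est, scoring="accuracy")                   # one scorer callable
multi = {name: get_scorer(name) for name in ["accuracy", "f1"]}   # dict of scorers
print(callable(single), sorted(multi))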