Example 1
    def _evaluate_one(self, estimator, data_preproc, scorers):
        """Fit a clone of estimator on each preprocessed split and average the test scores."""
        res = []
        for X_train, X_test, y_train, y_test in data_preproc:
            est = clone(estimator)
            est.fit(X_train, y_train)
            # _multimetric_score returns a dict of scorer name -> score
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore',
                                        category=UndefinedMetricWarning)
                test_scores = _multimetric_score(est, X_test, y_test, scorers)
            res.append(test_scores)

        res_mean = pd.DataFrame(res).mean(axis=0)
        try:
            # show only last step of pipeline for simplicity
            name = nice_repr(estimator.steps[-1][1])
        except AttributeError:
            name = nice_repr(estimator)
        # collapse runs of whitespace in the repr (needs `import re` at module level);
        # the original str.replace("\s", " ") only matched the literal characters "\s"
        name = re.sub(r"\s+", " ", name)
        if self.verbose:
            print(name)
            res_string = "".join("{}: {:.4f}    ".format(m, s)
                                 for m, s in res_mean.items())
            print(res_string)
        res_mean.name = name
        self.log_.append(res_mean)
        return res_mean
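For context, `_multimetric_score` is a private scikit-learn helper (it lived in `sklearn.model_selection._validation` in older releases and was later removed), which applies a dict of named scorers to a fitted estimator and returns a `name -> score` dict. A minimal self-contained sketch of the same behavior using only public API; the dataset and scorer names below are illustrative:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import get_scorer

X, y = make_classification(n_samples=200, random_state=0)
est = LogisticRegression(max_iter=1000).fit(X, y)

# Every sklearn scorer is a callable scorer(estimator, X, y) -> float,
# so a multi-metric score is just a dict comprehension over named scorers.
scorers = {"accuracy": get_scorer("accuracy"), "roc_auc": get_scorer("roc_auc")}
scores = {name: scorer(est, X, y) for name, scorer in scorers.items()}
print(scores)  # e.g. {'accuracy': 0.98, 'roc_auc': 0.99}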
Example 2
    def _compute_metrics_train_test(self, X_tr, y_tr, X_te, y_te):
        '''Compute metrics on the test set, given an entire train-test split.'''
        X_tr, y_tr = self._format_inputs(X_tr, y_tr)
        X_te, y_te = self._format_inputs(X_te, y_te)

        # fit model on entire training set
        self.estimator.fit(X_tr, y_tr)

        scorers = {
            scorer_info.name: scorer_info.scorer
            for scorer_info in self.scorers_info
        }
        multimetric_score_results = _multimetric_score(self.estimator, X_te,
                                                       y_te, scorers)

        return self._process_cv_results(multimetric_score_results,
                                        filter_testing_keys=False)
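The `scorers` dict built above follows the usual scikit-learn shape: each value is a scorer callable keyed by a metric name (`scorers_info` and `_process_cv_results` belong to the snippet's own class). A minimal sketch of building an equivalent dict from scratch with the public `make_scorer` helper:

from sklearn.metrics import make_scorer, accuracy_score, f1_score

# name -> scorer mapping, same shape as the dict comprehension above
scorers = {
    "accuracy": make_scorer(accuracy_score),
    "f1": make_scorer(f1_score),
}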
Example 3
def __fit_and_score(x_train, y_train, x_test, y_test, clf, cv_repeat=None,
                    grid_search_k=None, param_grid=None, grid_scorer=None,
                    scorers=None):
    """Fit and score a classifier through a cross-validated grid search
    over a parameter grid.
    """
    # clone the classifier so the original estimator is left untouched
    clf = clone(clf)
    # grid search
    grid_search_clf = GridSearchCV(
        estimator=clf,
        param_grid=param_grid,
        scoring=scorers,
        cv=grid_search_k,
        refit=grid_scorer,
        )
    grid_search_clf = grid_search_clf.fit(x_train, y_train)
    # save to results
    result = {}
    result['gridsearch'] = grid_search_clf.cv_results_
    result['best_params'] = grid_search_clf.best_params_
    result['scores'] = _multimetric_score(grid_search_clf, x_test, y_test, scorers)
    result['cv_repeat'] = cv_repeat
    result['clf'] = clf.__class__.__name__
    return result
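Example 3 relies on `GridSearchCV`'s multi-metric support: `scoring` may be a dict of scorers, in which case `refit` must name the metric used to select `best_params_`. A minimal runnable sketch of the same pattern; the dataset and parameter grid are illustrative:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

X, y = make_classification(n_samples=200, random_state=0)
search = GridSearchCV(
    estimator=LogisticRegression(max_iter=1000),
    param_grid={"C": [0.1, 1.0, 10.0]},
    scoring={"acc": "accuracy", "auc": "roc_auc"},  # multi-metric scoring
    refit="auc",  # metric that picks best_params_ / best_estimator_
    cv=3,
)
search.fit(X, y)
print(search.best_params_, search.cv_results_["mean_test_auc"])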
Example 4
def test__score_with_group__multimetric_score_with_group():
    roc_auc_scorer = SCORERS["roc_auc"]
    
    np.random.seed(123)
    X_test = np.random.randn(100, 10)
    y_test = (np.random.randn(100) > 0).astype(int)
    group_test = np.array([0] * 25 + [1] * 25 + [2] * 25 + [3] * 25)

    estimator = LogisticRegression(solver="lbfgs", random_state=123)
    estimator.fit(X_test, y_test)
    
    #######################################################
    ###   Test with a scorer that doesn't accept group  ###
    #######################################################
    for group in [None, group_test]:
        result1 = _score_with_group(estimator, X_test, y_test, group, roc_auc_scorer)
        result2 = _score(estimator, X_test, y_test, roc_auc_scorer)

        assert not pd.isnull(result1)
        assert isinstance(result1, numbers.Number)
        assert abs(result1 - result2) <= 1e-10

    for group in [None, group_test]:
        result1 = _multimetric_score_with_group(estimator, X_test, y_test, group,
                                                {"auc": roc_auc_scorer})
        result2 = _multimetric_score(estimator, X_test, y_test,
                                     {"auc": roc_auc_scorer})

        assert isinstance(result1, dict)
        assert set(result1.keys()) == {"auc"}
        assert not pd.isnull(result1["auc"])
        assert isinstance(result1["auc"], numbers.Number)
        assert abs(result1["auc"] - result2["auc"]) <= 1e-10
    
    ##############################################
    ### test with a scorer that accepts group  ###
    ##############################################
        
    max_proba_scorer = _GroupProbaScorer(score_func=max_proba_group_accuracy,
                                         sign=1, kwargs={})
    
    result1 = _score_with_group(estimator, X_test, y_test, group_test, max_proba_scorer)
    assert not pd.isnull(result1)
    assert isinstance(result1, numbers.Number)
    assert 0 <= result1 <= 1
    
    with pytest.raises(TypeError):
        # raises because this scorer requires a non-None group
        _score_with_group(estimator, X_test, y_test, None, max_proba_scorer)

    result1 = _multimetric_score_with_group(estimator, X_test, y_test, group_test,
                                            {"mp_score": max_proba_scorer})
    assert isinstance(result1, dict)
    assert set(result1.keys()) == {"mp_score"}
    r = result1["mp_score"]
    assert not pd.isnull(r)
    assert isinstance(r, numbers.Number)
    assert 0 <= r <= 1
    
    with pytest.raises(TypeError):
        # raises because this scorer requires a non-None group
        _multimetric_score_with_group(estimator, X_test, y_test, None,
                                      {"mp_score": max_proba_scorer})
    
    #######################
    ###  test with both ###
    #######################
    result1 = _multimetric_score_with_group(estimator, X_test, y_test, group_test,
                                            {"mp_score": max_proba_scorer,
                                             "auc": roc_auc_scorer})
    assert isinstance(result1, dict)
    assert set(result1.keys()) == {"auc", "mp_score"}
    r = result1["mp_score"]
    assert not pd.isnull(r)
    assert isinstance(r, numbers.Number)
    assert 0 <= r <= 1
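The dispatch this test exercises can be summarized as: a classic scorer is called as `scorer(estimator, X, y)` and any `group` argument is ignored, while a group-aware scorer such as `_GroupProbaScorer` additionally requires a non-None `group`. A rough sketch of that logic, inferred from the assertions above rather than the library's actual implementation:

def _score_with_group_sketch(estimator, X, y, group, scorer):
    # Hypothetical helper mirroring the behavior asserted in the test.
    if isinstance(scorer, _GroupProbaScorer):
        # group-aware scorer: passing group=None raises TypeError downstream
        return scorer(estimator, X, y, group)
    # classic sklearn scorer: the group argument is simply ignored
    return scorer(estimator, X, y)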