def test_thresholded_scorers():
    # Test scorers that take thresholds.
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LogisticRegression(random_state=0)
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.decision_function(X_test))
    score3 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(score1, score2)
    assert_almost_equal(score1, score3)

    logscore = get_scorer('log_loss')(clf, X_test, y_test)
    logloss = log_loss(y_test, clf.predict_proba(X_test))
    assert_almost_equal(-logscore, logloss)

    # same for an estimator without decision_function
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(score1, score2)

    # test with a regressor (no decision_function)
    reg = DecisionTreeRegressor()
    reg.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(reg, X_test, y_test)
    score2 = roc_auc_score(y_test, reg.predict(X_test))
    assert_almost_equal(score1, score2)

    # Test that an exception is raised on more than two classes
    X, y = make_blobs(random_state=0, centers=3)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf.fit(X_train, y_train)
    assert_raises(ValueError, get_scorer('roc_auc'), clf, X_test, y_test)
def test_thresholded_scorers():
    # Test scorers that take thresholds.
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LogisticRegression(random_state=0)
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.decision_function(X_test))
    score3 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(score1, score2)
    assert_almost_equal(score1, score3)

    logscore = get_scorer('neg_log_loss')(clf, X_test, y_test)
    logloss = log_loss(y_test, clf.predict_proba(X_test))
    assert_almost_equal(-logscore, logloss)

    # same for an estimator without decision_function
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(score1, score2)

    # test with a regressor (no decision_function)
    reg = DecisionTreeRegressor()
    reg.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(reg, X_test, y_test)
    score2 = roc_auc_score(y_test, reg.predict(X_test))
    assert_almost_equal(score1, score2)

    # Test that an exception is raised on more than two classes
    X, y = make_blobs(random_state=0, centers=3)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf.fit(X_train, y_train)
    with pytest.raises(ValueError, match="multiclass format is not supported"):
        get_scorer('roc_auc')(clf, X_test, y_test)

    # test error is raised with a single class present in model
    # (predict_proba shape is not suitable for binary auc)
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = DecisionTreeClassifier()
    clf.fit(X_train, np.zeros_like(y_train))
    with pytest.raises(ValueError, match="need classifier with two classes"):
        get_scorer('roc_auc')(clf, X_test, y_test)

    # for proba scorers
    with pytest.raises(ValueError, match="need classifier with two classes"):
        get_scorer('neg_log_loss')(clf, X_test, y_test)
    def __init__(self, estimator, k_features=1,
                 forward=True, floating=False,
                 verbose=0, scoring=None,
                 cv=5, skip_if_stuck=True, n_jobs=1,
                 pre_dispatch='2*n_jobs',
                 clone_estimator=True):

        self.estimator = estimator
        self.k_features = k_features
        self.forward = forward
        self.floating = floating
        self.pre_dispatch = pre_dispatch
        self.scoring = scoring
        if isinstance(scoring, str):
            self.scorer = get_scorer(scoring)
        else:
            self.scorer = scoring
        self.skip_if_stuck = skip_if_stuck
        self.cv = cv
        self.n_jobs = n_jobs
        self.verbose = verbose
        self.named_est = {key: value for key, value in
                          _name_estimators([self.estimator])}
        self.clone_estimator = clone_estimator
        if self.clone_estimator:
            self.est_ = clone(self.estimator)
        else:
            self.est_ = self.estimator
        self.fitted = False
        self.subsets_ = {}
        self.interrupted_ = False

        # don't mess with this unless testing
        self._TESTING_INTERRUPT_MODE = False
    def __init__(self, estimator, min_features=1, max_features=1,
                 print_progress=True, scoring='accuracy',
                 cv=5, n_jobs=1,
                 pre_dispatch='2*n_jobs',
                 clone_estimator=True):
        self.estimator = estimator
        self.min_features = min_features
        self.max_features = max_features
        self.pre_dispatch = pre_dispatch
        self.scoring = scoring
        self.scorer = get_scorer(scoring)
        self.cv = cv
        self.print_progress = print_progress
        self.n_jobs = n_jobs
        self.named_est = {key: value for key, value in
                          _name_estimators([self.estimator])}
        self.clone_estimator = clone_estimator
        if self.clone_estimator:
            self.est_ = clone(self.estimator)
        else:
            self.est_ = self.estimator
        self.fitted = False
        self.interrupted_ = False

        # don't mess with this unless testing
        self._TESTING_INTERRUPT_MODE = False
Example #5
def clf_bias_var(clf, X, y, n_replicas):
        
    roc_auc_scorer = get_scorer("roc_auc")
    # roc_auc_scorer(clf, X_test, y_test)
    auc_scores = []
    error_scores = []
    counts = np.zeros(X.shape[0], dtype=np.float64)
    sum_preds = np.zeros(X.shape[0], dtype=np.float64)
    for it in range(n_replicas):
        # draw a bootstrap training set; the out-of-bag samples form the test set
        train_indices = np.random.randint(X.shape[0], size=X.shape[0])
        in_train = np.unique(train_indices)
        mask = np.ones(X.shape[0], dtype=bool)
        mask[in_train] = False
        test_indices = np.arange(X.shape[0])[mask]
        
        clf.fit(X[train_indices], y[train_indices])
        
        auc_scores.append(roc_auc_scorer(clf, X[test_indices], y[test_indices]))
        error_scores.append(zero_one_loss(y[test_indices], clf.predict(X[test_indices])))
        
        preds = clf.predict(X)
        for index in test_indices:
            counts[index] += 1
            sum_preds[index] += preds[index]
    
    test_mask = (counts > 0) # indices of samples that have been tested
    
    # print('counts mean: {}'.format(np.mean(counts)))
    # print('counts standard derivation: {}'.format(np.std(counts)))
    
    bias, var = bias_var(y[test_mask], sum_preds[test_mask], counts[test_mask], n_replicas)
    
    return auc_scores, error_scores, bias, var
Example #6
def _make_scorer(scoring):
    """Make scorer.

    Parameters
    ----------
    scoring : str | callable
        If str, must be compatible with sklearn's get_scorer.
        If callable, function with signature ``score_func(y, y_pred,
        **kwargs)``.

    Returns
    -------
    scorer : callable | None
        The scorer.
    """
    from sklearn.metrics import make_scorer, get_scorer

    # If scoring is None (default), the predictions are internally
    # generated by estimator.score(). Else, we must first get the
    # predictions based on the scorer.
    if scoring is None:
        return None
    elif isinstance(scoring, str):
        return get_scorer(scoring)
    else:
        return make_scorer(scoring)
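# Usage sketch (added for illustration): the two branches of _make_scorer above
# map onto scikit-learn's get_scorer and make_scorer. This is a hedged,
# self-contained example, not part of the original module.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, get_scorer, make_scorer

X_demo, y_demo = make_classification(random_state=0)
clf_demo = LogisticRegression(max_iter=1000).fit(X_demo, y_demo)

scorer_from_str = get_scorer("accuracy")        # the `isinstance(scoring, str)` branch
scorer_from_func = make_scorer(accuracy_score)  # the callable branch
assert scorer_from_str(clf_demo, X_demo, y_demo) == scorer_from_func(clf_demo, X_demo, y_demo)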
    def __init__(self, estimator, k_features,
                 forward=True, floating=False,
                 print_progress=True, scoring='accuracy',
                 cv=5, skip_if_stuck=True, n_jobs=1,
                 pre_dispatch='2*n_jobs',
                 clone_estimator=True):
        self.estimator = estimator
        self.k_features = k_features
        self.forward = forward
        self.floating = floating
        self.pre_dispatch = pre_dispatch
        self.scoring = scoring
        self.scorer = get_scorer(scoring)
        self.skip_if_stuck = skip_if_stuck
        self.cv = cv
        self.print_progress = print_progress
        self.n_jobs = n_jobs
        self.named_est = {key: value for key, value in
                          _name_estimators([self.estimator])}
        self.clone_estimator = clone_estimator
        if self.clone_estimator:
            self.est_ = clone(self.estimator)
        else:
            self.est_ = self.estimator
        self.fitted = False
    def __init__(self,
                 n_jobs=-1,
                 offset_scale=1.0,
                 n_buckets=2,
                 initial_params=None,
                 minimizer='BFGS',
                 basinhopping=False,
                 scoring='accuracy'):

        from numpy import array

        self.n_jobs = int(n_jobs)
        self.offset_scale = float(offset_scale)
        self.n_buckets = int(n_buckets)
        if initial_params is None:
            #self.initial_offsets_ = [-0.5] * self.n_buckets
            pass
        else:
            self.params = array(initial_params)
            #assert(len(self.initial_offsets_) == self.n_buckets)
            pass
        self.minimizer = minimizer
        self.basinhopping = basinhopping
        from sklearn.metrics import get_scorer
        self.scoring = get_scorer(scoring)
        pass
Example #9
def test_rfecv():
    generator = check_random_state(0)
    iris = load_iris()
    X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
    y = list(iris.target)  # regression test: list should be supported

    # Test using the score function
    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5)
    rfecv.fit(X, y)
    # non-regression test for missing worst feature:
    assert_equal(len(rfecv.grid_scores_), X.shape[1])
    assert_equal(len(rfecv.ranking_), X.shape[1])
    X_r = rfecv.transform(X)

    # All the noisy variable were filtered out
    assert_array_equal(X_r, iris.data)

    # same in sparse
    rfecv_sparse = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5)
    X_sparse = sparse.csr_matrix(X)
    rfecv_sparse.fit(X_sparse, y)
    X_r_sparse = rfecv_sparse.transform(X_sparse)
    assert_array_equal(X_r_sparse.toarray(), iris.data)

    # Test using a customized loss function
    scoring = make_scorer(zero_one_loss, greater_is_better=False)
    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5, scoring=scoring)
    ignore_warnings(rfecv.fit)(X, y)
    X_r = rfecv.transform(X)
    assert_array_equal(X_r, iris.data)

    # Test using a scorer
    scorer = get_scorer("accuracy")
    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5, scoring=scorer)
    rfecv.fit(X, y)
    X_r = rfecv.transform(X)
    assert_array_equal(X_r, iris.data)

    # Test fix on grid_scores
    def test_scorer(estimator, X, y):
        return 1.0

    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5, scoring=test_scorer)
    rfecv.fit(X, y)
    assert_array_equal(rfecv.grid_scores_, np.ones(len(rfecv.grid_scores_)))

    # Same as the first two tests, but with step=2
    rfecv = RFECV(estimator=SVC(kernel="linear"), step=2, cv=5)
    rfecv.fit(X, y)
    assert_equal(len(rfecv.grid_scores_), 6)
    assert_equal(len(rfecv.ranking_), X.shape[1])
    X_r = rfecv.transform(X)
    assert_array_equal(X_r, iris.data)

    rfecv_sparse = RFECV(estimator=SVC(kernel="linear"), step=2, cv=5)
    X_sparse = sparse.csr_matrix(X)
    rfecv_sparse.fit(X_sparse, y)
    X_r_sparse = rfecv_sparse.transform(X_sparse)
    assert_array_equal(X_r_sparse.toarray(), iris.data)
def test_classification_scores():
    # Test classification scorers.
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LinearSVC(random_state=0)
    clf.fit(X_train, y_train)

    for prefix, metric in [('f1', f1_score), ('precision', precision_score),
                           ('recall', recall_score),
                           ('jaccard', jaccard_score)]:

        score1 = get_scorer('%s_weighted' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=None,
                        average='weighted')
        assert_almost_equal(score1, score2)

        score1 = get_scorer('%s_macro' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=None,
                        average='macro')
        assert_almost_equal(score1, score2)

        score1 = get_scorer('%s_micro' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=None,
                        average='micro')
        assert_almost_equal(score1, score2)

        score1 = get_scorer('%s' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=1)
        assert_almost_equal(score1, score2)

    # test fbeta score that takes an argument
    scorer = make_scorer(fbeta_score, beta=2)
    score1 = scorer(clf, X_test, y_test)
    score2 = fbeta_score(y_test, clf.predict(X_test), beta=2)
    assert_almost_equal(score1, score2)

    # test that custom scorer can be pickled
    unpickled_scorer = pickle.loads(pickle.dumps(scorer))
    score3 = unpickled_scorer(clf, X_test, y_test)
    assert_almost_equal(score1, score3)

    # smoke test the repr:
    repr(fbeta_score)
def test_supervised_cluster_scorers():
    # Test clustering scorers against gold standard labeling.
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    km = KMeans(n_clusters=3)
    km.fit(X_train)
    for name in CLUSTER_SCORERS:
        score1 = get_scorer(name)(km, X_test, y_test)
        score2 = getattr(cluster_module, name)(y_test, km.predict(X_test))
        assert_almost_equal(score1, score2)
def test_regression_scorers():
    # Test regression scorers.
    diabetes = load_diabetes()
    X, y = diabetes.data, diabetes.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = Ridge()
    clf.fit(X_train, y_train)
    score1 = get_scorer('r2')(clf, X_test, y_test)
    score2 = r2_score(y_test, clf.predict(X_test))
    assert_almost_equal(score1, score2)
def test_unsupervised_scorers():
    # Test clustering scorers against gold standard labeling.
    # We don't have any real unsupervised Scorers yet.
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    km = KMeans(n_clusters=3)
    km.fit(X_train)
    score1 = get_scorer('adjusted_rand_score')(km, X_test, y_test)
    score2 = adjusted_rand_score(y_test, km.predict(X_test))
    assert_almost_equal(score1, score2)
Example #15
def test_thresholded_scorers_multilabel_indicator_data():
    """Test that the scorer work with multilabel-indicator format
    for multilabel and multi-output multi-class classifier
    """
    X, y = make_multilabel_classification(return_indicator=True,
                                          allow_unlabeled=False,
                                          random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    # Multi-output multi-class predict_proba
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    y_proba = clf.predict_proba(X_test)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, np.vstack([p[:, -1] for p in y_proba]).T)
    assert_almost_equal(score1, score2)

    # Multi-output multi-class decision_function
    # TODO Is there any yet?
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    clf._predict_proba = clf.predict_proba
    clf.predict_proba = None
    clf.decision_function = lambda X: [p[:, 1] for p in clf._predict_proba(X)]

    y_proba = clf.decision_function(X_test)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, np.vstack([p for p in y_proba]).T)
    assert_almost_equal(score1, score2)

    # Multilabel predict_proba
    clf = OneVsRestClassifier(DecisionTreeClassifier())
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.predict_proba(X_test))
    assert_almost_equal(score1, score2)

    # Multilabel decision function
    clf = OneVsRestClassifier(LinearSVC(random_state=0))
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.decision_function(X_test))
    assert_almost_equal(score1, score2)
def test_classification_scores():
    """Test classification scorers."""
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LinearSVC(random_state=0)
    clf.fit(X_train, y_train)

    for prefix, metric in [('f1', f1_score), ('precision', precision_score),
                           ('recall', recall_score)]:

        score1 = get_scorer('%s_weighted' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=None,
                        average='weighted')
        assert_almost_equal(score1, score2)

        score1 = get_scorer('%s_macro' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=None,
                        average='macro')
        assert_almost_equal(score1, score2)

        score1 = get_scorer('%s_micro' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=None,
                        average='micro')
        assert_almost_equal(score1, score2)

        score1 = get_scorer('%s' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=1)
        assert_almost_equal(score1, score2)

    # test fbeta score that takes an argument
    scorer = make_scorer(fbeta_score, beta=2)
    score1 = scorer(clf, X_test, y_test)
    score2 = fbeta_score(y_test, clf.predict(X_test), beta=2,
                         average='weighted')
    assert_almost_equal(score1, score2)

    # test that custom scorer can be pickled
    unpickled_scorer = pickle.loads(pickle.dumps(scorer))
    score3 = unpickled_scorer(clf, X_test, y_test)
    assert_almost_equal(score1, score3)

    # smoke test the repr:
    repr(fbeta_score)
Example #20
    def evaluate(self, dataset, pipelines):
        if not self.is_valid(dataset):
            raise AssertionError("Dataset is not appropriate for evaluation")
        for subject in dataset.subject_list:
            # check if we already have result for this subject/pipeline
            # we might need a better granularity, if we query the DB
            run_pipes = self.results.not_yet_computed(pipelines, dataset, subject)
            if len(run_pipes) == 0:
                continue

            # get the data
            X, y, metadata = self.paradigm.get_data(
                dataset, [subject], self.return_epochs
            )
            le = LabelEncoder()
            y = y if self.mne_labels else le.fit_transform(y)
            groups = metadata.session.values
            scorer = get_scorer(self.paradigm.scoring)

            for name, clf in run_pipes.items():
                # we want to store a results per session
                cv = LeaveOneGroupOut()
                for train, test in cv.split(X, y, groups):
                    t_start = time()
                    if isinstance(X, BaseEpochs):
                        cvclf = clone(clf)
                        cvclf.fit(X[train], y[train])
                        score = scorer(cvclf, X[test], y[test])
                    else:
                        result = _fit_and_score(
                            clone(clf),
                            X,
                            y,
                            scorer,
                            train,
                            test,
                            verbose=False,
                            parameters=None,
                            fit_params=None,
                            error_score=self.error_score,
                        )
                        score = result["test_scores"]
                    duration = time() - t_start
                    nchan = X.info["nchan"] if isinstance(X, BaseEpochs) else X.shape[1]
                    res = {
                        "time": duration,
                        "dataset": dataset,
                        "subject": subject,
                        "session": groups[test][0],
                        "score": score,
                        "n_samples": len(train),
                        "n_channels": nchan,
                        "pipeline": name,
                    }
                    yield res
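# Hedged sketch (added for illustration) of the core pattern in evaluate()
# above: a LeaveOneGroupOut split scored per left-out group with a named
# scorer. The dataset/paradigm/results plumbing is left out, and the data,
# groups and classifier below are stand-ins.
import numpy as np
from sklearn.base import clone
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import get_scorer
from sklearn.model_selection import LeaveOneGroupOut

rng = np.random.RandomState(0)
X_demo = rng.normal(size=(120, 8))
y_demo = rng.randint(0, 2, size=120)
groups_demo = np.repeat([0, 1, 2], 40)      # stand-in for metadata.session.values

scorer_demo = get_scorer("accuracy")
clf_demo = LogisticRegression(max_iter=1000)

for train, test in LeaveOneGroupOut().split(X_demo, y_demo, groups_demo):
    cvclf = clone(clf_demo).fit(X_demo[train], y_demo[train])
    print(groups_demo[test][0], scorer_demo(cvclf, X_demo[test], y_demo[test]))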
Example #21
def test_classification_scorer_sample_weight():
    # Test that classification scorers support sample_weight or raise sensible
    # errors

    # Unlike the metrics invariance test, in the scorer case it's harder
    # to ensure that, on the classifier output, weighted and unweighted
    # scores really should be unequal.
    X, y = make_classification(random_state=0)
    _, y_ml = make_multilabel_classification(n_samples=X.shape[0], random_state=0)
    split = train_test_split(X, y, y_ml, random_state=0)
    X_train, X_test, y_train, y_test, y_ml_train, y_ml_test = split

    sample_weight = np.ones_like(y_test)
    sample_weight[:10] = 0

    # get sensible estimators for each metric
    estimator = _make_estimators(X_train, y_train, y_ml_train)

    for name in get_scorer_names():
        scorer = get_scorer(name)
        if name in REGRESSION_SCORERS:
            # skip the regression scores
            continue
        if name == "top_k_accuracy":
            # in the binary case k > 1 will always lead to a perfect score
            scorer._kwargs = {"k": 1}
        if name in MULTILABEL_ONLY_SCORERS:
            target = y_ml_test
        else:
            target = y_test
        try:
            weighted = scorer(
                estimator[name], X_test, target, sample_weight=sample_weight
            )
            ignored = scorer(estimator[name], X_test[10:], target[10:])
            unweighted = scorer(estimator[name], X_test, target)
            assert weighted != unweighted, (
                f"scorer {name} behaves identically when called with "
                f"sample weights: {weighted} vs {unweighted}"
            )
            assert_almost_equal(
                weighted,
                ignored,
                err_msg=(
                    f"scorer {name} behaves differently "
                    "when ignoring samples and setting "
                    f"sample_weight to 0: {weighted} vs {ignored}"
                ),
            )

        except TypeError as e:
            assert "sample_weight" in str(e), (
                f"scorer {name} raises unhelpful exception when called "
                f"with sample weights: {str(e)}"
            )
Example #22
def get_metric(metric, gib=True, needs_proba=False, needs_threshold=False):
    """Get the right metric depending on the input type.

    Parameters
    ----------
    metric: str or callable
        Metric as a string, function or scorer.

    gib: bool, optional (default=True)
        whether the metric is a score function or a loss function,
        i.e. if True, a higher score is better and if False, lower is
        better. Will be ignored if the metric is a string or a scorer.

    needs_proba: bool, optional (default=False)
        Whether the metric function requires probability estimates of
        a classifier. Is ignored if the metric is a string or a scorer.

    needs_threshold: bool, optional (default=False)
        Whether the metric function takes a continuous decision
        certainty. Is ignored if the metric is a string or a scorer.

    Returns
    -------
    scorer: callable
        Scorer object.

    """
    def get_scorer_name(scorer):
        """Return the name of the provided scorer."""
        for key, value in SCORERS.items():
            if scorer.__dict__ == value.__dict__:
                return key

    if isinstance(metric, str):
        if metric.lower() in METRIC_ACRONYMS:
            metric = METRIC_ACRONYMS[metric.lower()]
        elif metric not in SCORERS:
            raise ValueError("Unknown value for the metric parameter, got "
                             f"{metric}. Try one of: {', '.join(SCORERS)}.")
        metric = get_scorer(metric)
        metric.name = get_scorer_name(metric)

    elif hasattr(metric, "_score_func"):  # Provided metric is scoring
        metric.name = get_scorer_name(metric)

    else:  # Metric is a function with signature metric(y, y_pred)
        metric = make_scorer(
            score_func=metric,
            greater_is_better=gib,
            needs_proba=needs_proba,
            needs_threshold=needs_threshold,
        )
        metric.name = metric._score_func.__name__

    return metric
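# Hedged sketch (added for illustration) of the three input forms get_metric()
# distinguishes, expressed with plain scikit-learn objects; get_metric itself,
# SCORERS and METRIC_ACRONYMS belong to the surrounding module.
from sklearn.metrics import f1_score, get_scorer, make_scorer

metric_as_string = "f1"               # resolved through get_scorer(...)
metric_as_scorer = get_scorer("f1")   # already carries a _score_func attribute


def metric_as_function(y, y_pred):    # plain metric, wrapped by make_scorer(...)
    return f1_score(y, y_pred)


wrapped = make_scorer(metric_as_function, greater_is_better=True)
assert hasattr(metric_as_scorer, "_score_func") and hasattr(wrapped, "_score_func")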
def test_multiclass_roc_no_proba_scorer_errors(scorer_name):
    # Perceptron has no predict_proba
    scorer = get_scorer(scorer_name)
    X, y = make_classification(n_classes=3,
                               n_informative=3,
                               n_samples=20,
                               random_state=0)
    lr = Perceptron().fit(X, y)
    msg = "'Perceptron' object has no attribute 'predict_proba'"
    with pytest.raises(AttributeError, match=msg):
        scorer(lr, X, y)
Example #24
def create_feature_reselection_experiment(maker=None, **user_kwargs):
    exp_kwargs = \
        dict(scorer=get_scorer('neg_median_absolute_error'),
             drift_detection=False,
             feature_reselection=True,
             feature_reselection_estimator_size=10,
             feature_reselection_strategy='quantile',
             feature_reselection_threshold=0.1,
             feature_reselection_quantile=0.5,
             feature_reselection_number=None)
    return _create_experiment(exp_kwargs, maker=maker, need_test=True, user_kwargs=user_kwargs)
def inhome_permutation_importance(estimator,
                                   feature_groups, X, y,
                                   scoring='f1_macro',
                                   n_repeats=10,
                                   random_state=23):

    result = {'score_difference': np.zeros((len(feature_groups), n_repeats)),
              'feature_group_names': np.zeros(len(feature_groups), dtype='O')}

    X_train_original, X_test, y_train, y_test = train_test_split(X, y,
                                                                test_size=.2,
                                                                stratify=y,
                                                                random_state=random_state)

    feature_list_indices = {col: idx for idx, col in enumerate(X_train_original.columns)}
    scorer = get_scorer(scoring)

    for i, (feature_group_name, feature_list) in enumerate(feature_groups.items()):
#         print(feature_group_name)

        score_difference = []

        for j in range(n_repeats):

            np.random.seed(random_state+j)
            X_train_permuted = X_train_original.copy()

            # permute feature values in selected columns
            for col in feature_list:
    #             print(col)
                col_idx = feature_list_indices[col]
                permuted_indices = np.random.permutation(X_train_original.shape[0])

#                 col = pd.DataFrame(np.random.uniform(low=-1.0, high=1.0, size=X_train_original.shape[0])) # fill with random values from U(-1, 1)
                col = X_train_permuted.iloc[permuted_indices, col_idx] # permute present values
                col.index = X_train_permuted.index
                X_train_permuted.iloc[:, col_idx] = col

#             X_train_permuted = X_train_permuted.drop(columns=feature_list)

            # train model using OLD data matrix X_train_original and evaluate
            est_original = estimator.fit(X_train_original, y_train)
            score_original = scorer(est_original, X_test, y_test)

            # train model using NEW data matrix X_train_permuted and evaluate
            est_permuted = estimator.fit(X_train_permuted, y_train)
            score_permuted = scorer(est_permuted, X_test, y_test)


            result['score_difference'][i, j] = score_original - score_permuted

        result['feature_group_names'][i] = feature_group_name

    return result
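# Hedged usage sketch (added for illustration): inhome_permutation_importance()
# above expects a pandas DataFrame and a mapping from group names to column
# lists. The frame, labels and groups below are made up, and the function plus
# its imports (train_test_split, get_scorer) are assumed to be in scope.
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression

rng = np.random.RandomState(0)
X_demo = pd.DataFrame(rng.normal(size=(200, 4)), columns=list("abcd"))
y_demo = (X_demo["a"] + X_demo["b"] > 0).astype(int).values
groups_demo = {"ab_block": ["a", "b"], "cd_block": ["c", "d"]}

res_demo = inhome_permutation_importance(LogisticRegression(), groups_demo,
                                         X_demo, y_demo, n_repeats=3)
print(res_demo["feature_group_names"], res_demo["score_difference"].mean(axis=1))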
Example #26
def pack_score(y_test_true_all, y_test_predict_all, scoring):
    if scoring == 'neg_root_mean_squared_error':
        return np.sqrt(np.mean((y_test_true_all - y_test_predict_all)**2))

    scorer = get_scorer(scoring)

    scorer_func = scorer._score_func

    score = scorer_func(y_test_true_all, y_test_predict_all)

    return score
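# Brief usage sketch (added for illustration): for the 'r2' scorer, pack_score()
# above should agree with calling r2_score directly on the pooled predictions.
# The arrays are made up and pack_score is assumed to be in scope.
import numpy as np
from sklearn.metrics import r2_score

y_true_demo = np.array([3.0, -0.5, 2.0, 7.0])
y_pred_demo = np.array([2.5, 0.0, 2.0, 8.0])
assert pack_score(y_true_demo, y_pred_demo, 'r2') == r2_score(y_true_demo, y_pred_demo)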
def test_multiclass_roc_proba_scorer(scorer_name, metric):
    scorer = get_scorer(scorer_name)
    X, y = make_classification(n_classes=3,
                               n_informative=3,
                               n_samples=20,
                               random_state=0)
    lr = LogisticRegression(multi_class="multinomial").fit(X, y)
    y_proba = lr.predict_proba(X)
    expected_score = metric(y, y_proba)

    assert scorer(lr, X, y) == pytest.approx(expected_score)
Example #28
def test_classification_binary_scores(scorer_name, metric):
    # check consistency between score and scorer for scores supporting
    # binary classification.
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LinearSVC(random_state=0)
    clf.fit(X_train, y_train)

    score = get_scorer(scorer_name)(clf, X_test, y_test)
    expected_score = metric(y_test, clf.predict(X_test))
    assert_almost_equal(score, expected_score)
Example #29
def run_grid_search(estimator, param_grid, metric, X, y, X_test, y_test, seed,
                    profile):
    _train_test_iter = StratifiedKFold(y,
                                       n_folds=5,
                                       shuffle=True,
                                       random_state=seed)
    inner_cv_func = lambda zx, zy: StratifiedShuffleSplit(
        zy, n_iter=10, test_size=0.2, random_state=seed)
    if metric == 'avgprec':
        scoring_func = get_scorer("average_precision")
    else:
        scoring_func = get_scorer("roc_auc")

    _grid_search = NestedGridSearchCV(estimator,
                                      param_grid,
                                      scoring_func,
                                      cv=_train_test_iter,
                                      inner_cv=inner_cv_func,
                                      profile=profile)
    _grid_search.fit(X, y, X_test=X_test, y_test=y_test)
    return _grid_search
    def __init__(self, estimator, k_features=1,
                 forward=True, floating=False,
                 verbose=0, scoring=None,
                 cv=5, n_jobs=1,
                 pre_dispatch='2*n_jobs',
                 clone_estimator=True):

        self.estimator = estimator
        self.k_features = k_features
        self.forward = forward
        self.floating = floating
        self.pre_dispatch = pre_dispatch
        # Want to raise meaningful error message if a
        # cross-validation generator is inputted
        if isinstance(cv, types.GeneratorType):
            err_msg = ('Input cv is a generator object, which is not '
                       'supported. Instead please input an iterable yielding '
                       'train, test splits. This can usually be done by '
                       'passing a cross-validation generator to the '
                       'built-in list function. I.e. cv=list(<cv-generator>)')
            raise TypeError(err_msg)
        self.cv = cv
        self.n_jobs = n_jobs
        self.verbose = verbose
        self.named_est = {key: value for key, value in
                          _name_estimators([self.estimator])}
        self.clone_estimator = clone_estimator

        if self.clone_estimator:
            self.est_ = clone(self.estimator)
        else:
            self.est_ = self.estimator
        self.scoring = scoring

        if scoring is None:
            if self.est_._estimator_type == 'classifier':
                scoring = 'accuracy'
            elif self.est_._estimator_type == 'regressor':
                scoring = 'r2'
            else:
                raise AttributeError('Estimator must '
                                     'be a Classifier or Regressor.')
        if isinstance(scoring, str):
            self.scorer = get_scorer(scoring)
        else:
            self.scorer = scoring

        self.fitted = False
        self.subsets_ = {}
        self.interrupted_ = False

        # don't mess with this unless testing
        self._TESTING_INTERRUPT_MODE = False
    def __init__(self,
                 estimator,
                 k_features='best',
                 forward=True,
                 floating=False,
                 print_progress=False,
                 verbose=1,
                 scoring=None,
                 cv=5,
                 skip_if_stuck=True,
                 n_jobs=1,
                 pre_dispatch='2*n_jobs',
                 clone_estimator=True):

        if print_progress:
            warnings.warn(
                "The print_progress parameter "
                "has been deprecated in "
                "0.4.3 and will be removed in 0.5.0. "
                "Please use the verbose parameter instead.",
                DeprecationWarning)
            if verbose == 0:
                verbose = 1

        self.estimator = estimator
        self.k_features = k_features
        self.forward = forward
        self.floating = floating
        self.pre_dispatch = pre_dispatch
        self.scoring = scoring
        if isinstance(scoring, str):
            self.scorer = get_scorer(scoring)
        else:
            self.scorer = scoring
        self.skip_if_stuck = skip_if_stuck
        self.cv = cv
        self.n_jobs = n_jobs
        self.verbose = verbose
        self.named_est = {
            key: value
            for key, value in _name_estimators([self.estimator])
        }
        self.clone_estimator = clone_estimator
        if self.clone_estimator:
            self.est_ = clone(self.estimator)
        else:
            self.est_ = self.estimator
        self.fitted = False
        self.subsets_ = {}
        self.interrupted_ = False

        # don't mess with this unless testing
        self._TESTING_INTERRUPT_MODE = False
Example #32
def test_get_feature_shap_values_per_fold(X, y):
    clf = DecisionTreeClassifier(max_depth=1)
    shap_values, train_score, test_score = ShapRFECV._get_feature_shap_values_per_fold(
        X,
        y,
        clf,
        train_index=[2, 3, 4, 5, 6, 7],
        val_index=[0, 1],
        scorer=get_scorer('roc_auc'))
    assert test_score == 1
    assert train_score > 0.9
    assert shap_values.shape == (2, 3)
Example #33
    def score_explicit(self, clf, X_train, y_train, X_test, y_test):
        scorer = get_scorer(self.paradigm.scoring)
        t_start = time()
        try:
            model = clf.fit(X_train, y_train)
            score = _score(model, X_test, y_test, scorer)
        except ValueError as e:
            if self.error_score == "raise":
                raise e
            score = self.error_score
        duration = time() - t_start
        return score, duration
Example #34
def test_get_feature_shap_values_per_fold(X, y):
    """
    Test with ShapRFECV with features per fold.
    """
    clf = DecisionTreeClassifier(max_depth=1)
    shap_elimination = ShapRFECV(clf)
    shap_values, train_score, test_score = shap_elimination._get_feature_shap_values_per_fold(
        X, y, clf, train_index=[2, 3, 4, 5, 6, 7], val_index=[0, 1], scorer=get_scorer("roc_auc")
    )
    assert test_score == 1
    assert train_score > 0.9
    assert shap_values.shape == (2, 3)
Example #35
    def _get_scorer_from_string(self, scoring):
        if scoring == 'my_scorer':
            if not self.kernel:
                myfunc = importlib.import_module(
                    'modules.myfuncs.%s' % self.configs['fit'].get('myfunc'))
            method_name = 'get_my_scorer'
            if not self.kernel:
                method_name = 'myfunc.%s' % method_name
            scorer = eval(method_name)()
        else:
            scorer = get_scorer(scoring)
        return scorer
def _test_ridge_loo(filter_):
    # test that can work with both dense or sparse matrices
    n_samples = X_diabetes.shape[0]

    ret = []

    fit_intercept = filter_ == DENSE_FILTER
    ridge_gcv = _RidgeGCV(fit_intercept=fit_intercept)

    # check best alpha
    ridge_gcv.fit(filter_(X_diabetes), y_diabetes)
    alpha_ = ridge_gcv.alpha_
    ret.append(alpha_)

    # check that we get same best alpha with custom loss_func
    f = ignore_warnings
    scoring = make_scorer(mean_squared_error, greater_is_better=False)
    ridge_gcv2 = RidgeCV(fit_intercept=False, scoring=scoring)
    f(ridge_gcv2.fit)(filter_(X_diabetes), y_diabetes)
    assert ridge_gcv2.alpha_ == pytest.approx(alpha_)

    # check that we get same best alpha with custom score_func
    func = lambda x, y: -mean_squared_error(x, y)
    scoring = make_scorer(func)
    ridge_gcv3 = RidgeCV(fit_intercept=False, scoring=scoring)
    f(ridge_gcv3.fit)(filter_(X_diabetes), y_diabetes)
    assert ridge_gcv3.alpha_ == pytest.approx(alpha_)

    # check that we get same best alpha with a scorer
    scorer = get_scorer('neg_mean_squared_error')
    ridge_gcv4 = RidgeCV(fit_intercept=False, scoring=scorer)
    ridge_gcv4.fit(filter_(X_diabetes), y_diabetes)
    assert ridge_gcv4.alpha_ == pytest.approx(alpha_)

    # check that we get same best alpha with sample weights
    if filter_ == DENSE_FILTER:
        ridge_gcv.fit(filter_(X_diabetes), y_diabetes,
                      sample_weight=np.ones(n_samples))
        assert ridge_gcv.alpha_ == pytest.approx(alpha_)

    # simulate several responses
    Y = np.vstack((y_diabetes, y_diabetes)).T

    ridge_gcv.fit(filter_(X_diabetes), Y)
    Y_pred = ridge_gcv.predict(filter_(X_diabetes))
    ridge_gcv.fit(filter_(X_diabetes), y_diabetes)
    y_pred = ridge_gcv.predict(filter_(X_diabetes))

    assert_allclose(np.vstack((y_pred, y_pred)).T,
                    Y_pred, rtol=1e-5)

    return ret
def y_randomization_test(yx, model_dicts, crys_prop, scoring='f1_macro', n_repeats=25, savefig=False):

    fig, ax = plt.subplots(nrows=len(model_dicts[crys_prop]), ncols=1,
                           figsize=(6.5, len(model_dicts[crys_prop])*3),
                           sharex=True)

    scorer = get_scorer(scoring)

    for i, (model_name, model) in enumerate(model_dicts[crys_prop].items()):

        X = yx.iloc[:, 1:].copy()
        y = yx.iloc[:, 0].astype(int).copy()

        X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=23)
        m = copy(model)
        y_initial_score = scorer(m.fit(X_train, y_train), X_test, y_test)

        y_randomized_scores = []
        for jj in range(n_repeats):
            X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=23+jj)
            m = copy(model)

            np.random.seed(92)
            np.random.shuffle(y_train)

            score_y_randomized = scorer(m.fit(X_train, y_train), X_test, y_test)
            y_randomized_scores.append(score_y_randomized)


        #y_initial_score = train_test_estimate(X, y, copy(model), scorer)

        #y_randomized_scores = []
        #for _ in range(n_repeats):

        #    np.random.seed(92)
        #    np.random.shuffle(y)
        #    score_y_randomized = train_test_estimate(X, y, copy(model), scorer)
        #    y_randomized_scores.append(score_y_randomized)

        ax[i].hist(y_randomized_scores, bins=15, color='blue', label='y-randomized')
        ax[i].axvline(y_initial_score, color='red', lw=4, label='y-initial')
        ax[i].set_title('%s model' % (model_name))
        ax[i].legend(loc='upper left')
        ax[i].set_xlim(0.0, 1.0)

    ax[len(model_dicts[crys_prop])-1].set_xlabel(scoring)

    fig.suptitle(rf'$\Delta${crys_prop} prediction', y=.995, fontsize=20)
    fig.tight_layout(rect=[0, 0, 1, 0.975])
    if savefig:
        fig.savefig(f'y_randomization_test_{crys_prop}.png', dpi=dpi)
    plt.show()
Example #38
def test_rfecv():
    generator = check_random_state(0)

    iris = load_iris()
    X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
    y = list(iris.target)  # regression test: list should be supported

    # Test using the score function
    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5)
    rfecv.fit(X, y)
    # non-regression test for missing worst feature:
    assert_equal(len(rfecv.grid_scores_), X.shape[1])
    assert_equal(len(rfecv.ranking_), X.shape[1])
    X_r = rfecv.transform(X)

    # All the noisy variable were filtered out
    assert_array_equal(X_r, iris.data)

    # same in sparse
    rfecv_sparse = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5)
    X_sparse = sparse.csr_matrix(X)
    rfecv_sparse.fit(X_sparse, y)
    X_r_sparse = rfecv_sparse.transform(X_sparse)
    assert_array_equal(X_r_sparse.toarray(), iris.data)

    # Test using a customized loss function
    scoring = make_scorer(zero_one_loss, greater_is_better=False)
    rfecv = RFECV(estimator=SVC(kernel="linear"),
                  step=1,
                  cv=5,
                  scoring=scoring)
    ignore_warnings(rfecv.fit)(X, y)
    X_r = rfecv.transform(X)
    assert_array_equal(X_r, iris.data)

    # Test using a scorer
    scorer = get_scorer('accuracy')
    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5, scoring=scorer)
    rfecv.fit(X, y)
    X_r = rfecv.transform(X)
    assert_array_equal(X_r, iris.data)

    # Test fix on grid_scores
    def test_scorer(estimator, X, y):
        return 1.0

    rfecv = RFECV(estimator=SVC(kernel="linear"),
                  step=1,
                  cv=5,
                  scoring=test_scorer)
    rfecv.fit(X, y)
    assert_array_equal(rfecv.grid_scores_, np.ones(len(rfecv.grid_scores_)))
Example #39
def _test_ridge_loo(filter_):
    # test that can work with both dense or sparse matrices
    n_samples = X_diabetes.shape[0]

    ret = []

    fit_intercept = filter_ == DENSE_FILTER
    ridge_gcv = _RidgeGCV(fit_intercept=fit_intercept)

    # check best alpha
    ridge_gcv.fit(filter_(X_diabetes), y_diabetes)
    alpha_ = ridge_gcv.alpha_
    ret.append(alpha_)

    # check that we get same best alpha with custom loss_func
    f = ignore_warnings
    scoring = make_scorer(mean_squared_error, greater_is_better=False)
    ridge_gcv2 = RidgeCV(fit_intercept=False, scoring=scoring)
    f(ridge_gcv2.fit)(filter_(X_diabetes), y_diabetes)
    assert ridge_gcv2.alpha_ == pytest.approx(alpha_)

    # check that we get same best alpha with custom score_func
    func = lambda x, y: -mean_squared_error(x, y)
    scoring = make_scorer(func)
    ridge_gcv3 = RidgeCV(fit_intercept=False, scoring=scoring)
    f(ridge_gcv3.fit)(filter_(X_diabetes), y_diabetes)
    assert ridge_gcv3.alpha_ == pytest.approx(alpha_)

    # check that we get same best alpha with a scorer
    scorer = get_scorer('neg_mean_squared_error')
    ridge_gcv4 = RidgeCV(fit_intercept=False, scoring=scorer)
    ridge_gcv4.fit(filter_(X_diabetes), y_diabetes)
    assert ridge_gcv4.alpha_ == pytest.approx(alpha_)

    # check that we get same best alpha with sample weights
    if filter_ == DENSE_FILTER:
        ridge_gcv.fit(filter_(X_diabetes),
                      y_diabetes,
                      sample_weight=np.ones(n_samples))
        assert ridge_gcv.alpha_ == pytest.approx(alpha_)

    # simulate several responses
    Y = np.vstack((y_diabetes, y_diabetes)).T

    ridge_gcv.fit(filter_(X_diabetes), Y)
    Y_pred = ridge_gcv.predict(filter_(X_diabetes))
    ridge_gcv.fit(filter_(X_diabetes), y_diabetes)
    y_pred = ridge_gcv.predict(filter_(X_diabetes))

    assert_allclose(np.vstack((y_pred, y_pred)).T, Y_pred, rtol=1e-5)

    return ret
Example #40
def custom_paired_ttest_cv(estimator1, estimator2, X, y,
                          cv=10,
                          scoring=None,
                          shuffle=False,
                          random_seed=None):
    
    kf = StratifiedKFold(random_state=random_seed, n_splits=cv, shuffle=True)  

    if scoring is None:
        if estimator1._estimator_type == 'classifier':
            scoring = 'accuracy'
        elif estimator1._estimator_type == 'regressor':
            scoring = 'r2'
        else:
            raise AttributeError('Estimator must '
                                 'be a Classifier or Regressor.')
    if isinstance(scoring, str):
        scorer = get_scorer(scoring)
    else:
        scorer = scoring

    score_diff = []

    # this is probably wrong :(
    for train_index, test_index in kf.split(X, y):
        ##### THIS IS WHERE IT BECOMES "CUSTOM"
        if isinstance(X, pd.DataFrame):
            X_train = X.iloc[train_index]
            X_test = X.iloc[test_index]
        else:
            X_train = [X[i] for i in train_index]
            X_test = [X[i] for i in test_index]
        #####

        y_train, y_test = y[train_index], y[test_index]

        estimator1.fit(X_train, y_train)
        estimator2.fit(X_train, y_train)

        est1_score = scorer(estimator1, X_test, y_test)
        est2_score = scorer(estimator2, X_test, y_test)
        score_diff.append(est1_score - est2_score)

    avg_diff = np.mean(score_diff)

    numerator = avg_diff * np.sqrt(cv)
    denominator = np.sqrt(sum([(diff - avg_diff)**2 for diff in score_diff])
                          / (cv - 1))
    t_stat = numerator / denominator

    pvalue = stats.t.sf(np.abs(t_stat), cv - 1)*2.
    return float(t_stat), float(pvalue)
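# Hedged usage sketch (added for illustration) of custom_paired_ttest_cv()
# above, comparing two classifiers on a synthetic problem; the helper and its
# imports (numpy, pandas, scipy.stats, StratifiedKFold, get_scorer) are assumed
# to be in scope.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

X_demo, y_demo = make_classification(n_samples=200, random_state=0)
t_stat_demo, p_value_demo = custom_paired_ttest_cv(
    LogisticRegression(max_iter=1000), DecisionTreeClassifier(random_state=0),
    X_demo, y_demo, cv=5, scoring='accuracy', random_seed=0)
print(f"t = {t_stat_demo:.3f}, p = {p_value_demo:.3f}")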
Example #41
def fit_and_dump(_x, _y, args):
    data = _x.copy()
    _x = categorical_to_numeric(_x)
    _y = _y[args.event].cat.codes.values

    model = create_estimator(data, _y, args.seed)
    if args.metric == 'avgprec':
        scoring_func = get_scorer("average_precision")
    else:
        scoring_func = get_scorer("roc_auc")
    model.set_params(scorer=scoring_func)

    print("Number of base estimators: %d" % len(model.base_estimators))

    print("Purging MongoDB cv_scores database")
    client = MongoClient(mongodb_host)
    db = client.ensemble_selection_classification
    db.cv_scores.remove({})

    print("Fitting %r" % model)
    _create_directories(args.models_dir, model.base_estimators)
    return model.fit(_x.values, _y)
Example #42
    def train(self, X, y, X_test):
        # searcher = MCTSSearcher(self.search_space_fn, use_meta_learner=self.use_meta_learner, max_node_space=10,
        #                         candidates_size=10,
        #                         optimize_direction=OptimizeDirection.Maximize)
        searcher = EvolutionSearcher(
            self.search_space_fn,
            optimize_direction=self.optimize_direction,
            population_size=30,
            sample_size=10,
            regularized=True,
            candidates_size=10,
            use_meta_learner=self.use_meta_learner)
        # searcher = RandomSearcher(lambda: search_space_general(early_stopping_rounds=20, verbose=0),
        #                     optimize_direction=OptimizeDirection.Maximize)
        es = EarlyStoppingCallback(self.earlystop_rounds,
                                   self.optimize_direction,
                                   time_limit=self.time_limit,
                                   expected_reward=self.expected_reward)

        hk = HyperGBM(searcher,
                      reward_metric=self.reward_metric,
                      cache_dir=f'hypergbm_cache',
                      clear_cache=True,
                      callbacks=[es, SummaryCallback()])

        log_callback = ConsoleCallback()
        self.experiment = CompeteExperiment(
            hk,
            X,
            y,
            X_test=X_test,
            eval_size=self.eval_size,
            train_test_split_strategy=self.train_test_split_strategy,
            cv=self.cv,
            num_folds=self.num_folds,
            callbacks=[],
            scorer=get_scorer(self.scorer),
            drop_feature_with_collinearity=self.drop_feature_with_collinearity,
            drift_detection=True,
            n_est_feature_importance=5,
            importance_threshold=1e-5,
            two_stage_importance_selection=self.two_stage_importance_selection,
            ensemble_size=self.ensemble_size,
            pseudo_labeling=self.pseudo_labeling,
            pseudo_labeling_proba_threshold=self.
            pseudo_labeling_proba_threshold,
            pseudo_labeling_resplit=self.pseudo_labeling_resplit,
            retrain_on_wholedata=self.retrain_on_wholedata,
        )
        self.estimator = self.experiment.run(use_cache=self.use_cache,
                                             max_trials=self.max_trials)
Example #43
    def score(self, X, y):
        """Score each estimator/data slice couple.

        Parameters
        ----------
        X : array, shape (n_samples, nd_features, n_estimators)
            The input samples. For each data slice, the corresponding estimator
            scores the prediction: e.g. [estimators[ii].score(X[..., ii], y)
                                         for ii in range(n_estimators)]
            The feature dimension can be multidimensional e.g.
            X.shape = (n_samples, n_features_1, n_features_2, n_estimators)

        y : array, shape (n_samples,) | (n_samples, n_targets)
            The target values.

        Returns
        -------
        score : array, shape (n_samples, n_estimators)
            Score for each estimator / data slice couple.
        """
        from sklearn.metrics import make_scorer, get_scorer
        self._check_Xy(X)
        if X.shape[-1] != len(self.estimators_):
            raise ValueError('The number of estimators does not match '
                             'X.shape[-1]')

        # If scoring is None (default), the predictions are internally
        # generated by estimator.score(). Else, we must first get the
        # predictions based on the scorer.
        if not isinstance(self.scoring, str):
            scoring_ = (make_scorer(self.scoring)
                        if self.scoring is not None else self.scoring)

        elif self.scoring is not None:
            scoring_ = get_scorer(self.scoring)

        # For predictions/transforms the parallelization is across the data and
        # not across the estimators to avoid memory load.
        parallel, p_func, n_jobs = parallel_func(_sl_score, self.n_jobs)
        n_jobs = min(n_jobs, X.shape[-1])
        X_splits = np.array_split(X, n_jobs, axis=-1)
        est_splits = np.array_split(self.estimators_, n_jobs)
        score = parallel(
            p_func(est, scoring_, X, y)
            for (est, x) in zip(est_splits, X_splits))

        if n_jobs > 1:
            score = np.concatenate(score, axis=0)
        else:
            score = score[0]
        return score
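# Hedged sketch (added for illustration) of the per-slice scoring described in
# the docstring above: the last axis of X indexes one fitted estimator per data
# slice, and each slice is scored with the resolved scorer. LogisticRegression
# and 'accuracy' are stand-ins, not part of the original class.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import get_scorer

rng = np.random.RandomState(0)
X_demo = rng.normal(size=(100, 5, 3))       # (n_samples, n_features, n_estimators)
y_demo = (X_demo[:, 0, 0] > 0).astype(int)
estimators_demo = [LogisticRegression().fit(X_demo[..., i], y_demo)
                   for i in range(X_demo.shape[-1])]

scoring_demo = get_scorer("accuracy")
scores_demo = np.array([scoring_demo(est, X_demo[..., i], y_demo)
                        for i, est in enumerate(estimators_demo)])
print(scores_demo.shape)                    # (n_estimators,)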
Example #44
    def __init__(self, estimator, k_features=1,
                 forward=True, floating=False,
                 verbose=0, scoring=None,
                 cv=5, n_jobs=1,
                 pre_dispatch='2*n_jobs',
                 clone_estimator=True):

        self.estimator = estimator
        self.k_features = k_features
        self.forward = forward
        self.floating = floating
        self.pre_dispatch = pre_dispatch
        # Want to raise meaningful error message if a
        # cross-validation generator is inputted
        if isinstance(cv, types.GeneratorType):
            err_msg = ('Input cv is a generator object, which is not '
                       'supported. Instead please input an iterable yielding '
                       'train, test splits. This can usually be done by '
                       'passing a cross-validation generator to the '
                       'built-in list function. I.e. cv=list(<cv-generator>)')
            raise TypeError(err_msg)
        self.cv = cv
        self.n_jobs = n_jobs
        self.verbose = verbose
        self.clone_estimator = clone_estimator

        if self.clone_estimator:
            self.est_ = clone(self.estimator)
        else:
            self.est_ = self.estimator
        self.scoring = scoring

        if scoring is None:
            if self.est_._estimator_type == 'classifier':
                scoring = 'accuracy'
            elif self.est_._estimator_type == 'regressor':
                scoring = 'r2'
            else:
                raise AttributeError('Estimator must '
                                     'be a Classifier or Regressor.')
        if isinstance(scoring, str):
            self.scorer = get_scorer(scoring)
        else:
            self.scorer = scoring

        self.fitted = False
        self.subsets_ = {}
        self.interrupted_ = False

        # don't mess with this unless testing
        self._TESTING_INTERRUPT_MODE = False
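The defaults resolved above ('accuracy' for classifiers, 'r2' for regressors) match what the estimators' own score methods report, which a short sanity check makes concrete (a sketch under that assumption, independent of the class above):

import numpy as np
from sklearn.datasets import make_classification, make_regression
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import get_scorer

Xc, yc = make_classification(random_state=0)
clf = LogisticRegression(random_state=0).fit(Xc, yc)
# 'accuracy' is what ClassifierMixin.score computes
assert np.isclose(get_scorer('accuracy')(clf, Xc, yc), clf.score(Xc, yc))

Xr, yr = make_regression(random_state=0)
reg = LinearRegression().fit(Xr, yr)
# 'r2' is what RegressorMixin.score computes
assert np.isclose(get_scorer('r2')(reg, Xr, yr), reg.score(Xr, yr))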
Example #45
    def score(self, X, y):
        """Score each estimator/data slice couple.

        Parameters
        ----------
        X : array, shape (n_samples, nd_features, n_estimators)
            The input samples. For each data slice, the corresponding estimator
            scores the prediction: e.g. [estimators[ii].score(X[..., ii], y)
                                         for ii in range(n_estimators)]
            The feature dimension can be multidimensional e.g.
            X.shape = (n_samples, n_features_1, n_features_2, n_estimators)

        y : array, shape (n_samples,) | (n_samples, n_targets)
            The target values.

        Returns
        -------
        score : array, shape (n_samples, n_estimators)
            Score for each estimator / data slice couple.
        """
        from sklearn.metrics import make_scorer, get_scorer
        self._check_Xy(X)
        if X.shape[-1] != len(self.estimators_):
            raise ValueError('The number of estimators does not match '
                             'X.shape[-1]')

        # If scoring is None (default), the predictions are internally
        # generated by estimator.score(). Else, we must first get the
        # predictions based on the scorer.
        if not isinstance(self.scoring, str):
            scoring_ = (make_scorer(self.scoring) if self.scoring is
                        not None else self.scoring)

        elif self.scoring is not None:
            scoring_ = get_scorer(self.scoring)

        # For predictions/transforms the parallelization is across the data and
        # not across the estimators to avoid memory load.
        parallel, p_func, n_jobs = parallel_func(_sl_score, self.n_jobs)
        n_jobs = min(n_jobs, X.shape[-1])
        X_splits = np.array_split(X, n_jobs, axis=-1)
        est_splits = np.array_split(self.estimators_, n_jobs)
        score = parallel(p_func(est, scoring_, x, y)
                         for (est, x) in zip(est_splits, X_splits))

        if n_jobs > 1:
            score = np.concatenate(score, axis=0)
        else:
            score = score[0]
        return score
Example #46
    def evaluate(self, X, y, X_vald=None, y_vald=None):
        clf = RandomForestClassifier(n_estimators=32, max_depth=3,
                                     n_jobs=-1)  # used as base classifier
        if X_vald is None:
            return cross_val_score(clf,
                                   X,
                                   y,
                                   scoring=self._metric,
                                   cv=self._cv_folds,
                                   n_jobs=-1).mean()
        else:
            clf.fit(X, y)
            sk = get_scorer(self._metric)
            return sk(clf, X_vald, y_vald)
    def __init__(self, n_jobs=-1, offset_scale=1.0, n_buckets=2, initial_offsets=None, scoring='accuracy'):
        self.n_jobs = int(n_jobs)
        self.offset_scale = float(offset_scale)
        self.n_buckets = int(n_buckets)
        if initial_offsets is None:
            self.initial_offsets_ = [-0.5] * self.n_buckets
            pass
        else:
            self.initial_offsets_ = list(initial_offsets)
            assert(len(self.initial_offsets_) == self.n_buckets)
            pass
        from sklearn.metrics import get_scorer
        self.scoring = get_scorer(scoring)
        pass
    def __init__(self, estimator, k_features=1,
                 forward=True, floating=False,
                 verbose=0, scoring=None,
                 cv=5, skip_if_stuck=True, n_jobs=1,
                 pre_dispatch='2*n_jobs',
                 clone_estimator=True):

        self.estimator = estimator
        self.k_features = k_features
        self.forward = forward
        self.floating = floating
        self.pre_dispatch = pre_dispatch
        self.cv = cv
        self.n_jobs = n_jobs
        self.skip_if_stuck = skip_if_stuck
        self.verbose = verbose
        self.named_est = {key: value for key, value in
                          _name_estimators([self.estimator])}
        self.clone_estimator = clone_estimator

        if self.clone_estimator:
            self.est_ = clone(self.estimator)
        else:
            self.est_ = self.estimator
        self.scoring = scoring

        if scoring is None:
            if self.est_._estimator_type == 'classifier':
                scoring = 'accuracy'
            elif self.est_._estimator_type == 'regressor':
                scoring = 'r2'
            else:
                raise AttributeError('Estimator must '
                                     'be a Classifier or Regressor.')
        if isinstance(scoring, str):
            self.scorer = get_scorer(scoring)
        else:
            self.scorer = scoring

        self.fitted = False
        self.subsets_ = {}
        self.interrupted_ = False

        # don't mess with this unless testing
        self._TESTING_INTERRUPT_MODE = False
    def __init__(self, estimator, k_features=1,
                 forward=True, floating=False,
                 verbose=0, scoring=None,
                 cv=5, n_jobs=1,
                 pre_dispatch='2*n_jobs',
                 clone_estimator=True):

        self.estimator = estimator
        self.k_features = k_features
        self.forward = forward
        self.floating = floating
        self.pre_dispatch = pre_dispatch
        self.cv = cv
        self.n_jobs = n_jobs
        self.verbose = verbose
        self.named_est = {key: value for key, value in
                          _name_estimators([self.estimator])}
        self.clone_estimator = clone_estimator

        if self.clone_estimator:
            self.est_ = clone(self.estimator)
        else:
            self.est_ = self.estimator
        self.scoring = scoring

        if scoring is None:
            if self.est_._estimator_type == 'classifier':
                scoring = 'accuracy'
            elif self.est_._estimator_type == 'regressor':
                scoring = 'r2'
            else:
                raise AttributeError('Estimator must '
                                     'be a Classifier or Regressor.')
        if isinstance(scoring, str):
            self.scorer = get_scorer(scoring)
        else:
            self.scorer = scoring

        self.fitted = False
        self.subsets_ = {}
        self.interrupted_ = False

        # don't mess with this unless testing
        self._TESTING_INTERRUPT_MODE = False
    def __init__(self, estimator, k_features,
                 forward=True, floating=False,
                 print_progress=True, scoring='accuracy',
                 cv=5, skip_if_stuck=True, n_jobs=1,
                 pre_dispatch='2*n_jobs'):
        self.estimator = clone(estimator)
        self.k_features = k_features
        self.forward = forward
        self.floating = floating
        self.pre_dispatch = pre_dispatch
        self.scoring = scoring
        self.scorer = get_scorer(scoring)
        self.skip_if_stuck = skip_if_stuck
        self.cv = cv
        self.print_progress = print_progress
        self.n_jobs = n_jobs
def test_deprecated_names():
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LogisticRegression(random_state=0)
    clf.fit(X_train, y_train)

    for name in ('mean_absolute_error', 'mean_squared_error',
                 'median_absolute_error', 'log_loss'):
        warning_msg = "Scoring method %s was renamed to" % name
        for scorer in (get_scorer(name), SCORERS[name]):
            assert_warns_message(DeprecationWarning,
                                 warning_msg,
                                 scorer, clf, X, y)

        assert_warns_message(DeprecationWarning,
                             warning_msg,
                             cross_val_score, clf, X, y, scoring=name)
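Current scikit-learn releases have dropped the deprecated names exercised above in favour of 'neg_'-prefixed scorers, so new code should request those directly; a small sketch (the exact names available depend on the installed sklearn version):

from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import get_scorer

X, y = make_regression(random_state=0)
reg = LinearRegression().fit(X, y)

# Error metrics are exposed negated so that "greater is better" holds
# for every scorer returned by get_scorer.
for name in ('neg_mean_absolute_error', 'neg_mean_squared_error',
             'neg_median_absolute_error'):
    assert get_scorer(name)(reg, X, y) <= 0.0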
    def __init__(self, estimator, min_features=1, max_features=1,
                 print_progress=True, scoring='accuracy',
                 cv=5, n_jobs=1,
                 pre_dispatch='2*n_jobs',
                 clone_estimator=True):
        self.estimator = estimator
        self.min_features = min_features
        self.max_features = max_features
        self.pre_dispatch = pre_dispatch
        self.scoring = scoring
        self.scorer = get_scorer(scoring)
        self.cv = cv
        self.print_progress = print_progress
        self.n_jobs = n_jobs
        self.named_est = {key: value for key, value in
                          _name_estimators([self.estimator])}
        self.clone_estimator = clone_estimator
        if self.clone_estimator:
            self.est_ = clone(self.estimator)
        else:
            self.est_ = self.estimator
        self.fitted = False
Example #53
def test_rfecv():
    generator = check_random_state(0)
    iris = load_iris()
    X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
    y = list(iris.target)   # regression test: list should be supported

    # Test using the score function
    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5)
    rfecv.fit(X, y)
    # non-regression test for missing worst feature:
    assert_equal(len(rfecv.grid_scores_), X.shape[1])
    assert_equal(len(rfecv.ranking_), X.shape[1])
    X_r = rfecv.transform(X)

    # All the noisy variables were filtered out
    assert_array_equal(X_r, iris.data)

    # same in sparse
    rfecv_sparse = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5)
    X_sparse = sparse.csr_matrix(X)
    rfecv_sparse.fit(X_sparse, y)
    X_r_sparse = rfecv_sparse.transform(X_sparse)
    assert_array_equal(X_r_sparse.toarray(), iris.data)

    # Test using a customized loss function
    scoring = make_scorer(zero_one_loss, greater_is_better=False)
    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5,
                  scoring=scoring)
    ignore_warnings(rfecv.fit)(X, y)
    X_r = rfecv.transform(X)
    assert_array_equal(X_r, iris.data)

    # Test using a scorer
    scorer = get_scorer('accuracy')
    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5,
                  scoring=scorer)
    rfecv.fit(X, y)
    X_r = rfecv.transform(X)
    assert_array_equal(X_r, iris.data)

    # Test fix on grid_scores
    def test_scorer(estimator, X, y):
        return 1.0
    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5,
                  scoring=test_scorer)
    rfecv.fit(X, y)
    assert_array_equal(rfecv.grid_scores_, np.ones(len(rfecv.grid_scores_)))
    # In the event of cross validation score ties, the expected behavior of
    # RFECV is to return the FEWEST features that maximize the CV score.
    # Because test_scorer always returns 1.0 in this example, RFECV should
    # reduce the dimensionality to a single feature (i.e. n_features_ = 1)
    assert_equal(rfecv.n_features_, 1)

    # Same as the first two tests, but with step=2
    rfecv = RFECV(estimator=SVC(kernel="linear"), step=2, cv=5)
    rfecv.fit(X, y)
    assert_equal(len(rfecv.grid_scores_), 6)
    assert_equal(len(rfecv.ranking_), X.shape[1])
    X_r = rfecv.transform(X)
    assert_array_equal(X_r, iris.data)

    rfecv_sparse = RFECV(estimator=SVC(kernel="linear"), step=2, cv=5)
    X_sparse = sparse.csr_matrix(X)
    rfecv_sparse.fit(X_sparse, y)
    X_r_sparse = rfecv_sparse.transform(X_sparse)
    assert_array_equal(X_r_sparse.toarray(), iris.data)

    # Verifying that steps < 1 don't blow up.
    rfecv_sparse = RFECV(estimator=SVC(kernel="linear"), step=.2, cv=5)
    X_sparse = sparse.csr_matrix(X)
    rfecv_sparse.fit(X_sparse, y)
    X_r_sparse = rfecv_sparse.transform(X_sparse)
    assert_array_equal(X_r_sparse.toarray(), iris.data)
Example #54
def paired_ttest_5x2cv(estimator1, estimator2, X, y,
                       scoring=None,
                       random_seed=None):
    """
    Implements the 5x2cv paired t test proposed
    by Dietterich (1998)
    to compare the performance of two models.

    Parameters
    ----------
    estimator1 : scikit-learn classifier or regressor

    estimator2 : scikit-learn classifier or regressor

    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training vectors, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape = [n_samples]
        Target values.

    scoring : str, callable, or None (default: None)
        If None (default), uses 'accuracy' for sklearn classifiers
        and 'r2' for sklearn regressors.
        If str, uses a sklearn scoring metric string identifier, for example
        {accuracy, f1, precision, recall, roc_auc} for classifiers,
        {'mean_absolute_error', 'mean_squared_error'/'neg_mean_squared_error',
        'median_absolute_error', 'r2'} for regressors.
        If a callable object or function is provided, it has to conform to
        sklearn's signature ``scorer(estimator, X, y)``; see
        http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html
        for more information.

    random_seed : int or None (default: None)
        Random seed for creating the test/train splits.

    Returns
    ----------
    t : float
        The t-statistic

    pvalue : float
        Two-tailed p-value.
        If the chosen significance level is larger
        than the p-value, we reject the null hypothesis
        and accept that there are significant differences
        in the two compared models.

    """
    rng = np.random.RandomState(random_seed)

    if scoring is None:
        if estimator1._estimator_type == 'classifier':
            scoring = 'accuracy'
        elif estimator1._estimator_type == 'regressor':
            scoring = 'r2'
        else:
            raise AttributeError('Estimator must '
                                 'be a Classifier or Regressor.')
    if isinstance(scoring, str):
        scorer = get_scorer(scoring)
    else:
        scorer = scoring

    variance_sum = 0.
    first_diff = None

    def score_diff(X_1, X_2, y_1, y_2):

        estimator1.fit(X_1, y_1)
        estimator2.fit(X_1, y_1)
        est1_score = scorer(estimator1, X_2, y_2)
        est2_score = scorer(estimator2, X_2, y_2)
        score_diff = est1_score - est2_score
        return score_diff

    for i in range(5):

        randint = rng.randint(low=0, high=32767)
        X_1, X_2, y_1, y_2 = \
            train_test_split(X, y, test_size=0.5,
                             random_state=randint)

        score_diff_1 = score_diff(X_1, X_2, y_1, y_2)
        score_diff_2 = score_diff(X_2, X_1, y_2, y_1)
        score_mean = (score_diff_1 + score_diff_2) / 2.
        score_var = ((score_diff_1 - score_mean)**2 +
                     (score_diff_2 - score_mean)**2)
        variance_sum += score_var
        if first_diff is None:
            first_diff = score_diff_1

    numerator = first_diff
    denominator = np.sqrt(1/5. * variance_sum)
    t_stat = numerator / denominator

    pvalue = stats.t.sf(np.abs(t_stat), 5)*2.
    return float(t_stat), float(pvalue)
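Assuming the function defined above is in scope (it mirrors mlxtend's paired_ttest_5x2cv), a typical call compares two classifiers on the same data; the estimators and dataset below are placeholders chosen only for illustration:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=500, random_state=0)
t, p = paired_ttest_5x2cv(LogisticRegression(max_iter=1000, random_state=0),
                          DecisionTreeClassifier(random_state=0),
                          X, y, scoring='accuracy', random_seed=0)
# Reject the null hypothesis of equal performance when p is below the
# chosen significance level (e.g. 0.05).
print('t = %.3f, p = %.3f' % (t, p))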
def tune(X, y, estimator, param_grid):
    gcv = GridSearchCV(estimator, param_grid, refit=True, scoring=get_scorer('roc_auc'),
                       n_jobs=-1, verbose=5)
    gcv.fit(X, y)
    return gcv.best_estimator_
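A hedged usage sketch for the tune helper above; the estimator, grid values, and dataset are placeholders rather than anything from the original project, and the call assumes GridSearchCV and get_scorer are imported where tune is defined:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=300, random_state=0)
# tune() refits the best 'roc_auc' model on the full data and returns it.
best_clf = tune(X, y, LogisticRegression(max_iter=1000),
                param_grid={'C': [0.1, 1.0, 10.0]})
print(best_clf)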
Example #56
def paired_ttest_resampled(estimator1, estimator2, X, y,
                           num_rounds=30, test_size=0.3,
                           scoring=None,
                           random_seed=None):
    """
    Implements the resampled paired t test procedure
    to compare the performance of two models
    (also called k-hold-out paired t test).

    Parameters
    ----------
    estimator1 : scikit-learn classifier or regressor

    estimator2 : scikit-learn classifier or regressor

    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training vectors, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape = [n_samples]
        Target values.

    num_rounds : int (default: 30)
        Number of resampling iterations
        (i.e., train/test splits)

    test_size : float or int (default: 0.3)
        If float, should be between 0.0 and 1.0 and
        represent the proportion of the dataset to use
        as a test set.
        If int, represents the absolute number of test samples.

    scoring : str, callable, or None (default: None)
        If None (default), uses 'accuracy' for sklearn classifiers
        and 'r2' for sklearn regressors.
        If str, uses a sklearn scoring metric string identifier, for example
        {accuracy, f1, precision, recall, roc_auc} for classifiers,
        {'mean_absolute_error', 'mean_squared_error'/'neg_mean_squared_error',
        'median_absolute_error', 'r2'} for regressors.
        If a callable object or function is provided, it has to conform to
        sklearn's signature ``scorer(estimator, X, y)``; see
        http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html
        for more information.

    random_seed : int or None (default: None)
        Random seed for creating the test/train splits.

    Returns
    ----------
    t : float
        The t-statistic

    pvalue : float
        Two-tailed p-value.
        If the chosen significance level is larger
        than the p-value, we reject the null hypothesis
        and accept that there are significant differences
        in the two compared models.

    """
    if (not isinstance(test_size, int) and not isinstance(test_size, float)):
        raise ValueError('test_size must be of '
                         'type int or float. Got %s.' % type(test_size))

    rng = np.random.RandomState(random_seed)

    if scoring is None:
        if estimator1._estimator_type == 'classifier':
            scoring = 'accuracy'
        elif estimator1._estimator_type == 'regressor':
            scoring = 'r2'
        else:
            raise AttributeError('Estimator must '
                                 'be a Classifier or Regressor.')
    if isinstance(scoring, str):
        scorer = get_scorer(scoring)
    else:
        scorer = scoring

    score_diff = []
    for i in range(num_rounds):

        randint = rng.randint(low=0, high=32767)

        X_train, X_test, y_train, y_test = \
            train_test_split(X, y, test_size=test_size,
                             random_state=randint)

        estimator1.fit(X_train, y_train)
        estimator2.fit(X_train, y_train)

        est1_score = scorer(estimator1, X_test, y_test)
        est2_score = scorer(estimator2, X_test, y_test)
        score_diff.append(est1_score - est2_score)

    avg_diff = np.mean(score_diff)

    numerator = avg_diff * np.sqrt(num_rounds)
    denominator = np.sqrt(sum([(diff - avg_diff)**2 for diff in score_diff])
                          / (num_rounds - 1))
    t_stat = numerator / denominator

    pvalue = stats.t.sf(np.abs(t_stat), num_rounds - 1)*2.
    return float(t_stat), float(pvalue)
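As with the 5x2cv variant, usage is a single call once the function above (mlxtend's paired_ttest_resampled) is available; note that Dietterich (1998) showed the resampled test's score differences are not independent, which is why the 5x2cv procedures are usually preferred. The estimators below are illustrative only:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=500, random_state=0)
t, p = paired_ttest_resampled(LogisticRegression(max_iter=1000, random_state=0),
                              DecisionTreeClassifier(random_state=0),
                              X, y, num_rounds=30, test_size=0.3,
                              scoring='accuracy', random_seed=0)
print('t = %.3f, p = %.3f' % (t, p))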
Example #57
def combined_ftest_5x2cv(estimator1, estimator2, X, y,
                         scoring=None,
                         random_seed=None):
    """
    Implements the 5x2cv combined F test proposed
    by Alpaydin (1999)
    to compare the performance of two models.

    Parameters
    ----------
    estimator1 : scikit-learn classifier or regressor

    estimator2 : scikit-learn classifier or regressor

    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training vectors, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape = [n_samples]
        Target values.

    scoring : str, callable, or None (default: None)
        If None (default), uses 'accuracy' for sklearn classifiers
        and 'r2' for sklearn regressors.
        If str, uses a sklearn scoring metric string identifier, for example
        {accuracy, f1, precision, recall, roc_auc} for classifiers,
        {'mean_absolute_error', 'mean_squared_error'/'neg_mean_squared_error',
        'median_absolute_error', 'r2'} for regressors.
        If a callable object or function is provided, it has to conform to
        sklearn's signature ``scorer(estimator, X, y)``; see
        http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html
        for more information.

    random_seed : int or None (default: None)
        Random seed for creating the test/train splits.

    Returns
    ----------
    f : float
        The F-statistic

    pvalue : float
        Two-tailed p-value.
        If the chosen significance level is larger
        than the p-value, we reject the null hypothesis
        and accept that there are significant differences
        in the two compared models.

    Examples
    -----------
    For usage examples, please see
    http://rasbt.github.io/mlxtend/user_guide/evaluate/combined_ftest_5x2cv/

    """
    rng = np.random.RandomState(random_seed)

    if scoring is None:
        if estimator1._estimator_type == 'classifier':
            scoring = 'accuracy'
        elif estimator1._estimator_type == 'regressor':
            scoring = 'r2'
        else:
            raise AttributeError('Estimator must '
                                 'be a Classifier or Regressor.')
    if isinstance(scoring, str):
        scorer = get_scorer(scoring)
    else:
        scorer = scoring

    variances = []
    differences = []

    def score_diff(X_1, X_2, y_1, y_2):

        estimator1.fit(X_1, y_1)
        estimator2.fit(X_1, y_1)
        est1_score = scorer(estimator1, X_2, y_2)
        est2_score = scorer(estimator2, X_2, y_2)
        score_diff = est1_score - est2_score
        return score_diff

    for i in range(5):

        randint = rng.randint(low=0, high=32767)
        X_1, X_2, y_1, y_2 = \
            train_test_split(X, y, test_size=0.5,
                             random_state=randint)

        score_diff_1 = score_diff(X_1, X_2, y_1, y_2)
        score_diff_2 = score_diff(X_2, X_1, y_2, y_1)
        score_mean = (score_diff_1 + score_diff_2) / 2.
        score_var = ((score_diff_1 - score_mean)**2 +
                     (score_diff_2 - score_mean)**2)

        differences.extend([score_diff_1**2, score_diff_2**2])
        variances.append(score_var)

    numerator = sum(differences)
    denominator = 2*(sum(variances))
    f_stat = numerator / denominator

    p_value = scipy.stats.f.sf(f_stat, 10, 5)

    return float(f_stat), float(p_value)
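A usage sketch for the combined F test above (mlxtend's combined_ftest_5x2cv), again with placeholder estimators and data:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=500, random_state=0)
f, p = combined_ftest_5x2cv(LogisticRegression(max_iter=1000, random_state=0),
                            DecisionTreeClassifier(random_state=0),
                            X, y, scoring='accuracy', random_seed=0)
# Compare p against the chosen significance level, e.g. 0.05.
print('F = %.3f, p = %.3f' % (f, p))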
def test_SearchLight():
    """Test _SearchLight"""
    from sklearn.linear_model import Ridge, LogisticRegression
    from sklearn.pipeline import make_pipeline
    from sklearn.metrics import roc_auc_score, get_scorer, make_scorer

    X, y = make_data()
    n_epochs, _, n_time = X.shape
    # init
    assert_raises(ValueError, _SearchLight, 'foo')
    sl = _SearchLight(Ridge())
    sl = _SearchLight(LogisticRegression())
    # fit
    assert_equal(sl.__repr__()[:14], '<_SearchLight(')
    sl.fit(X, y)
    assert_equal(sl.__repr__()[-28:], ', fitted with 10 estimators>')
    assert_raises(ValueError, sl.fit, X[1:], y)
    assert_raises(ValueError, sl.fit, X[:, :, 0], y)

    # transforms
    assert_raises(ValueError, sl.predict, X[:, :, :2])
    y_pred = sl.predict(X)
    assert_true(y_pred.dtype == int)
    assert_array_equal(y_pred.shape, [n_epochs, n_time])
    y_proba = sl.predict_proba(X)
    assert_true(y_proba.dtype == float)
    assert_array_equal(y_proba.shape, [n_epochs, n_time, 2])

    # score
    score = sl.score(X, y)
    assert_array_equal(score.shape, [n_time])
    assert_true(np.sum(np.abs(score)) != 0)
    assert_true(score.dtype == float)

    sl = _SearchLight(LogisticRegression())
    assert_equal(sl.scoring, None)

    # Scoring method
    for err, scoring in [(ValueError, 'foo'), (TypeError, 999)]:
        sl = _SearchLight(LogisticRegression(), scoring=scoring)
        sl.fit(X, y)
        assert_raises(err, sl.score, X, y)

    # Check sklearn's roc_auc fix: scikit-learn/scikit-learn#6874
    # -- 3 class problem
    sl = _SearchLight(LogisticRegression(random_state=0), scoring='roc_auc')
    y = np.arange(len(X)) % 3
    sl.fit(X, y)
    assert_raises(ValueError, sl.score, X, y)
    # -- 2 class problem not in [0, 1]
    y = np.arange(len(X)) % 2 + 1
    sl.fit(X, y)
    score = sl.score(X, y)
    assert_array_equal(score, [roc_auc_score(y - 1, _y_pred - 1)
                               for _y_pred in sl.decision_function(X).T])
    y = np.arange(len(X)) % 2

    for method, scoring in [
            ('predict_proba', 'roc_auc'), ('predict', roc_auc_score)]:
        sl1 = _SearchLight(LogisticRegression(), scoring=scoring)
        sl1.fit(X, y)
        np.random.seed(0)
        X = np.random.randn(*X.shape)  # randomize X to avoid AUCs of exactly 0 or 1
        score_sl = sl1.score(X, y)
        assert_array_equal(score_sl.shape, [n_time])
        assert_true(score_sl.dtype == float)

        # Check that scoring was applied adequately
        if isinstance(scoring, str):
            scoring = get_scorer(scoring)
        else:
            scoring = make_scorer(scoring)

        score_manual = [scoring(est, x, y) for est, x in zip(
                        sl1.estimators_, X.transpose(2, 0, 1))]
        assert_array_equal(score_manual, score_sl)

    # n_jobs
    sl = _SearchLight(LogisticRegression(random_state=0), n_jobs=1,
                      scoring='roc_auc')
    score_1job = sl.fit(X, y).score(X, y)
    sl.n_jobs = 2
    score_njobs = sl.fit(X, y).score(X, y)
    assert_array_equal(score_1job, score_njobs)
    sl.predict(X)

    # n_jobs > n_estimators
    sl.fit(X[..., [0]], y)
    sl.predict(X[..., [0]])

    # pipeline

    class _LogRegTransformer(LogisticRegression):
        # XXX needs transformer in pipeline to get first proba only
        def transform(self, X):
            return super(_LogRegTransformer, self).predict_proba(X)[..., 1]

    pipe = make_pipeline(_SearchLight(_LogRegTransformer()),
                         LogisticRegression())
    pipe.fit(X, y)
    pipe.predict(X)

    # n-dimensional feature space
    X = np.random.rand(10, 3, 4, 2)
    y = np.arange(10) % 2
    y_preds = list()
    for n_jobs in [1, 2]:
        pipe = _SearchLight(make_pipeline(Vectorizer(), LogisticRegression()),
                            n_jobs=n_jobs)
        y_preds.append(pipe.fit(X, y).predict(X))
        features_shape = pipe.estimators_[0].steps[0][1].features_shape_
        assert_array_equal(features_shape, [3, 4])
    assert_array_equal(y_preds[0], y_preds[1])
Example #59
def _test_ridge_loo(filter_):
    # test that can work with both dense or sparse matrices
    n_samples = X_diabetes.shape[0]

    ret = []

    ridge_gcv = _RidgeGCV(fit_intercept=False)
    ridge = Ridge(alpha=1.0, fit_intercept=False)

    # generalized cross-validation (efficient leave-one-out)
    decomp = ridge_gcv._pre_compute(X_diabetes, y_diabetes)
    errors, c = ridge_gcv._errors(1.0, y_diabetes, *decomp)
    values, c = ridge_gcv._values(1.0, y_diabetes, *decomp)

    # brute-force leave-one-out: remove one example at a time
    errors2 = []
    values2 = []
    for i in range(n_samples):
        sel = np.arange(n_samples) != i
        X_new = X_diabetes[sel]
        y_new = y_diabetes[sel]
        ridge.fit(X_new, y_new)
        value = ridge.predict([X_diabetes[i]])[0]
        error = (y_diabetes[i] - value) ** 2
        errors2.append(error)
        values2.append(value)

    # check that efficient and brute-force LOO give same results
    assert_almost_equal(errors, errors2)
    assert_almost_equal(values, values2)

    # generalized cross-validation (efficient leave-one-out,
    # SVD variation)
    decomp = ridge_gcv._pre_compute_svd(X_diabetes, y_diabetes)
    errors3, c = ridge_gcv._errors_svd(ridge.alpha, y_diabetes, *decomp)
    values3, c = ridge_gcv._values_svd(ridge.alpha, y_diabetes, *decomp)

    # check that efficient and SVD efficient LOO give same results
    assert_almost_equal(errors, errors3)
    assert_almost_equal(values, values3)

    # check best alpha
    ridge_gcv.fit(filter_(X_diabetes), y_diabetes)
    alpha_ = ridge_gcv.alpha_
    ret.append(alpha_)

    # check that we get same best alpha with custom loss_func
    f = ignore_warnings
    scoring = make_scorer(mean_squared_error, greater_is_better=False)
    ridge_gcv2 = RidgeCV(fit_intercept=False, scoring=scoring)
    f(ridge_gcv2.fit)(filter_(X_diabetes), y_diabetes)
    assert_equal(ridge_gcv2.alpha_, alpha_)

    # check that we get same best alpha with custom score_func
    func = lambda x, y: -mean_squared_error(x, y)
    scoring = make_scorer(func)
    ridge_gcv3 = RidgeCV(fit_intercept=False, scoring=scoring)
    f(ridge_gcv3.fit)(filter_(X_diabetes), y_diabetes)
    assert_equal(ridge_gcv3.alpha_, alpha_)

    # check that we get same best alpha with a scorer
    scorer = get_scorer('mean_squared_error')
    ridge_gcv4 = RidgeCV(fit_intercept=False, scoring=scorer)
    ridge_gcv4.fit(filter_(X_diabetes), y_diabetes)
    assert_equal(ridge_gcv4.alpha_, alpha_)

    # check that we get same best alpha with sample weights
    ridge_gcv.fit(filter_(X_diabetes), y_diabetes,
                  sample_weight=np.ones(n_samples))
    assert_equal(ridge_gcv.alpha_, alpha_)

    # simulate several responses
    Y = np.vstack((y_diabetes, y_diabetes)).T

    ridge_gcv.fit(filter_(X_diabetes), Y)
    Y_pred = ridge_gcv.predict(filter_(X_diabetes))
    ridge_gcv.fit(filter_(X_diabetes), y_diabetes)
    y_pred = ridge_gcv.predict(filter_(X_diabetes))

    assert_array_almost_equal(np.vstack((y_pred, y_pred)).T,
                              Y_pred, decimal=5)

    return ret
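The 'mean_squared_error' scorer name used above predates scikit-learn's switch to negated error scores; in current releases the equivalent check would request 'neg_mean_squared_error' instead (a sketch, not the original test):

from sklearn.datasets import load_diabetes
from sklearn.linear_model import RidgeCV
from sklearn.metrics import get_scorer

X, y = load_diabetes(return_X_y=True)
# RidgeCV accepts either a scoring string or a scorer object here.
ridge = RidgeCV(fit_intercept=False,
                scoring=get_scorer('neg_mean_squared_error')).fit(X, y)
print(ridge.alpha_)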
Example #60
def paired_ttest_kfold_cv(estimator1, estimator2, X, y,
                          cv=10,
                          scoring=None,
                          shuffle=False,
                          random_seed=None):
    """
    Implements the k-fold paired t test procedure
    to compare the performance of two models.

    Parameters
    ----------
    estimator1 : scikit-learn classifier or regressor

    estimator2 : scikit-learn classifier or regressor

    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training vectors, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape = [n_samples]
        Target values.

    cv : int (default: 10)
        Number of splits and iteration for the
        cross-validation procedure

    scoring : str, callable, or None (default: None)
        If None (default), uses 'accuracy' for sklearn classifiers
        and 'r2' for sklearn regressors.
        If str, uses a sklearn scoring metric string identifier, for example
        {accuracy, f1, precision, recall, roc_auc} for classifiers,
        {'mean_absolute_error', 'mean_squared_error'/'neg_mean_squared_error',
        'median_absolute_error', 'r2'} for regressors.
        If a callable object or function is provided, it has to conform to
        sklearn's signature ``scorer(estimator, X, y)``; see
        http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html
        for more information.

    shuffle : bool (default: False)
        Whether to shuffle the dataset for generating
        the k-fold splits.

    random_seed : int or None (default: None)
        Random seed for shuffling the dataset
        for generating the k-fold splits.
        Ignored if shuffle=False.

    Returns
    ----------
    t : float
        The t-statistic

    pvalue : float
        Two-tailed p-value.
        If the chosen significance level is larger
        than the p-value, we reject the null hypothesis
        and accept that there are significant differences
        in the two compared models.

    """

    kf = KFold(n_splits=cv, random_state=random_seed, shuffle=shuffle)

    if scoring is None:
        if estimator1._estimator_type == 'classifier':
            scoring = 'accuracy'
        elif estimator1._estimator_type == 'regressor':
            scoring = 'r2'
        else:
            raise AttributeError('Estimator must '
                                 'be a Classifier or Regressor.')
    if isinstance(scoring, str):
        scorer = get_scorer(scoring)
    else:
        scorer = scoring

    score_diff = []

    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        estimator1.fit(X_train, y_train)
        estimator2.fit(X_train, y_train)

        est1_score = scorer(estimator1, X_test, y_test)
        est2_score = scorer(estimator2, X_test, y_test)
        score_diff.append(est1_score - est2_score)

    avg_diff = np.mean(score_diff)

    numerator = avg_diff * np.sqrt(cv)
    denominator = np.sqrt(sum([(diff - avg_diff)**2 for diff in score_diff])
                          / (cv - 1))
    t_stat = numerator / denominator

    pvalue = stats.t.sf(np.abs(t_stat), cv - 1)*2.
    return float(t_stat), float(pvalue)
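Finally, a usage sketch for the k-fold paired t test above (mlxtend's paired_ttest_kfold_cv); the estimators, fold count, and data are illustrative, and shuffle=True is passed so that seeding the KFold split is meaningful:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=500, random_state=0)
t, p = paired_ttest_kfold_cv(LogisticRegression(max_iter=1000, random_state=0),
                             DecisionTreeClassifier(random_state=0),
                             X, y, cv=10, scoring='accuracy',
                             shuffle=True, random_seed=0)
print('t = %.3f, p = %.3f' % (t, p))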