Example 1
import numpy as np
import pytest
from numpy.testing import assert_almost_equal
from sklearn.datasets import make_blobs
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import get_scorer, log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor


def test_thresholded_scorers():
    # Test scorers that take thresholds.
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LogisticRegression(random_state=0)
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.decision_function(X_test))
    score3 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(score1, score2)
    assert_almost_equal(score1, score3)

    logscore = get_scorer('neg_log_loss')(clf, X_test, y_test)
    logloss = log_loss(y_test, clf.predict_proba(X_test))
    assert_almost_equal(-logscore, logloss)

    # same for an estimator without decision_function
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(score1, score2)

    # test with a regressor (no decision_function)
    reg = DecisionTreeRegressor()
    reg.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(reg, X_test, y_test)
    score2 = roc_auc_score(y_test, reg.predict(X_test))
    assert_almost_equal(score1, score2)

    # Test that an exception is raised on more than two classes
    X, y = make_blobs(random_state=0, centers=3)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf.fit(X_train, y_train)
    with pytest.raises(ValueError, match="multiclass format is not supported"):
        get_scorer('roc_auc')(clf, X_test, y_test)

    # test that an error is raised when only a single class is present in
    # the fitted model (predict_proba shape is not suitable for binary AUC)
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = DecisionTreeClassifier()
    clf.fit(X_train, np.zeros_like(y_train))
    with pytest.raises(ValueError, match="need classifier with two classes"):
        get_scorer('roc_auc')(clf, X_test, y_test)

    # the same check applies to probability-based scorers
    with pytest.raises(ValueError, match="need classifier with two classes"):
        get_scorer('neg_log_loss')(clf, X_test, y_test)
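The 'roc_auc' scorer exercised here is the same object that model-selection utilities resolve from scoring='roc_auc'. A minimal usage sketch under that assumption (the dataset and estimator below are illustrative, not part of the test):

from sklearn.datasets import make_blobs
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

X, y = make_blobs(random_state=0, centers=2)
# scoring='roc_auc' resolves to the thresholded scorer tested above: it
# scores decision_function output when available and falls back to
# predict_proba otherwise.
scores = cross_val_score(LogisticRegression(random_state=0), X, y,
                         scoring='roc_auc', cv=5)
print(scores.mean())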
Example 2
import pytest
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import get_scorer


# scorer_name and metric are supplied by a pytest parametrization that is
# not shown in this excerpt.
def test_multiclass_roc_proba_scorer(scorer_name, metric):
    scorer = get_scorer(scorer_name)
    X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
                               random_state=0)
    lr = LogisticRegression(multi_class="multinomial").fit(X, y)
    y_proba = lr.predict_proba(X)
    expected_score = metric(y, y_proba)

    assert scorer(lr, X, y) == pytest.approx(expected_score)
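A self-contained version of the same check for one concrete scorer is sketched below; pairing 'roc_auc_ovr' with multi_class='ovr' is an assumption about how the test is parameterized, not part of the excerpt:

import pytest
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import get_scorer, roc_auc_score

X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
                           random_state=0)
lr = LogisticRegression(multi_class="multinomial").fit(X, y)
# 'roc_auc_ovr' is the registered one-vs-rest multiclass ROC AUC scorer;
# it should agree with calling roc_auc_score on predict_proba directly.
expected = roc_auc_score(y, lr.predict_proba(X), multi_class="ovr")
assert get_scorer("roc_auc_ovr")(lr, X, y) == pytest.approx(expected)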
Example 3
import pytest
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer, roc_auc_score


def test_multiclass_roc_proba_scorer_label():
    scorer = make_scorer(roc_auc_score, multi_class='ovo',
                         labels=[0, 1, 2], needs_proba=True)
    X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
                               random_state=0)
    lr = LogisticRegression(multi_class="multinomial").fit(X, y)
    y_proba = lr.predict_proba(X)

    y_binary = y == 0
    expected_score = roc_auc_score(y_binary, y_proba,
                                   multi_class='ovo',
                                   labels=[0, 1, 2])

    assert scorer(lr, X, y_binary) == pytest.approx(expected_score)
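Passing labels=[0, 1, 2] matters because make_scorer forwards its extra keyword arguments to roc_auc_score at scoring time, so the three-column probability matrix can be scored even though y_binary contains only two of the classes. A minimal sketch of using such a scorer for cross-validation; the data and estimator here are illustrative:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer, roc_auc_score
from sklearn.model_selection import cross_val_score

X, y = make_classification(n_classes=3, n_informative=3, n_samples=60,
                           random_state=0)
# The multi_class and labels keywords given to make_scorer are forwarded
# to roc_auc_score every time the scorer is called.
ovo_auc = make_scorer(roc_auc_score, multi_class='ovo',
                      labels=[0, 1, 2], needs_proba=True)
scores = cross_val_score(LogisticRegression(max_iter=1000), X, y,
                         scoring=ovo_auc, cv=3)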
Example 4
import numpy as np
import pytest
from numpy.testing import assert_array_almost_equal, assert_array_equal
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC


# X and y are module-level fixtures of the test module and are not shown
# in this excerpt.
def test_sample_weight():
    """Tests sample_weight parameter of VotingClassifier"""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = SVC(probability=True, random_state=123)
    eclf1 = VotingClassifier(
        estimators=[('lr', clf1), ('rf', clf2), ('svc', clf3)],
        voting='soft').fit(X, y, sample_weight=np.ones((len(y),)))
    eclf2 = VotingClassifier(
        estimators=[('lr', clf1), ('rf', clf2), ('svc', clf3)],
        voting='soft').fit(X, y)
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))

    sample_weight = np.random.RandomState(123).uniform(size=(len(y), ))
    eclf3 = VotingClassifier(estimators=[('lr', clf1)], voting='soft')
    eclf3.fit(X, y, sample_weight)
    clf1.fit(X, y, sample_weight)
    assert_array_equal(eclf3.predict(X), clf1.predict(X))
    assert_array_almost_equal(eclf3.predict_proba(X), clf1.predict_proba(X))

    # check that an informative error is raised if sample_weight is not
    # supported by one of the underlying estimators.
    clf4 = KNeighborsClassifier()
    eclf3 = VotingClassifier(estimators=[('lr', clf1), ('svc', clf3),
                                         ('knn', clf4)],
                             voting='soft')
    msg = ('Underlying estimator KNeighborsClassifier does not support '
           'sample weights.')
    with pytest.raises(TypeError, match=msg):
        eclf3.fit(X, y, sample_weight)

    # check that _parallel_fit_estimator raises the right error: the original
    # error should propagate when it is unrelated to sample_weight
    class ClassifierErrorFit(ClassifierMixin, BaseEstimator):
        def fit(self, X, y, sample_weight):
            raise TypeError('Error unrelated to sample_weight.')

    clf = ClassifierErrorFit()
    with pytest.raises(TypeError, match='Error unrelated to sample_weight'):
        clf.fit(X, y, sample_weight=sample_weight)
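A standalone sketch of the behaviour this test depends on: VotingClassifier forwards sample_weight to each underlying estimator's fit call, so every sub-estimator sees the same weighting (the dataset and weights below are illustrative):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

X, y = make_classification(random_state=123)
weights = np.random.RandomState(123).uniform(size=len(y))
eclf = VotingClassifier(
    estimators=[('lr', LogisticRegression(random_state=123)),
                ('svc', SVC(probability=True, random_state=123))],
    voting='soft')
# sample_weight is passed through to LogisticRegression.fit and SVC.fit.
eclf.fit(X, y, sample_weight=weights)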