예제 #1
0
def test_grid_search_score_method():
    """Check which metric ``DaskGridSearchCV.score`` reports after fitting.

    Four searches are fitted on the same data and single-point grid:
    ``scoring=None``, ``scoring='accuracy'``, and ``scoring='roc_auc'`` with
    both ``LinearSVC`` and ``LinearSVCNoScore``. The test asserts that the
    ``scoring=None`` search scores the same as the accuracy search, and that
    both ROC AUC searches score the same as each other.
    """
    X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2,
                               random_state=0)
    svc = LinearSVC(random_state=0)
    param_grid = {'C': [.1]}

    def fit_search(estimator, scoring):
        # Build a search over the shared grid and fit it on the shared data.
        search = DaskGridSearchCV(estimator, param_grid, scoring=scoring)
        return search.fit(X, y)

    # Fit order matches the order in which the scores are computed below.
    default_search = fit_search(svc, None)
    accuracy_search = fit_search(svc, 'accuracy')
    auc_search_no_score_method = fit_search(LinearSVCNoScore(), 'roc_auc')
    auc_search = fit_search(svc, 'roc_auc')

    # Score every fitted search on the data it was trained on.
    default_score = default_search.score(X, y)
    accuracy_score_value = accuracy_search.score(X, y)
    auc_score_no_score_method = auc_search_no_score_method.score(X, y)
    auc_score_value = auc_search.score(X, y)

    # Sanity checks: neither metric is saturated, and the two metrics differ,
    # so the equality assertions below are meaningful.
    assert auc_score_value < 1.0
    assert accuracy_score_value < 1.0
    assert auc_score_value != accuracy_score_value

    # scoring=None must agree with explicit accuracy scoring here.
    assert_almost_equal(accuracy_score_value, default_score)
    # ROC AUC must be the same for LinearSVC and LinearSVCNoScore.
    assert_almost_equal(auc_score_value, auc_score_no_score_method)
예제 #2
0
def test_grid_search_no_score():
    """Grid-search a classifier that has no score function.

    With an explicit ``scoring='accuracy'`` the search over
    ``LinearSVCNoScore`` must pick the same best parameters and report the
    same score as the search over ``LinearSVC``. Without any ``scoring``,
    fitting must raise a ``TypeError`` whose message mentions "no scoring".
    """
    X, y = make_blobs(random_state=0, centers=2)
    c_values = [.1, 1, 10]
    scored_clf = LinearSVC(random_state=0)
    unscored_clf = LinearSVCNoScore(random_state=0)

    # XXX: LinearSVC appears to have some global shared state - fitting
    # several `SVC` instances in parallel with threads occasionally gives
    # wrong results. Processes and the sync scheduler are not affected, so
    # both searches below run on the sync scheduler (dask.get).
    search_with_score = DaskGridSearchCV(scored_clf, {'C': c_values},
                                         scoring='accuracy', get=dask.get)
    search_with_score.fit(X, y)

    search_without_score = DaskGridSearchCV(unscored_clf, {'C': c_values},
                                            scoring='accuracy',
                                            get=dask.get)
    # Smoke test: fitting must succeed even though the estimator cannot score.
    search_without_score.fit(X, y)

    # Both searches must settle on the same best parameters ...
    assert search_without_score.best_params_ == search_with_score.best_params_
    # ... and calling score must work and give identical results.
    assert search_with_score.score(X, y) == search_without_score.score(X, y)

    # With no scoring function at all, fitting is expected to fail.
    unscorable_search = DaskGridSearchCV(unscored_clf, {'C': c_values})
    with pytest.raises(TypeError) as excinfo:
        unscorable_search.fit([[1]])
    assert "no scoring" in str(excinfo.value)
예제 #3
0
def test_grid_search():
    """Check best-estimator selection, ``cv_results_`` and delegated methods.

    ``X`` and ``y`` are taken from module scope (defined outside this
    function).
    """
    foo_values = (1, 2, 3)
    search = DaskGridSearchCV(MockClassifier(),
                              {'foo_param': list(foo_values)})

    # The best estimator must carry the right value for foo_param. The
    # original intent is that ties are resolved toward the smallest
    # parameter; the scoring behaviour itself lives in MockClassifier
    # (defined elsewhere - TODO confirm), hence the expected value of 2.
    search.fit(X, y)
    assert search.best_estimator_.foo_param == 2

    # Every candidate must be recorded, in grid order.
    recorded_params = search.cv_results_["param_foo_param"].data
    assert_array_equal(recorded_params, list(foo_values))

    # Smoke test score and the other methods exposed by the fitted search.
    search.score(X, y)
    for method_name in ('predict_proba', 'decision_function', 'transform'):
        getattr(search, method_name)(X)

    # An unknown scoring name must surface as a ValueError when refitting.
    search.scoring = 'sklearn'
    with pytest.raises(ValueError):
        search.fit(X, y)