コード例 #1
0
def test_feature_union_fit_failure_multiple_metrics():
    """Check fit-failure handling for a FeatureUnion with several scorers.

    The first fit is expected to raise ``ValueError``; once ``error_score``
    is switched to NaN the fit must only warn, and every configured metric
    is then verified with ``check_scores_all_nan``.
    """
    X, y = make_classification(n_samples=100, n_features=10, random_state=0)
    scoring = {"score_1": _passthrough_scorer, "score_2": _passthrough_scorer}

    # One well-behaved and one failing transformer inside the union.
    union = FeatureUnion(
        [("good", MockClassifier()), ("bad", FailingClassifier())],
        transformer_weights={"bad": 0.5},
    )
    pipe = Pipeline([("union", union), ("clf", MockClassifier())])

    param_name = "union__bad__parameter"
    gs = dcv.GridSearchCV(
        pipe, {param_name: [0, 1, 2]}, refit=False, scoring=scoring
    )

    # With the initial error_score (`'raise'`) the failure must propagate.
    with pytest.raises(ValueError):
        gs.fit(X, y)

    # With a numeric error_score the failure becomes a warning and the
    # scores are replaced by error_score.
    gs.error_score = float("nan")
    with pytest.warns(FitFailedWarning):
        gs.fit(X, y)

    for metric_name in scoring:
        check_scores_all_nan(gs, param_name, score_key=metric_name)
コード例 #2
0
def test_pipeline_fit_failure():
    """Check fit-failure handling when the first pipeline step fails.

    The grid covers a plain value plus the three ``FailingClassifier``
    failure-trigger constants. The first fit must raise ``ValueError``;
    with ``error_score`` set to NaN the fit must only warn, and the scores
    are then verified with ``check_scores_all_nan``.
    """
    X, y = make_classification(n_samples=100, n_features=10, random_state=0)

    steps = [
        ("bad", FailingClassifier()),
        ("good1", MockClassifier()),
        ("good2", MockClassifier()),
    ]
    pipe = Pipeline(steps)

    param_name = "bad__parameter"
    candidate_values = [
        0,
        FailingClassifier.FAILING_PARAMETER,
        FailingClassifier.FAILING_PREDICT_PARAMETER,
        FailingClassifier.FAILING_SCORE_PARAMETER,
    ]
    gs = dcv.GridSearchCV(pipe, {param_name: candidate_values}, refit=False)

    # With the initial error_score (`'raise'`) the failure must propagate.
    with pytest.raises(ValueError):
        gs.fit(X, y)

    # With a numeric error_score the failure becomes a warning and the
    # scores are replaced by error_score.
    gs.error_score = float("nan")
    with pytest.warns(FitFailedWarning):
        gs.fit(X, y)

    check_scores_all_nan(gs, param_name)
コード例 #3
0
def test_pipeline_raises():
    """Check that invalid pipeline parameter grids are rejected on fit.

    A grid naming a non-existent step must raise ``ValueError``; a grid
    that searches over ``steps`` itself must raise ``NotImplementedError``.
    """
    X, y = make_classification(n_samples=100, n_features=10, random_state=0)

    pipe = Pipeline([("step1", MockClassifier()), ("step2", MockClassifier())])

    def assert_fit_raises(param_grid, expected_error):
        # Build a fresh search for the grid and expect the fit to fail.
        search = dcv.GridSearchCV(pipe, param_grid, refit=False)
        with pytest.raises(expected_error):
            search.fit(X, y)

    # "step3" is not a step of the pipeline.
    assert_fit_raises({"step3__parameter": [0, 1, 2]}, ValueError)

    # Replacing the whole list of steps is expected to be unsupported.
    assert_fit_raises(
        {"steps": [[("one", MockClassifier()), ("two", MockClassifier())]]},
        NotImplementedError,
    )
コード例 #4
0
def test_feature_union_raises():
    """Check that invalid FeatureUnion parameter grids are rejected on fit.

    A grid naming a non-existent transformer must raise ``ValueError``; a
    grid that searches over ``transformer_list`` itself must raise
    ``NotImplementedError``.
    """
    X, y = make_classification(n_samples=100, n_features=10, random_state=0)

    transformers = [("tr0", MockClassifier()), ("tr1", MockClassifier())]
    pipe = Pipeline([
        ("union", FeatureUnion(transformers)),
        ("est", MockClassifier()),
    ])

    # "tr2" is not a member of the union.
    unknown_transformer_grid = {"union__tr2__parameter": [0, 1, 2]}
    search = dcv.GridSearchCV(
        pipe, param_grid=unknown_transformer_grid, refit=False
    )
    with pytest.raises(ValueError):
        search.fit(X, y)

    # Replacing the whole transformer list is expected to be unsupported.
    transformer_list_grid = {
        "union__transformer_list": [[("one", MockClassifier())]]
    }
    search = dcv.GridSearchCV(
        pipe, param_grid=transformer_list_grid, refit=False
    )
    with pytest.raises(NotImplementedError):
        search.fit(X, y)
コード例 #5
0
def test_scheduler_param(scheduler, n_jobs):
    """Smoke test: fitting succeeds for the given scheduler and n_jobs.

    ``scheduler`` and ``n_jobs`` are supplied from outside this function
    (presumably by a pytest parametrization or fixture) and are forwarded
    unchanged to ``dcv.GridSearchCV``.
    """
    X, y = make_classification(n_samples=100, n_features=10, random_state=0)

    search_options = dict(cv=3, scheduler=scheduler, n_jobs=n_jobs)
    search = dcv.GridSearchCV(
        MockClassifier(), {'foo_param': [0, 1, 2]}, **search_options
    )

    # No assertion needed: the test passes as long as fit does not raise.
    search.fit(X, y)
コード例 #6
0
def test_feature_union_raises():
    """Check that invalid FeatureUnion parameter grids are rejected on fit.

    A grid naming a non-existent transformer must raise ``ValueError``; a
    grid that searches over ``transformer_list`` itself must raise
    ``NotImplementedError``.
    """
    X, y = make_classification(n_samples=100, n_features=10, random_state=0)

    members = [('tr0', MockClassifier()), ('tr1', MockClassifier())]
    pipe = Pipeline([('union', FeatureUnion(members)),
                     ('est', MockClassifier())])

    def expect_fit_error(param_grid, error_type):
        # Build a fresh search for the grid and expect the fit to fail.
        search = dcv.GridSearchCV(pipe, param_grid, refit=False)
        with pytest.raises(error_type):
            search.fit(X, y)

    # 'tr2' is not a member of the union.
    expect_fit_error({'union__tr2__parameter': [0, 1, 2]}, ValueError)

    # Replacing the whole transformer list is expected to be unsupported.
    expect_fit_error({'union__transformer_list': [[('one', MockClassifier())]]},
                     NotImplementedError)
コード例 #7
0
def test_bad_error_score():
    """Check that an unsupported ``error_score`` value makes fit raise."""
    X, y = make_classification(n_samples=100, n_features=10, random_state=0)

    param_grid = {"foo_param": [0, 1, 2]}
    search = dcv.GridSearchCV(
        MockClassifier(), param_grid, error_score="badparam"
    )

    # The invalid value is only expected to be rejected at fit time.
    with pytest.raises(ValueError):
        search.fit(X, y)
コード例 #8
0
def test_grid_search_allows_nans():
    """Smoke test: dcv.GridSearchCV accepts NaNs when a SimpleImputer is used.

    One full row of the input is set to NaN; the pipeline imputes it with
    the column mean before the classifier step. The test passes if fit
    completes without raising.
    """
    X = np.arange(20, dtype=np.float64).reshape(5, -1)
    X[2, :] = np.nan  # blank out the middle sample entirely
    y = [0, 0, 1, 1, 1]

    steps = [
        ("imputer", SimpleImputer(strategy="mean", missing_values=np.nan)),
        ("classifier", MockClassifier()),
    ]
    param_grid = {"classifier__foo_param": [1, 2, 3]}

    search = dcv.GridSearchCV(Pipeline(steps), param_grid, cv=2)
    search.fit(X, y)
コード例 #9
0
def test_pipeline_fit_failure():
    """Check fit-failure handling when the first pipeline step fails.

    The first fit must raise ``ValueError``; with ``error_score`` set to
    NaN the fit must only warn, and the scores are then verified with
    ``check_scores_all_nan``.
    """
    X, y = make_classification(n_samples=100, n_features=10, random_state=0)

    steps = [
        ('bad', FailingClassifier()),
        ('good1', MockClassifier()),
        ('good2', MockClassifier()),
    ]
    pipe = Pipeline(steps)

    param_name = 'bad__parameter'
    gs = dcv.GridSearchCV(pipe, {param_name: [0, 1, 2]}, refit=False)

    # With the initial error_score (`'raise'`) the failure must propagate.
    with pytest.raises(ValueError):
        gs.fit(X, y)

    # With a numeric error_score the failure becomes a warning and the
    # scores are replaced by error_score.
    gs.error_score = float('nan')
    with pytest.warns(FitFailedWarning):
        gs.fit(X, y)

    check_scores_all_nan(gs, param_name)
コード例 #10
0
def test_grid_search_allows_nans():
    """Smoke test: dcv.GridSearchCV accepts NaNs when an Imputer is used.

    One full row of the input is set to NaN; the pipeline imputes it with
    the column mean before the classifier step. The test passes if fit
    completes without raising.
    """
    X = np.arange(20, dtype=np.float64).reshape(5, -1)
    X[2, :] = np.nan  # blank out the middle sample entirely
    y = [0, 0, 1, 1, 1]

    imputer = Imputer(strategy='mean', missing_values='NaN')
    pipeline = Pipeline([('imputer', imputer),
                         ('classifier', MockClassifier())])

    param_grid = {'classifier__foo_param': [1, 2, 3]}
    search = dcv.GridSearchCV(pipeline, param_grid, cv=2)
    search.fit(X, y)
コード例 #11
0
def test_scheduler_param_distributed(loop):  # noqa
    """Check that a fit with an active distributed Client uses the cluster.

    After fitting inside the client context, the scheduler's transition
    log must be non-empty, i.e. some work happened on the cluster.
    """
    X, y = make_classification(n_samples=100, n_features=10, random_state=0)

    def count_transitions(dask_scheduler):
        # Executed on the scheduler; reports how many transitions it logged.
        return len(dask_scheduler.transition_log)

    # The unpacking pattern also requires the cluster to expose two workers.
    with cluster() as (sched_info, [_worker_a, _worker_b]):
        with Client(sched_info["address"], loop=loop) as client:
            search = dcv.GridSearchCV(
                MockClassifier(), {"foo_param": [0, 1, 2]}, cv=3
            )
            search.fit(X, y)

            # some work happened on cluster
            assert client.run_on_scheduler(count_transitions)
コード例 #12
0
def test_trivial_cv_results_attr():
    """Check that ``cv_results_`` is set when the search space has one point.

    Non-regression test: grid_scores_ wouldn't be set by dcv.GridSearchCV.
    Both the grid search and the randomized search are fitted on the
    module-level ``X``/``y`` data and each must expose ``cv_results_``.
    """
    clf = MockClassifier()

    # Search over a "grid" with only one point.
    grid_search = dcv.GridSearchCV(clf, {"foo_param": [1]})
    grid_search.fit(X, y)
    assert hasattr(grid_search, "cv_results_")

    random_search = dcv.RandomizedSearchCV(clf, {"foo_param": [0]}, n_iter=1)
    random_search.fit(X, y)
    # Fixed: this previously re-checked ``grid_search``, so the randomized
    # search result was never actually verified.
    assert hasattr(random_search, "cv_results_")
コード例 #13
0
def test_cache_cv():
    """Check how ``cache_cv`` affects the number of data extractions.

    ``CountTakes`` is defined elsewhere; its ``count`` attribute presumably
    tracks how often the array is sliced into train/test subsets. Without
    caching one train and one test extraction is expected per
    (parameter, split) pair; with caching only one pair per split.
    """
    X, y = make_classification(n_samples=100, n_features=10, random_state=0)

    param_values = [0, 1, 2]
    n_params = len(param_values)
    n_splits = 3

    counted = X.view(CountTakes)
    gs = dcv.GridSearchCV(MockClassifier(), {'foo_param': param_values},
                          cv=n_splits,
                          cache_cv=False,
                          scheduler='sync')
    gs.fit(counted, y)
    # (1 train + 1 test) * n_params * n_splits
    assert counted.count == 2 * n_params * n_splits

    # A fresh view starts counting from zero again.
    counted = X.view(CountTakes)
    assert counted.count == 0

    gs.cache_cv = True
    gs.fit(counted, y)
    # (1 test + 1 train) * n_splits
    assert counted.count == 2 * n_splits
コード例 #14
0
def test_pickle():
    """Check that fitted searches survive a pickle round trip.

    Each search is fitted on the module-level ``X``/``y`` data, pickled and
    unpickled, and the predictions of the restored object must match those
    of the original.
    """
    clf = MockClassifier()
    param_space = {"foo_param": [1, 2, 3]}

    def fit_and_check_roundtrip(search):
        # Fit, round-trip through pickle, then compare predictions.
        search.fit(X, y)
        restored = pickle.loads(pickle.dumps(search))
        assert_array_almost_equal(search.predict(X), restored.predict(X))

    fit_and_check_roundtrip(dcv.GridSearchCV(clf, param_space, refit=True))
    fit_and_check_roundtrip(
        dcv.RandomizedSearchCV(clf, param_space, refit=True, n_iter=3)
    )
コード例 #15
0
def test_grid_search_allows_nans():
    """Smoke test: dcv.GridSearchCV accepts NaNs when an imputer is used.

    The imputer class is chosen by comparing ``SK_VERSION`` against
    0.20.0.dev0: ``SimpleImputer`` from that version on, the legacy
    ``Imputer`` otherwise. The test passes if fit completes without raising.
    """
    X = np.arange(20, dtype=np.float64).reshape(5, -1)
    X[2, :] = np.nan  # blank out the middle sample entirely
    y = [0, 0, 1, 1, 1]

    has_simple_imputer = SK_VERSION >= packaging.version.parse("0.20.0.dev0")
    if has_simple_imputer:
        # Imported lazily because the module only exists on newer versions.
        from sklearn.impute import SimpleImputer

        imputer = SimpleImputer(strategy="mean", missing_values=np.nan)
    else:
        from sklearn.preprocessing import Imputer

        imputer = Imputer(strategy="mean", missing_values="NaN")

    pipeline = Pipeline([("imputer", imputer), ("classifier", MockClassifier())])
    param_grid = {"classifier__foo_param": [1, 2, 3]}

    search = dcv.GridSearchCV(pipeline, param_grid, cv=2)
    search.fit(X, y)
コード例 #16
0
def test_no_refit():
    """Check that GSCV works for model selection alone, without refitting.

    With ``refit=False`` none of the ``best_*`` attributes may be set, and
    the prediction/transformation methods must raise ``NotFittedError``
    with a message that points at ``refit=False``.
    """
    clf = MockClassifier()
    grid_search = dcv.GridSearchCV(clf, {'foo_param': [1, 2, 3]}, refit=False)
    grid_search.fit(X, y)

    for attr_name in ("best_estimator_", "best_index_",
                      "best_score_", "best_params_"):
        assert not hasattr(grid_search, attr_name)

    # Make sure the predict/transform etc. functions raise a meaningful
    # error message.
    delegated_methods = ('predict', 'predict_proba', 'predict_log_proba',
                         'transform', 'inverse_transform')
    for fn_name in delegated_methods:
        with pytest.raises(NotFittedError) as exc:
            getattr(grid_search, fn_name)(X)

        expected_fragment = ('refit=False. %s is available only after '
                             'refitting on the best parameters' % fn_name)
        assert expected_fragment in str(exc.value)
コード例 #17
0
def test_grid_search():
    """Check best-parameter selection, cv_results_ and the delegated methods.

    After fitting on the module-level ``X``/``y`` data the best estimator
    must carry ``foo_param == 2`` and ``cv_results_`` must list all tried
    values. The delegated methods are then smoke-tested, and finally a
    ``scoring`` value of "sklearn" must make a refit raise ``ValueError``.
    """
    clf = MockClassifier()
    candidate_values = [1, 2, 3]
    grid_search = dcv.GridSearchCV(clf, {"foo_param": candidate_values})

    # make sure it selects the smallest parameter in case of ties
    grid_search.fit(X, y)
    assert grid_search.best_estimator_.foo_param == 2

    tried_values = grid_search.cv_results_["param_foo_param"].data
    assert_array_equal(tried_values, [1, 2, 3])

    # Smoke test the score etc:
    grid_search.score(X, y)
    for method_name in ("predict_proba", "decision_function", "transform"):
        getattr(grid_search, method_name)(X)

    # Test exception handling on scoring
    grid_search.scoring = "sklearn"
    with pytest.raises(ValueError):
        grid_search.fit(X, y)