Ejemplo n.º 1
0
def test_grid_search_score_method():
    X, y = make_classification(n_samples=100,
                               n_classes=2,
                               flip_y=.2,
                               random_state=0)
    clf = LinearSVC(random_state=0)
    grid = {'C': [.1]}

    search_no_scoring = dcv.GridSearchCV(clf, grid, scoring=None).fit(X, y)
    search_accuracy = dcv.GridSearchCV(clf, grid, scoring='accuracy').fit(X, y)
    search_no_score_method_auc = dcv.GridSearchCV(LinearSVCNoScore(),
                                                  grid,
                                                  scoring='roc_auc').fit(X, y)
    search_auc = dcv.GridSearchCV(clf, grid, scoring='roc_auc').fit(X, y)

    # Check warning only occurs in situation where behavior changed:
    # estimator requires score method to compete with scoring parameter
    score_no_scoring = search_no_scoring.score(X, y)
    score_accuracy = search_accuracy.score(X, y)
    score_no_score_auc = search_no_score_method_auc.score(X, y)
    score_auc = search_auc.score(X, y)

    # ensure the test is sane
    assert score_auc < 1.0
    assert score_accuracy < 1.0
    assert score_auc != score_accuracy

    assert_almost_equal(score_accuracy, score_no_scoring)
    assert_almost_equal(score_auc, score_no_score_auc)
Ejemplo n.º 2
0
def test_return_train_score_warn():
    # Test that warnings are raised. Will be removed in sklearn 0.21
    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)
    grid = {'C': [1, 2]}

    for val in [True, False]:
        est = dcv.GridSearchCV(LinearSVC(random_state=0),
                               grid,
                               return_train_score=val)
        with pytest.warns(None) as warns:
            results = est.fit(X, y).cv_results_
        assert not warns
        assert type(results) is dict

    est = dcv.GridSearchCV(LinearSVC(random_state=0), grid)
    with pytest.warns(None) as warns:
        results = est.fit(X, y).cv_results_
    assert not warns

    train_keys = {
        'split0_train_score', 'split1_train_score', 'split2_train_score',
        'mean_train_score', 'std_train_score'
    }

    for key in results:
        if key in train_keys:
            with pytest.warns(FutureWarning):
                results[key]
        else:
            with pytest.warns(None) as warns:
                results[key]
            assert not warns
Ejemplo n.º 3
0
def test_grid_search_no_score():
    # Test grid-search on classifier that has no score function.
    clf = LinearSVC(random_state=0)
    X, y = make_blobs(random_state=0, centers=2)
    Cs = [.1, 1, 10]
    clf_no_score = LinearSVCNoScore(random_state=0)

    # XXX: It seems there's some global shared state in LinearSVC - fitting
    # multiple `SVC` instances in parallel using threads sometimes results in
    # wrong results. This only happens with threads, not processes/sync.
    # For now, we'll fit using the sync scheduler.
    grid_search = dcv.GridSearchCV(clf, {'C': Cs},
                                   scoring='accuracy',
                                   scheduler='sync')
    grid_search.fit(X, y)

    grid_search_no_score = dcv.GridSearchCV(clf_no_score, {'C': Cs},
                                            scoring='accuracy',
                                            scheduler='sync')
    # smoketest grid search
    grid_search_no_score.fit(X, y)

    # check that best params are equal
    assert grid_search_no_score.best_params_ == grid_search.best_params_
    # check that we can call score and that it gives the correct result
    assert grid_search.score(X, y) == grid_search_no_score.score(X, y)

    # giving no scoring function raises an error
    grid_search_no_score = dcv.GridSearchCV(clf_no_score, {'C': Cs})
    with pytest.raises(TypeError) as exc:
        grid_search_no_score.fit([[1]])
    assert "no scoring" in str(exc.value)
def test_grid_search_failing_classifier():
    X, y = make_classification(n_samples=20, n_features=10, random_state=0)
    clf = FailingClassifier()

    # refit=False because we want to test the behaviour of the grid search part
    gs = dcv.GridSearchCV(clf, [{'parameter': [0, 1, 2]}], scoring='accuracy',
                          refit=False, error_score=0.0)

    with pytest.warns(FitFailedWarning):
        gs.fit(X, y)

    n_candidates = len(gs.cv_results_['params'])

    # Ensure that grid scores were set to zero as required for those fits
    # that are expected to fail.
    def get_cand_scores(i):
        return np.array(list(gs.cv_results_['split%d_test_score' % s][i]
                             for s in range(gs.n_splits_)))

    assert all((np.all(get_cand_scores(cand_i) == 0.0)
                for cand_i in range(n_candidates)
                if gs.cv_results_['param_parameter'][cand_i] ==
                FailingClassifier.FAILING_PARAMETER))

    gs = dcv.GridSearchCV(clf, [{'parameter': [0, 1, 2]}], scoring='accuracy',
                          refit=False, error_score=float('nan'))

    with pytest.warns(FitFailedWarning):
        gs.fit(X, y)

    n_candidates = len(gs.cv_results_['params'])
    assert all(np.all(np.isnan(get_cand_scores(cand_i)))
               for cand_i in range(n_candidates)
               if gs.cv_results_['param_parameter'][cand_i] ==
               FailingClassifier.FAILING_PARAMETER)
Ejemplo n.º 5
0
def test_grid_search_bad_param_grid():
    param_dict = {"C": 1.0}
    clf = SVC()

    with pytest.raises(ValueError) as exc:
        dcv.GridSearchCV(clf, param_dict)
    assert ("Parameter values for parameter (C) need to be a sequence"
            "(but not a string) or np.ndarray.") in str(exc.value)

    param_dict = {"C": []}
    clf = SVC()

    with pytest.raises(ValueError) as exc:
        dcv.GridSearchCV(clf, param_dict)
    assert ("Parameter values for parameter (C) need to be a non-empty "
            "sequence.") in str(exc.value)

    param_dict = {"C": "1,2,3"}
    clf = SVC()

    with pytest.raises(ValueError) as exc:
        dcv.GridSearchCV(clf, param_dict)
    assert ("Parameter values for parameter (C) need to be a sequence"
            "(but not a string) or np.ndarray.") in str(exc.value)

    param_dict = {"C": np.ones(6).reshape(3, 2)}
    clf = SVC()
    with pytest.raises(ValueError):
        dcv.GridSearchCV(clf, param_dict)
Ejemplo n.º 6
0
def test_grid_search_sparse_scoring():
    X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)

    clf = LinearSVC()
    cv = dcv.GridSearchCV(clf, {'C': [0.1, 1.0]}, scoring="f1")
    cv.fit(X_[:180], y_[:180])
    y_pred = cv.predict(X_[180:])
    C = cv.best_estimator_.C

    X_ = sp.csr_matrix(X_)
    clf = LinearSVC()
    cv = dcv.GridSearchCV(clf, {'C': [0.1, 1.0]}, scoring="f1")
    cv.fit(X_[:180], y_[:180])
    y_pred2 = cv.predict(X_[180:])
    C2 = cv.best_estimator_.C

    assert_array_equal(y_pred, y_pred2)
    assert C == C2

    # Smoke test the score
    # np.testing.assert_allclose(f1_score(cv.predict(X_[:180]), y[:180]),
    #                            cv.score(X_[:180], y[:180]))

    # test loss where greater is worse
    def f1_loss(y_true_, y_pred_):
        return -f1_score(y_true_, y_pred_)

    F1Loss = make_scorer(f1_loss, greater_is_better=False)
    cv = dcv.GridSearchCV(clf, {'C': [0.1, 1.0]}, scoring=F1Loss)
    cv.fit(X_[:180], y_[:180])
    y_pred3 = cv.predict(X_[180:])
    C3 = cv.best_estimator_.C

    assert C == C3
    assert_array_equal(y_pred, y_pred3)
Ejemplo n.º 7
0
def test_grid_search_cv_results():
    X, y = make_classification(n_samples=50, n_features=4, random_state=42)

    n_splits = 3
    n_grid_points = 6
    params = [
        dict(kernel=[
            'rbf',
        ], C=[1, 10], gamma=[0.1, 1]),
        dict(kernel=[
            'poly',
        ], degree=[1, 2])
    ]
    grid_search = dcv.GridSearchCV(SVC(),
                                   cv=n_splits,
                                   iid=False,
                                   param_grid=params)
    grid_search.fit(X, y)
    grid_search_iid = dcv.GridSearchCV(SVC(),
                                       cv=n_splits,
                                       iid=True,
                                       param_grid=params)
    grid_search_iid.fit(X, y)

    param_keys = ('param_C', 'param_degree', 'param_gamma', 'param_kernel')
    score_keys = ('mean_test_score', 'mean_train_score', 'rank_test_score',
                  'split0_test_score', 'split1_test_score',
                  'split2_test_score', 'split0_train_score',
                  'split1_train_score', 'split2_train_score', 'std_test_score',
                  'std_train_score', 'mean_fit_time', 'std_fit_time',
                  'mean_score_time', 'std_score_time')
    n_candidates = n_grid_points

    for search, iid in zip((grid_search, grid_search_iid), (False, True)):
        assert iid == search.iid
        cv_results = search.cv_results_
        # Check if score and timing are reasonable
        assert all(cv_results['rank_test_score'] >= 1)
        assert all(
            all(cv_results[k] >= 0) for k in score_keys
            if k != 'rank_test_score')
        assert all(
            all(cv_results[k] <= 1) for k in score_keys
            if 'time' not in k and k != 'rank_test_score')
        # Check cv_results structure
        check_cv_results_array_types(cv_results, param_keys, score_keys)
        check_cv_results_keys(cv_results, param_keys, score_keys, n_candidates)
        # Check masking
        cv_results = grid_search.cv_results_
        n_candidates = len(grid_search.cv_results_['params'])
        assert all((
            cv_results['param_C'].mask[i] and cv_results['param_gamma'].mask[i]
            and not cv_results['param_degree'].mask[i])
                   for i in range(n_candidates)
                   if cv_results['param_kernel'][i] == 'linear')
        assert all((not cv_results['param_C'].mask[i]
                    and not cv_results['param_gamma'].mask[i]
                    and cv_results['param_degree'].mask[i])
                   for i in range(n_candidates)
                   if cv_results['param_kernel'][i] == 'rbf')
Ejemplo n.º 8
0
def test_grid_search_groups():
    # Check if ValueError (when groups is None) propagates to dcv.GridSearchCV
    # And also check if groups is correctly passed to the cv object
    rng = np.random.RandomState(0)

    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    groups = rng.randint(0, 3, 15)

    clf = LinearSVC(random_state=0)
    grid = {'C': [1]}

    group_cvs = [
        LeaveOneGroupOut(),
        LeavePGroupsOut(2),
        GroupKFold(),
        GroupShuffleSplit()
    ]
    for cv in group_cvs:
        gs = dcv.GridSearchCV(clf, grid, cv=cv)

        with pytest.raises(ValueError) as exc:
            assert gs.fit(X, y)
        assert "The groups parameter should not be None" in str(exc.value)

        gs.fit(X, y, groups=groups)

    non_group_cvs = [StratifiedKFold(), StratifiedShuffleSplit()]
    for cv in non_group_cvs:
        gs = dcv.GridSearchCV(clf, grid, cv=cv)
        # Should not raise an error
        gs.fit(X, y)
Ejemplo n.º 9
0
def test_classes__property():
    # Test that classes_ property matches best_estimator_.classes_
    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)
    Cs = [.1, 1, 10]

    grid_search = dcv.GridSearchCV(LinearSVC(random_state=0), {'C': Cs})
    grid_search.fit(X, y)
    assert_array_equal(grid_search.best_estimator_.classes_,
                       grid_search.classes_)

    # Test that regressors do not have a classes_ attribute
    grid_search = dcv.GridSearchCV(Ridge(), {'alpha': [1.0, 2.0]})
    grid_search.fit(X, y)
    assert not hasattr(grid_search, 'classes_')

    # Test that the grid searcher has no classes_ attribute before it's fit
    grid_search = dcv.GridSearchCV(LinearSVC(random_state=0), {'C': Cs})
    assert not hasattr(grid_search, 'classes_')

    # Test that the grid searcher has no classes_ attribute without a refit
    grid_search = dcv.GridSearchCV(LinearSVC(random_state=0), {'C': Cs},
                                   refit=False)
    grid_search.fit(X, y)
    assert not hasattr(grid_search, 'classes_')
Ejemplo n.º 10
0
def test_pipeline_raises():
    X, y = make_classification(n_samples=100, n_features=10, random_state=0)

    pipe = Pipeline([('step1', MockClassifier()), ('step2', MockClassifier())])

    grid = {'step3__parameter': [0, 1, 2]}
    gs = dcv.GridSearchCV(pipe, grid, refit=False)
    with pytest.raises(ValueError):
        gs.fit(X, y)

    grid = {'steps': [[('one', MockClassifier()), ('two', MockClassifier())]]}
    gs = dcv.GridSearchCV(pipe, grid, refit=False)
    with pytest.raises(NotImplementedError):
        gs.fit(X, y)
def test_unsupervised_grid_search():
    # test grid-search with unsupervised estimator
    X, y = make_blobs(random_state=0)
    km = KMeans(random_state=0)
    grid_search = dcv.GridSearchCV(km, param_grid=dict(n_clusters=[2, 3, 4]),
                                   scoring='adjusted_rand_score')
    grid_search.fit(X, y)
    # ARI can find the right number :)
    assert grid_search.best_params_["n_clusters"] == 3

    # Now without a score, and without y
    grid_search = dcv.GridSearchCV(km, param_grid=dict(n_clusters=[2, 3, 4]))
    grid_search.fit(X)
    assert grid_search.best_params_["n_clusters"] == 4
Ejemplo n.º 12
0
def test_bad_error_score():
    X, y = make_classification(n_samples=100, n_features=10, random_state=0)
    gs = dcv.GridSearchCV(MockClassifier(), {'foo_param': [0, 1, 2]},
                          error_score='badparam')

    with pytest.raises(ValueError):
        gs.fit(X, y)
Ejemplo n.º 13
0
def test_feature_union_raises():
    X, y = make_classification(n_samples=100, n_features=10, random_state=0)

    union = FeatureUnion([('tr0', MockClassifier()),
                          ('tr1', MockClassifier())])
    pipe = Pipeline([('union', union), ('est', MockClassifier())])

    grid = {'union__tr2__parameter': [0, 1, 2]}
    gs = dcv.GridSearchCV(pipe, grid, refit=False)
    with pytest.raises(ValueError):
        gs.fit(X, y)

    grid = {'union__transformer_list': [[('one', MockClassifier())]]}
    gs = dcv.GridSearchCV(pipe, grid, refit=False)
    with pytest.raises(NotImplementedError):
        gs.fit(X, y)
Ejemplo n.º 14
0
def test_grid_search_dask_inputs():
    # Numpy versions
    np_X, np_y = make_classification(n_samples=15, n_classes=2, random_state=0)
    np_groups = np.random.RandomState(0).randint(0, 3, 15)
    # Dask array versions
    da_X = da.from_array(np_X, chunks=5)
    da_y = da.from_array(np_y, chunks=5)
    da_groups = da.from_array(np_groups, chunks=5)
    # Delayed versions
    del_X = delayed(np_X)
    del_y = delayed(np_y)
    del_groups = delayed(np_groups)

    cv = GroupKFold()
    clf = SVC(random_state=0)
    grid = {'C': [1]}

    sol = SVC(C=1, random_state=0).fit(np_X, np_y).support_vectors_

    for X, y, groups in product([np_X, da_X, del_X], [np_y, da_y, del_y],
                                [np_groups, da_groups, del_groups]):
        gs = dcv.GridSearchCV(clf, grid, cv=cv)

        with pytest.raises(ValueError) as exc:
            gs.fit(X, y)
        assert "The groups parameter should not be None" in str(exc.value)

        gs.fit(X, y, groups=groups)
        np.testing.assert_allclose(sol, gs.best_estimator_.support_vectors_)
Ejemplo n.º 15
0
def test_multiple_metrics():
    scoring = {'AUC': 'roc_auc', 'Accuracy': make_scorer(accuracy_score)}

    # Setting refit='AUC', refits an estimator on the whole dataset with the
    # parameter setting that has the best cross-validated AUC score.
    # That estimator is made available at ``gs.best_estimator_`` along with
    # parameters like ``gs.best_score_``, ``gs.best_parameters_`` and
    # ``gs.best_index_``
    gs = dcv.GridSearchCV(DecisionTreeClassifier(random_state=42),
                          param_grid={'min_samples_split': range(2, 403, 10)},
                          scoring=scoring,
                          cv=5,
                          refit='AUC')
    gs.fit(da_X, da_y)
    # some basic checks
    assert set(gs.scorer_) == {'AUC', 'Accuracy'}
    cv_results = gs.cv_results_.keys()
    assert 'split0_test_AUC' in cv_results
    assert 'split0_train_AUC' in cv_results

    assert 'split0_test_Accuracy' in cv_results
    assert 'split0_test_Accuracy' in cv_results

    assert 'mean_train_AUC' in cv_results
    assert 'mean_train_Accuracy' in cv_results

    assert 'std_train_AUC' in cv_results
    assert 'std_train_Accuracy' in cv_results
Ejemplo n.º 16
0
def test_predict_proba_disabled():
    # Test predict_proba when disabled on estimator.
    X = np.arange(20).reshape(5, -1)
    y = [0, 0, 1, 1, 1]
    clf = SVC(probability=False)
    gs = dcv.GridSearchCV(clf, {}, cv=2).fit(X, y)
    assert not hasattr(gs, "predict_proba")
Ejemplo n.º 17
0
def test_search_cv_results_rank_tie_breaking():
    X, y = make_blobs(n_samples=50, random_state=42)

    # The two C values are close enough to give similar models
    # which would result in a tie of their mean cv-scores
    param_grid = {'C': [1, 1.001, 0.001]}

    grid_search = dcv.GridSearchCV(SVC(), param_grid=param_grid)
    random_search = dcv.RandomizedSearchCV(SVC(),
                                           n_iter=3,
                                           param_distributions=param_grid)

    for search in (grid_search, random_search):
        search.fit(X, y)
        cv_results = search.cv_results_
        # Check tie breaking strategy -
        # Check that there is a tie in the mean scores between
        # candidates 1 and 2 alone
        assert_almost_equal(cv_results['mean_test_score'][0],
                            cv_results['mean_test_score'][1])
        assert_almost_equal(cv_results['mean_train_score'][0],
                            cv_results['mean_train_score'][1])
        try:
            assert_almost_equal(cv_results['mean_test_score'][1],
                                cv_results['mean_test_score'][2])
        except AssertionError:
            pass
        try:
            assert_almost_equal(cv_results['mean_train_score'][1],
                                cv_results['mean_train_score'][2])
        except AssertionError:
            pass
        # 'min' rank should be assigned to the tied candidates
        assert_almost_equal(search.cv_results_['rank_test_score'], [1, 1, 3])
Ejemplo n.º 18
0
def test_grid_search_precomputed_kernel():
    # Test that grid search works when the input features are given in the
    # form of a precomputed kernel matrix
    X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)

    # compute the training kernel matrix corresponding to the linear kernel
    K_train = np.dot(X_[:180], X_[:180].T)
    y_train = y_[:180]

    clf = SVC(kernel='precomputed')
    cv = dcv.GridSearchCV(clf, {'C': [0.1, 1.0]})
    cv.fit(K_train, y_train)

    assert cv.best_score_ >= 0

    # compute the test kernel matrix
    K_test = np.dot(X_[180:], X_[:180].T)
    y_test = y_[180:]

    y_pred = cv.predict(K_test)

    assert np.mean(y_pred == y_test) >= 0

    # test error is raised when the precomputed kernel is not array-like
    # or sparse
    with pytest.raises(ValueError):
        cv.fit(K_train.tolist(), y_train)
Ejemplo n.º 19
0
def test_scheduler_param_distributed(loop):
    X, y = make_classification(n_samples=100, n_features=10, random_state=0)
    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop, set_as_default=False) as client:
            gs = dcv.GridSearchCV(MockClassifier(), {'foo_param': [0, 1, 2]},
                                  cv=3,
                                  scheduler=client)
            gs.fit(X, y)
def test_search_train_scores_set_to_false():
    X = np.arange(6).reshape(6, -1)
    y = [0, 0, 0, 1, 1, 1]
    clf = LinearSVC(random_state=0)

    gs = dcv.GridSearchCV(clf, param_grid={'C': [0.1, 0.2]},
                          return_train_score=False)
    gs.fit(X, y)
Ejemplo n.º 21
0
def test_grid_search_error():
    # Test that grid search will capture errors on data with different length
    X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)

    clf = LinearSVC()
    cv = dcv.GridSearchCV(clf, {'C': [0.1, 1.0]})
    with pytest.raises(ValueError):
        cv.fit(X_[:180], y_)
Ejemplo n.º 22
0
def test_gridsearch_nd():
    # Pass X as list in dcv.GridSearchCV
    X_4d = np.arange(10 * 5 * 3 * 2).reshape(10, 5, 3, 2)
    y_3d = np.arange(10 * 7 * 11).reshape(10, 7, 11)
    clf = CheckingClassifier(check_X=lambda x: x.shape[1:] == (5, 3, 2),
                             check_y=lambda x: x.shape[1:] == (7, 11))
    grid_search = dcv.GridSearchCV(clf, {'foo_param': [1, 2, 3]})
    grid_search.fit(X_4d, y_3d).score(X, y)
    assert hasattr(grid_search, "cv_results_")
Ejemplo n.º 23
0
def test_grid_search_precomputed_kernel_error_nonsquare():
    # Test that grid search returns an error with a non-square precomputed
    # training kernel matrix
    K_train = np.zeros((10, 20))
    y_train = np.ones((10, ))
    clf = SVC(kernel='precomputed')
    cv = dcv.GridSearchCV(clf, {'C': [0.1, 1.0]})
    with pytest.raises(ValueError):
        cv.fit(K_train, y_train)
Ejemplo n.º 24
0
def test_grid_search_allows_nans():
    # Test dcv.GridSearchCV with Imputer
    X = np.arange(20, dtype=np.float64).reshape(5, -1)
    X[2, :] = np.nan
    y = [0, 0, 1, 1, 1]
    p = Pipeline([
        ('imputer', Imputer(strategy='mean', missing_values='NaN')),
        ('classifier', MockClassifier()),
    ])
    dcv.GridSearchCV(p, {'classifier__foo_param': [1, 2, 3]}, cv=2).fit(X, y)
Ejemplo n.º 25
0
def test_search_cv_results_none_param():
    X, y = [[1], [2], [3], [4], [5]], [0, 0, 0, 0, 1]
    estimators = (DecisionTreeRegressor(), DecisionTreeClassifier())
    est_parameters = {"random_state": [0, None]}
    cv = KFold(random_state=0)

    for est in estimators:
        grid_search = dcv.GridSearchCV(est, est_parameters, cv=cv).fit(X, y)
        assert_array_equal(grid_search.cv_results_['param_random_state'],
                           [0, None])
Ejemplo n.º 26
0
def test_y_as_list():
    # Pass y as list in dcv.GridSearchCV
    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)

    clf = CheckingClassifier(check_y=lambda x: isinstance(x, list))
    cv = KFold(n_splits=3)
    grid_search = dcv.GridSearchCV(clf, {'foo_param': [1, 2, 3]}, cv=cv)
    grid_search.fit(X, y.tolist()).score(X, y)
    assert hasattr(grid_search, "cv_results_")
def test_refit():
    # Regression test for bug in refitting
    # Simulates re-fitting a broken estimator; this used to break with
    # sparse SVMs.
    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)

    clf = dcv.GridSearchCV(BrokenClassifier(), [{'parameter': [0, 1]}],
                           scoring="precision", refit=True)
    clf.fit(X, y)
Ejemplo n.º 28
0
def test_grid_search_sparse():
    # Test that grid search works with both dense and sparse matrices
    X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)

    clf = LinearSVC()
    cv = dcv.GridSearchCV(clf, {'C': [0.1, 1.0]})
    cv.fit(X_[:180], y_[:180])
    y_pred = cv.predict(X_[180:])
    C = cv.best_estimator_.C

    X_ = sp.csr_matrix(X_)
    clf = LinearSVC()
    cv = dcv.GridSearchCV(clf, {'C': [0.1, 1.0]})
    cv.fit(X_[:180].tocoo(), y_[:180])
    y_pred2 = cv.predict(X_[180:])
    C2 = cv.best_estimator_.C

    assert np.mean(y_pred == y_pred2) >= .9
    assert C == C2
def test_visualize():
    pytest.importorskip('graphviz')

    X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2,
                               random_state=0)
    clf = SVC(random_state=0)
    grid = {'C': [.1, .5, .9]}
    gs = dcv.GridSearchCV(clf, grid).fit(X, y)

    assert hasattr(gs, 'dask_graph_')

    with tmpdir() as d:
        gs.visualize(filename=os.path.join(d, 'mydask'))
        assert os.path.exists(os.path.join(d, 'mydask.png'))

    # Doesn't work if not fitted
    gs = dcv.GridSearchCV(clf, grid)
    with pytest.raises(NotFittedError):
        gs.visualize()
Ejemplo n.º 30
0
def test_trivial_cv_results_attr():
    # Test search over a "grid" with only one point.
    # Non-regression test: grid_scores_ wouldn't be set by dcv.GridSearchCV.
    clf = MockClassifier()
    grid_search = dcv.GridSearchCV(clf, {'foo_param': [1]})
    grid_search.fit(X, y)
    assert hasattr(grid_search, "cv_results_")

    random_search = dcv.RandomizedSearchCV(clf, {'foo_param': [0]}, n_iter=1)
    random_search.fit(X, y)
    assert hasattr(grid_search, "cv_results_")