def test_grid_search_sparse():
    # Test that grid search works with both dense and sparse matrices
    X, y = make_classification(n_samples=200, n_features=100, random_state=0)

    cv = GridSearchCV(LinearSVC(), {'C': [0.1, 1.0]})
    cv.fit(X[:180], y[:180])
    y_pred = cv.predict(X[180:])
    C = cv.best_estimator_.C

    X = sparse.csr_matrix(X)
    cv.fit(X[:180], y[:180])
    y_pred2 = cv.predict(X[180:])
    C2 = cv.best_estimator_.C

    assert np.mean(y_pred == y_pred2) >= .9
    assert C == C2
def test_grid_search_dask_inputs():
    # Test that the best estimator contains the right value for foo_param
    dX = da.from_array(X, chunks=2)
    dy = da.from_array(y, chunks=2)
    clf = MockClassifier()
    grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]})
    # make sure it selects the smallest parameter in case of ties
    grid_search.fit(dX, dy)
    assert grid_search.best_estimator_.foo_param == 2
    for i, foo_i in enumerate([1, 2, 3]):
        assert grid_search.grid_scores_[i][0] == {'foo_param': foo_i}

    y_pred = grid_search.predict(dX)
    assert isinstance(y_pred, da.Array)
    tm.assert_array_equal(y_pred, X.sum(axis=1))

    y_pred = grid_search.predict(X)
    assert isinstance(y_pred, np.ndarray)
    tm.assert_array_equal(y_pred, X.sum(axis=1))
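
# ---------------------------------------------------------------------------
# The tests above reference module-level names (X, y, MockClassifier, np, da,
# tm, sparse, make_classification, LinearSVC, GridSearchCV) that are normally
# defined near the top of this test module. Below is a minimal sketch of what
# those fixtures could look like, inferred from the assertions above; the
# import paths, dataset shape, and MockClassifier behaviour are assumptions,
# not the canonical definitions.
# ---------------------------------------------------------------------------
import numpy as np
import numpy.testing as tm  # assumed alias providing assert_array_equal
import dask.array as da
from scipy import sparse
from sklearn.base import BaseEstimator
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC
# assumed dask-aware GridSearchCV; older code imported it from dask_searchcv
from dask_ml.model_selection import GridSearchCV

# Small module-level dataset shared by the dask-input test (shape is assumed).
X, y = make_classification(n_samples=20, n_features=4, random_state=0)


class MockClassifier(BaseEstimator):
    """Dummy estimator consistent with the checks above: foo_param values 2
    and 3 tie on score (so the search keeps the first, 2), and predict
    returns row sums, matching tm.assert_array_equal(y_pred, X.sum(axis=1)).
    """

    def __init__(self, foo_param=0):
        self.foo_param = foo_param

    def fit(self, X, y):
        # no-op fit; the grid search only cares about score/predict
        return self

    def score(self, X=None, y=None):
        # scores 1.0 for foo_param > 1, producing the tie between 2 and 3
        return 1.0 if self.foo_param > 1 else 0.0

    def predict(self, X):
        # row sums, so predictions on dask input stay lazy dask arrays
        return X.sum(axis=1)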