def test_gridsearch_nd():
    # dcv.GridSearchCV should accept inputs with more than two dimensions.
    # The CheckingClassifier callbacks require that the trailing dimensions
    # of X and y are (5, 3, 2) and (7, 11) respectively.
    n_samples = 10
    X_4d = np.arange(n_samples * 5 * 3 * 2).reshape(n_samples, 5, 3, 2)
    y_3d = np.arange(n_samples * 7 * 11).reshape(n_samples, 7, 11)

    clf = CheckingClassifier(
        check_X=lambda x: x.shape[1:] == (5, 3, 2),
        check_y=lambda x: x.shape[1:] == (7, 11),
    )
    param_grid = {'foo_param': [1, 2, 3]}
    grid_search = dcv.GridSearchCV(clf, param_grid)

    fitted = grid_search.fit(X_4d, y_3d)
    # NOTE(review): X and y are not defined inside this function; presumably
    # they are module-level fixtures -- confirm against the rest of the file.
    fitted.score(X, y)

    assert hasattr(grid_search, "cv_results_")
def test_y_as_list():
    # The target may be given to dcv.GridSearchCV.fit as a plain Python list;
    # the classifier's check_y callback requires that it sees a list.
    features = np.arange(100).reshape(10, 10)
    labels = [0] * 5 + [1] * 5
    target = np.array(labels)

    clf = CheckingClassifier(check_y=lambda x: isinstance(x, list))
    splitter = KFold(n_splits=3)
    param_grid = {'foo_param': [1, 2, 3]}
    grid_search = dcv.GridSearchCV(clf, param_grid, cv=splitter)

    # Fit on the list form of the target, score against the array form.
    fitted = grid_search.fit(features, target.tolist())
    fitted.score(features, target)

    assert hasattr(grid_search, "cv_results_")
def test_hyperparameter_searcher_with_fit_params(cls, kwargs):
    # Fit parameters addressed to a pipeline step ("clf__<name>") must reach
    # the underlying classifier, both as numpy arrays and as dask objects.
    # NOTE(review): cls and kwargs are presumably supplied by a pytest
    # parametrization that is not visible in this view -- confirm.
    features = np.arange(100).reshape(10, 10)
    labels = [0] * 5 + [1] * 5
    target = np.array(labels)

    clf = CheckingClassifier(expected_fit_params=['spam', 'eggs'])
    steps = [('clf', clf)]
    pipe = Pipeline(steps)
    param_grid = {'clf__foo_param': [1, 2, 3]}
    searcher = cls(pipe, param_grid, cv=2, **kwargs)

    # Leaving out 'eggs' must surface the classifier's AssertionError
    # naming the missing fit parameter.
    incomplete_params = {'clf__spam': np.ones(10)}
    with pytest.raises(AssertionError) as excinfo:
        searcher.fit(features, target, **incomplete_params)
    message = str(excinfo.value)
    assert "Expected fit parameter(s) ['eggs'] not seen." in message

    # With both expected parameters supplied, fitting goes through.
    numpy_params = {'clf__spam': np.ones(10), 'clf__eggs': np.zeros(10)}
    searcher.fit(features, target, **numpy_params)

    # Same again, this time with a dask array and a delayed value.
    dask_params = {
        'clf__spam': da.ones(10, chunks=2),
        'clf__eggs': dask.delayed(np.zeros(10)),
    }
    searcher.fit(features, target, **dask_params)
def test_pandas_input():
    # dcv.GridSearchCV must hand dataframe-like X and series-like y to the
    # estimator without changing their types. MockDataFrame is always
    # exercised; the real pandas types are added when pandas is importable.
    type_pairs = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
    except ImportError:
        pass
    else:
        type_pairs.append((DataFrame, Series))

    raw_X = np.arange(100).reshape(10, 10)
    labels = [0] * 5 + [1] * 5
    raw_y = np.array(labels)
    param_grid = {'foo_param': [1, 2, 3]}

    for InputFeatureType, TargetType in type_pairs:
        # Wrap the raw arrays in the container types under test.
        X_df = InputFeatureType(raw_X)
        y_ser = TargetType(raw_y)

        # The callbacks close over the loop variables; they are only used
        # within this same iteration.
        clf = CheckingClassifier(
            check_X=lambda x: isinstance(x, InputFeatureType),
            check_y=lambda x: isinstance(x, TargetType),
        )
        grid_search = dcv.GridSearchCV(clf, param_grid)

        fitted = grid_search.fit(X_df, y_ser)
        fitted.score(X_df, y_ser)
        grid_search.predict(X_df)

        assert hasattr(grid_search, "cv_results_")