Example #1
def test_X_as_list():
    # Pass X as list in GridSearchCV
    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)

    clf = CheckingClassifier(check_X=lambda x: isinstance(x, list))
    cv = KFold(n_splits=3)
    grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, cv=cv)
    grid_search.fit(X.tolist(), y).score(X, y)
    assert_true(hasattr(grid_search, "results_"))
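All of these snippets revolve around CheckingClassifier, a mock estimator from scikit-learn's test utilities that does no real learning and instead asserts properties of the data handed to fit(). As a rough, self-contained sketch of the idea (a hypothetical MiniCheckingClassifier, not the actual scikit-learn class):

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin

# Illustrative stand-in for CheckingClassifier: validates X/y via user-supplied
# callables and exposes a dummy hyperparameter so a grid search has something
# to tune. Hypothetical sketch, not the scikit-learn implementation.
class MiniCheckingClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, check_X=None, check_y=None, foo_param=0):
        self.check_X = check_X
        self.check_y = check_y
        self.foo_param = foo_param

    def fit(self, X, y):
        # Assert the inputs arrived in the expected form (list, shape, type, ...).
        if self.check_X is not None:
            assert self.check_X(X)
        if self.check_y is not None:
            assert self.check_y(y)
        self.classes_ = np.unique(y)
        return self

    def predict(self, X):
        # Constant prediction; these tests only care about the input checks.
        return np.full(len(X), self.classes_[0])

    def score(self, X=None, y=None):
        # Favour larger foo_param so the search has a well-defined best candidate.
        return 1.0 if self.foo_param > 1 else 0.0

With a mock like this, GridSearchCV(clf, {'foo_param': [1, 2, 3]}) can run a complete search while the assertions verify in what form the data reached fit().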
Example #2
def test_gridsearch_nd():
    # Pass multidimensional arrays as X and y to GridSearchCV
    X_4d = np.arange(10 * 5 * 3 * 2).reshape(10, 5, 3, 2)
    y_3d = np.arange(10 * 7 * 11).reshape(10, 7, 11)
    check_X = lambda x: x.shape[1:] == (5, 3, 2)
    check_y = lambda x: x.shape[1:] == (7, 11)
    clf = CheckingClassifier(check_X=check_X, check_y=check_y)
    grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]})
    grid_search.fit(X_4d, y_3d).score(X_4d, y_3d)
    assert_true(hasattr(grid_search, "results_"))
Example #3
def test_y_as_list():
    # Pass y as list in GridSearchCV
    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)

    clf = CheckingClassifier(check_y=lambda x: isinstance(x, list))
    cv = KFold(n=len(X), n_folds=3)
    grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, cv=cv)
    grid_search.fit(X, y.tolist()).score(X, y)
    assert_true(hasattr(grid_search, "grid_scores_"))
Example #4
    def test_gridsearch_nd(self):
        # Pass multidimensional arrays as X and y to ATGridSearchCV
        X_4d = np.arange(10 * 5 * 3 * 2).reshape(10, 5, 3, 2)
        y_3d = np.arange(10 * 7 * 11).reshape(10, 7, 11)
        check_X = lambda x: x.shape[1:] == (5, 3, 2)
        check_y = lambda x: x.shape[1:] == (7, 11)
        clf = CheckingClassifier(check_X=check_X, check_y=check_y)
        grid_search = ATGridSearchCV(clf, {'foo_param': [1, 2, 3]},
                                     webserver_url=self.live_server_url)
        wait(grid_search.fit(X_4d, y_3d))
        assert_true(hasattr(grid_search, "grid_scores_"))
Example #5
def test_cross_val_score():
    clf = MockClassifier()
    for a in range(-10, 10):
        clf.a = a
        # Smoke test
        scores = cval.cross_val_score(clf, X, y)
        assert_array_equal(scores, clf.score(X, y))

        # test with multioutput y
        scores = cval.cross_val_score(clf, X_sparse, X)
        assert_array_equal(scores, clf.score(X_sparse, X))

        scores = cval.cross_val_score(clf, X_sparse, y)
        assert_array_equal(scores, clf.score(X_sparse, y))

        # test with multioutput y
        scores = cval.cross_val_score(clf, X_sparse, X)
        assert_array_equal(scores, clf.score(X_sparse, X))

    # test with X and y as list
    list_check = lambda x: isinstance(x, list)
    clf = CheckingClassifier(check_X=list_check)
    scores = cval.cross_val_score(clf, X.tolist(), y.tolist())

    clf = CheckingClassifier(check_y=list_check)
    scores = cval.cross_val_score(clf, X, y.tolist())

    assert_raises(ValueError,
                  cval.cross_val_score,
                  clf,
                  X,
                  y,
                  scoring="sklearn")

    # test with 3d X
    X_3d = X[:, :, np.newaxis]
    clf = MockClassifier(allow_nd=True)
    scores = cval.cross_val_score(clf, X_3d, y)

    clf = MockClassifier(allow_nd=False)
    assert_raises(ValueError, cval.cross_val_score, clf, X_3d, y)
Example #6
    def test_X_as_list(self):
        # Pass X as list in GridSearchCV
        X = np.arange(100).reshape(10, 10)
        y = np.array([0] * 5 + [1] * 5)

        clf = CheckingClassifier(check_X=lambda x: isinstance(x, list))
        cv = KFold(n=len(X), n_folds=3)
        grid_search = ATGridSearchCV(clf, {'foo_param': [1, 2, 3]},
                                     cv=cv,
                                     webserver_url=self.live_server_url)
        wait(grid_search.fit(X.tolist(), y))
        assert_true(hasattr(grid_search, "grid_scores_"))
Example #7
def test_gridsearch_feature_extractor():
    X = data
    y = np.ones((X.shape[0], ))  # dummy labels
    pipe = Pipeline([
        ('FE', FeatureExtractor(sfreq=sfreq, selected_funcs=['higuchi_fd'])),
        ('clf',
         CheckingClassifier(
             check_X=lambda arr: arr.shape[1:] == (X.shape[1], )))
    ])
    params_grid = {'FE__higuchi_fd__kmax': [5, 10]}
    gs = GridSearchCV(estimator=pipe, param_grid=params_grid)
    gs.fit(X, y)
    assert_equal(hasattr(gs, 'cv_results_'), True)
Example #8
def test_cross_val_predict_input_types():
    iris = load_iris()
    X, y = iris.data, iris.target
    X_sparse = coo_matrix(X)
    multioutput_y = np.column_stack([y, y[::-1]])

    clf = Ridge(fit_intercept=False, random_state=0)
    # 3-fold cv is used --> at least 3 samples per class
    # Smoke test
    predictions = cross_val_predict(clf, X, y)
    assert_equal(predictions.shape, (150, ))

    # test with multioutput y
    predictions = cross_val_predict(clf, X_sparse, multioutput_y)
    assert_equal(predictions.shape, (150, 2))

    predictions = cross_val_predict(clf, X_sparse, y)
    assert_array_equal(predictions.shape, (150, ))

    # test with multioutput y
    predictions = cross_val_predict(clf, X_sparse, multioutput_y)
    assert_array_equal(predictions.shape, (150, 2))

    # test with X and y as list
    list_check = lambda x: isinstance(x, list)
    clf = CheckingClassifier(check_X=list_check)
    predictions = cross_val_predict(clf, X.tolist(), y.tolist())

    clf = CheckingClassifier(check_y=list_check)
    predictions = cross_val_predict(clf, X, y.tolist())

    # test with 3d X
    X_3d = X[:, :, np.newaxis]
    check_3d = lambda x: x.ndim == 3
    clf = CheckingClassifier(check_X=check_3d)
    predictions = cross_val_predict(clf, X_3d, y)
    assert_array_equal(predictions.shape, (150, ))
Example #9
def test_cross_val_score_pandas():
    # check cross_val_score doesn't destroy pandas dataframe
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((Series, DataFrame))
    except ImportError:
        pass
    for TargetType, InputFeatureType in types:
        # X dataframe, y series
        X_df, y_ser = InputFeatureType(X), TargetType(y)
        check_df = lambda x: isinstance(x, InputFeatureType)
        check_series = lambda x: isinstance(x, TargetType)
        clf = CheckingClassifier(check_X=check_df, check_y=check_series)
        cval.cross_val_score(clf, X_df, y_ser)
Example #10
def test_intercept_model():
    data = patsy.demo_data("x1", "x2", "x3", "y")

    def check_X_no_intercept(X):
        return X.shape[1] == 2

    # check whether X contains only the two features, no intercept
    est = PatsyModel(CheckingClassifier(check_X=check_X_no_intercept),
                     "y ~ x1 + x2")
    est.fit(data)
    # predict checks applying to new data
    est.predict(data)

    def check_X_intercept(X):
        shape_correct = X.shape[1] == 3
        first_is_intercept = np.all(X[:, 0] == 1)
        return shape_correct and first_is_intercept

    # check whether X does contain the intercept
    est = PatsyModel(CheckingClassifier(check_X=check_X_intercept),
                     "y ~ x1 + x2",
                     add_intercept=True)
    est.fit(data)
    est.predict(data)
Example #11
def test_stateful_model():
    data_train = patsy.demo_data("x1", "x2", "y")
    data_train['x1'][:] = 1
    # mean of x1 is 1
    data_test = patsy.demo_data("x1", "x2", "y")
    data_test['x1'][:] = 0

    # center x1
    est = PatsyModel(CheckingClassifier(), "y ~ center(x1) + x2")
    est.fit(data_train)

    def check_centering(X):
        return np.all(X[:, 0] == -1)

    est.estimator_.check_X = check_centering
    # make sure that the mean of the training data, not the test data, was removed
    est.predict(data_test)
Example #12
def test_permutation_test_score_pandas():
    # check permutation_test_score doesn't destroy pandas dataframe
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((Series, DataFrame))
    except ImportError:
        pass
    for TargetType, InputFeatureType in types:
        # X dataframe, y series
        iris = load_iris()
        X, y = iris.data, iris.target
        X_df, y_ser = InputFeatureType(X), TargetType(y)
        check_df = lambda x: isinstance(x, InputFeatureType)
        check_series = lambda x: isinstance(x, TargetType)
        clf = CheckingClassifier(check_X=check_df, check_y=check_series)
        permutation_test_score(clf, X_df, y_ser)
Example #13
def test_scope_model():
    data = patsy.demo_data("x1", "x2", "x3", "y")

    def myfunc(x):
        tmp = np.ones_like(x)
        tmp.fill(42)
        return tmp

    def check_X(X):
        return np.all(X[:, 1] == 42)

    # CheckingClassifier raises an error if check_X doesn't return True;
    # this checks that myfunc was actually applied
    est = PatsyModel(CheckingClassifier(check_X=check_X),
                     "y ~ x1 + myfunc(x2)")
    est.fit(data)

    # test feature names
    assert_equal(est.feature_names_, ["x1", "myfunc(x2)"])
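test_scope_model works because patsy resolves myfunc from the scope in which the formula is evaluated when it builds the design matrix. A small standalone illustration of that patsy behaviour (the transform and the "- 1" term are only for demonstration; PatsyModel itself strips the intercept unless add_intercept=True):

import numpy as np
import patsy

def myfunc(x):
    # Hypothetical transform: replace every value with 42.
    return np.full(len(x), 42.0)

data = patsy.demo_data("x1", "x2", "y")
# patsy looks up "myfunc" in the calling environment, so the second column of
# the design matrix is the transformed x2; "- 1" drops patsy's intercept column.
design = patsy.dmatrix("x1 + myfunc(x2) - 1", data)
print(np.asarray(design)[:3])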
Example #14
def check_hyperparameter_searcher_with_fit_params(klass, **klass_kwargs):
    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)
    clf = CheckingClassifier(expected_fit_params=['spam', 'eggs'])
    searcher = klass(clf, {'foo_param': [1, 2, 3]}, cv=2, **klass_kwargs)

    # The CheckingClassifier generates an assertion error if
    # a parameter is missing or has length != len(X).
    assert_raise_message(AssertionError,
                         "Expected fit parameter(s) ['eggs'] not seen.",
                         searcher.fit,
                         X,
                         y,
                         spam=np.ones(10))
    assert_raise_message(AssertionError,
                         "Fit parameter spam has length 1; expected 4.",
                         searcher.fit,
                         X,
                         y,
                         spam=np.ones(1),
                         eggs=np.zeros(10))
    searcher.fit(X, y, spam=np.ones(10), eggs=np.zeros(10))
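The assertion messages above come from the mock's own validation of routed fit parameters: the searcher is expected to forward spam and eggs to fit() and to slice them per CV split just like X and y. A rough sketch of such a check (hypothetical helper, not the scikit-learn code):

def _check_fit_params(X, expected_fit_params, fit_params):
    # Every expected parameter has to be forwarded by the searcher...
    missing = sorted(set(expected_fit_params) - set(fit_params))
    assert not missing, "Expected fit parameter(s) %s not seen." % missing
    # ...and sliced to the current training split, i.e. to len(X) samples.
    for name, value in fit_params.items():
        assert len(value) == len(X), (
            "Fit parameter %s has length %d; expected %d."
            % (name, len(value), len(X)))

Omitting eggs therefore trips the missing-parameter assertion, and spam=np.ones(1) trips the length assertion inside the first fold.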
Example #15
def test_pandas_input():
    # check cross_val_score doesn't destroy pandas dataframe
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((DataFrame, Series))
    except ImportError:
        pass

    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)

    for InputFeatureType, TargetType in types:
        # X dataframe, y series
        X_df, y_ser = InputFeatureType(X), TargetType(y)
        check_df = lambda x: isinstance(x, InputFeatureType)
        check_series = lambda x: isinstance(x, TargetType)
        clf = CheckingClassifier(check_X=check_df, check_y=check_series)

        grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]})
        grid_search.fit(X_df, y_ser).score(X_df, y_ser)
        grid_search.predict(X_df)
        assert_true(hasattr(grid_search, "grid_scores_"))