Code example #1
# Shared imports for all of the examples below. OutSamplerTransformer comes
# from aikit; the exact import path is an assumption here.
import numpy as np
import pytest
from sklearn.dummy import DummyRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.exceptions import NotFittedError
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from aikit.models.stacking import OutSamplerTransformer


@pytest.mark.parametrize("multi_output", [True, False])
def test_approx_cross_validation_OutSamplerTransformer_regressor(multi_output):

    np.random.seed(123)
    X = np.random.randn(100, 10)
    if multi_output:
        y = np.random.randn(100, 2)
    else:
        y = np.random.randn(100)

    model = OutSamplerTransformer(RandomForestRegressor(n_estimators=10,
                                                        random_state=123),
                                  cv=10)

    cv_res, yhat = model.approx_cross_validation(X,
                                                 y,
                                                 cv=10,
                                                 method="transform",
                                                 no_scoring=True,
                                                 return_predict=True)

    assert cv_res is None
    assert yhat.ndim == 2
    if multi_output:
        assert yhat.shape == y.shape
    else:
        assert yhat.shape == (y.shape[0], 1)

    # approx_cross_validation must not fit the model in place
    with pytest.raises(NotFittedError):
        model.transform(X)

    cv = KFold(n_splits=10, shuffle=True, random_state=123)

    model = OutSamplerTransformer(DummyRegressor(), cv=cv)
    yhat1 = model.fit_transform(X, y)

    cv_res, yhat2 = model.approx_cross_validation(X,
                                                  y,
                                                  cv=cv,
                                                  method="transform",
                                                  no_scoring=True,
                                                  return_predict=True)
    # Approx cross val and fit transform should return the same thing here
    assert np.abs((yhat1 - yhat2).flatten()).max() <= 10**(-5)

    if multi_output:
        yhat3 = np.zeros(y.shape)
    else:
        yhat3 = np.zeros((y.shape[0], 1))

    # Rebuild the out-of-fold predictions by hand with the same splitter
    for train, test in cv.split(X, y):
        model = DummyRegressor()
        model.fit(X[train, :], y[train])

        if multi_output:
            yhat3[test, :] = model.predict(X[test, :])
        else:
            yhat3[test, 0] = model.predict(X[test, :])

    assert np.abs((yhat1 - yhat3).flatten()).max() <= 10**(-5)
    assert np.abs((yhat1 - yhat2).flatten()).max() <= 10**(-5)
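The manual KFold loop above is the standard out-of-fold scheme, and scikit-learn ships the same logic as cross_val_predict. A minimal cross-check sketch, reusing X, y, yhat1 and cv from the example above:

# Sketch: cross_val_predict builds the same out-of-fold vector that the
# manual loop above assembles into yhat3 (X, y, cv reused from the example).
from sklearn.model_selection import cross_val_predict

yhat_cvp = cross_val_predict(DummyRegressor(), X, y, cv=cv)
assert np.abs(yhat1.ravel() - yhat_cvp.ravel()).max() <= 1e-5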
Code example #2
def test_OutSampleTransformer_classifier_unbalanced():
    np.random.seed(123)
    X = np.random.randn(100, 2)
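    # 100 labels: three classes with 33 samples each plus a single "DD" sample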
    y = np.array(["AA"] * 33 + ["BB"] * 33 + ["CC"] * 33 + ["DD"])

    model = OutSamplerTransformer(
        RandomForestClassifier(n_estimators=10, random_state=123))

    p3 = model.fit_transform(X, y)

    assert (p3.max(axis=1) > 0).all()
Code example #3
def test_approx_cross_validation_OutSamplerTransformer_classifier():

    np.random.seed(123)
    X = np.random.randn(100, 10)
    y = 1 * (np.random.randn(100) > 0)

    model = OutSamplerTransformer(RandomForestClassifier(random_state=123),
                                  cv=10)

    cv_res, yhat = model.approx_cross_validation(X,
                                                 y,
                                                 cv=10,
                                                 method="transform",
                                                 no_scoring=True,
                                                 return_predict=True)

    assert cv_res is None
    assert yhat.ndim == 2
    assert yhat.shape == (y.shape[0], 1)

    # Neither the wrapper nor the wrapped model should be fitted in place
    with pytest.raises(NotFittedError):
        model.transform(X)

    with pytest.raises(NotFittedError):
        model.model.predict(X)

    cv = KFold(n_splits=10, shuffle=True, random_state=123)
    model = OutSamplerTransformer(LogisticRegression(C=1, random_state=123),
                                  cv=cv)
    yhat1 = model.fit_transform(X, y)

    model = OutSamplerTransformer(LogisticRegression(C=1, random_state=123),
                                  cv=cv)
    cv_res, yhat2 = model.approx_cross_validation(X,
                                                  y,
                                                  cv=cv,
                                                  method="transform",
                                                  no_scoring=True,
                                                  return_predict=True)

    # Approx cross val and fit transform should return the same thing here
    assert np.abs((yhat1 - yhat2).flatten()).max() <= 10**(-5)

    yhat3 = np.zeros((y.shape[0], 1))

    # Rebuild the out-of-fold probabilities by hand with the same splitter
    for train, test in cv.split(X, y):
        model = LogisticRegression()
        model.fit(X[train, :], y[train])

        yhat3[test, 0] = model.predict_proba(X[test, :])[:, 1]

    assert np.abs((yhat1 - yhat3).flatten()).max() <= 10**(-5)
    assert np.abs((yhat1 - yhat2).flatten()).max() <= 10**(-5)
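The same cross-check applies to the classifier: cross_val_predict with method="predict_proba" reproduces the class-1 probabilities that the manual loop collects. A minimal sketch, reusing X, y, yhat3 and cv from the example above:

# Sketch: out-of-fold P(class 1) from cross_val_predict matches the manual
# loop above (same default LogisticRegression, same splitter).
from sklearn.model_selection import cross_val_predict

proba_cvp = cross_val_predict(LogisticRegression(), X, y, cv=cv,
                              method="predict_proba")[:, 1]
assert np.abs(yhat3[:, 0] - proba_cvp).max() <= 1e-5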
Code example #4
def test_OutSamplerTransformer_regressor_fit_transform():

    np.random.seed(123)
    X = np.random.randn(100, 10)
    y = np.random.randn(100)

    cv = KFold(n_splits=10, shuffle=True, random_state=123)

    model = OutSamplerTransformer(DummyRegressor(), cv=cv)
    model.fit(X, y)
    y1 = model.transform(X)

    model = OutSamplerTransformer(DummyRegressor(), cv=cv)
    y2 = model.fit_transform(X, y)

    assert np.abs(y1 - y2).flatten().max() >= 0.01  # vectors should be different
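The gap this assert relies on is easy to see with DummyRegressor: fitted on all of X it predicts the single global mean of y for every row, whereas out-of-fold predictions change from fold to fold. A minimal sketch, reusing X, y and cv from the example above:

# Sketch: the in-sample prediction is one constant (the mean of y), while the
# out-of-fold vector takes up to n_splits distinct values, so the two differ.
from sklearn.model_selection import cross_val_predict

in_sample = DummyRegressor().fit(X, y).predict(X)
out_of_fold = cross_val_predict(DummyRegressor(), X, y, cv=cv)
assert np.unique(in_sample).size == 1
assert np.unique(out_of_fold).size > 1

Examples #5 and #6 below rest on the same in-sample versus out-of-fold gap, with classifier probabilities in place of the regression mean.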
Code example #5
def test_OutSamplerTransformer_classifier_fit_transform():

    np.random.seed(123)
    X = np.random.randn(100, 10)
    y = 1 * (np.random.randn(100) > 0)

    cv = KFold(n_splits=10, shuffle=True, random_state=123)

    model = OutSamplerTransformer(LogisticRegression(C=1, random_state=123),
                                  cv=cv)
    model.fit(X, y)
    y1 = model.transform(X)

    model = OutSamplerTransformer(LogisticRegression(C=1, random_state=123),
                                  cv=cv)
    y2 = model.fit_transform(X, y)

    assert np.abs(y1 - y2).flatten().max() >= 0.01  # vectors should be different
Code example #6
@pytest.mark.parametrize("multi_output", [True, False])
def test_OutSamplerTransformer_classifier_fit_transform(multi_output):

    np.random.seed(123)
    X = np.random.randn(100, 10)
    if multi_output:
        y = 1 * (np.random.randn(100, 2) > 0)
    else:
        y = 1 * (np.random.randn(100) > 0)

    cv = KFold(n_splits=10, shuffle=True, random_state=123)

    model = OutSamplerTransformer(RandomForestClassifier(n_estimators=10,
                                                         random_state=123),
                                  cv=cv)

    model.fit(X, y)
    y1 = model.transform(X)

    model = OutSamplerTransformer(RandomForestClassifier(n_estimators=10,
                                                         random_state=123),
                                  cv=cv)
    y2 = model.fit_transform(X, y)

    assert np.abs(y1 - y2).flatten().max() >= 0.01  # vectors should be different