예제 #1
0
def test_function_sampler_func(X, y):
    """Check that FunctionSampler resamples with a user-supplied callable.

    The callable keeps only the first ten entries of ``X`` and ``y``; the
    sampler output is compared against those same slices.
    """
    def keep_first_ten(data, target):
        # Truncate both inputs to their first ten entries.
        return data[:10], target[:10]

    truncating_sampler = FunctionSampler(func=keep_first_ten)
    resampled = truncating_sampler.fit_sample(X, y)
    X_res, y_res = resampled
    assert_allclose_dense_sparse(X_res, X[:10])
    assert_array_equal(y_res, y[:10])
예제 #2
0
def test_function_sampler_func(X, y):
    """Verify FunctionSampler output when ``func`` slices off ten samples.

    ``fit_sample`` is expected to hand back ``X[:10]`` and ``y[:10]``,
    which is exactly what the wrapped callable returns.
    """
    def head_ten(features, labels):
        # Only the leading ten entries of each input are kept.
        return features[:10], labels[:10]

    X_res, y_res = FunctionSampler(func=head_ten).fit_sample(X, y)

    assert_allclose_dense_sparse(X_res, X[:10])
    assert_array_equal(y_res, y[:10])
예제 #3
0
def test_function_sampler_func_kwargs(X, y):
    """Check that ``kw_args`` are forwarded to the wrapped callable.

    The callable delegates to a RandomUnderSampler built from the forwarded
    ``ratio`` and ``random_state``; its output must match a
    RandomUnderSampler created directly with ``random_state=0``.
    """
    def under_sample(data, target, ratio, random_state):
        # Build the under-sampler from the keyword arguments received.
        under_sampler = RandomUnderSampler(ratio=ratio,
                                           random_state=random_state)
        return under_sampler.fit_sample(data, target)

    forwarded = {'ratio': 'auto', 'random_state': 0}
    wrapped_sampler = FunctionSampler(func=under_sample, kw_args=forwarded)
    X_res, y_res = wrapped_sampler.fit_sample(X, y)

    # Reference result from calling the under-sampler without the wrapper.
    reference_sampler = RandomUnderSampler(random_state=0)
    X_res_2, y_res_2 = reference_sampler.fit_sample(X, y)

    assert_allclose_dense_sparse(X_res, X_res_2)
    assert_array_equal(y_res, y_res_2)
예제 #4
0
def test_function_sampler_identity(X, y):
    """Check that a FunctionSampler built without arguments returns its input.

    The resampled ``X`` and ``y`` are compared against the originals.
    """
    identity_sampler = FunctionSampler()
    resampled = identity_sampler.fit_sample(X, y)
    X_res, y_res = resampled
    assert_allclose_dense_sparse(X_res, X)
    assert_array_equal(y_res, y)
# Build the test set by stacking the `moons` and `blobs` arrays row-wise.
X_test = np.vstack([moons, blobs])
# One label per row: 1 for every `moons` row, 0 for every `blobs` row (int8).
y_test = np.hstack([np.ones(moons.shape[0], dtype=np.int8),
                    np.zeros(blobs.shape[0], dtype=np.int8)])

plot_scatter(X_test, y_test, 'Testing dataset')


def outlier_rejection(X, y):
    """Drop the samples that an IsolationForest does not label as 1.

    An IsolationForest (``max_samples=100``, ``contamination=0.4``, seeded
    with the module-level ``rng``) is fitted on ``X`` and then used to
    predict on the same ``X``. Only the rows predicted as 1 are kept, which
    is the inlier label in scikit-learn's convention.

    Returns
    -------
    tuple
        ``(X, y)`` restricted to the rows predicted as 1.
    """
    forest = IsolationForest(
        max_samples=100,
        contamination=0.4,
        random_state=rng,
    )
    forest.fit(X)
    predicted = forest.predict(X)
    # Compute the boolean mask once and reuse it for both arrays.
    inlier_mask = predicted == 1
    return X[inlier_mask], y[inlier_mask]


# Run the outlier rejection on its own through a FunctionSampler and plot
# the training samples that are kept.
reject_sampler = FunctionSampler(func=outlier_rejection)
X_inliers, y_inliers = reject_sampler.fit_sample(X_train, y_train)
plot_scatter(X_inliers, y_inliers, 'Training data without outliers')

# Chain the same rejection step with a logistic regression, fit on the
# training data and report the predictions on the test set.
# NOTE(review): presumably the sampler is applied during fit only and not at
# predict time -- confirm against the make_pipeline implementation in use.
pipe = make_pipeline(FunctionSampler(func=outlier_rejection),
                     LogisticRegression(random_state=rng))
y_pred = pipe.fit(X_train, y_train).predict(X_test)
print(classification_report(y_test, y_pred))

# Baseline for comparison: the same classifier fitted directly on the
# unfiltered training data. `y_pred` is overwritten here.
clf = LogisticRegression(random_state=rng)
y_pred = clf.fit(X_train, y_train).predict(X_test)
print(classification_report(y_test, y_pred))

# Show the figures (assumes `plt` is matplotlib.pyplot -- verify the import).
plt.show()
예제 #6
0
def test_function_sampler_identity(X, y):
    """Verify that a default-constructed FunctionSampler is a pass-through.

    The output of ``fit_sample`` must match the ``X`` and ``y`` passed in.
    """
    X_res, y_res = FunctionSampler().fit_sample(X, y)

    assert_allclose_dense_sparse(X_res, X)
    assert_array_equal(y_res, y)