def test_function_sampler_func(X, y):
    """Check that FunctionSampler delegates resampling to a custom callable.

    The callable truncates both inputs to their first ten entries, so the
    sampler output is compared against that same leading slice of ``X``
    and ``y``.
    """
    head = slice(10)

    def keep_head(X, y):
        # Return only the leading slice of the data and of the targets.
        return X[head], y[head]

    X_res, y_res = FunctionSampler(func=keep_head).fit_sample(X, y)

    assert_allclose_dense_sparse(X_res, X[head])
    assert_array_equal(y_res, y[head])
def test_function_sampler_func_kwargs(X, y):
    """Check that ``kw_args`` are forwarded to the wrapped callable.

    The callable runs a RandomUnderSampler built from the forwarded
    ``ratio`` and ``random_state``. Its output is compared with a
    RandomUnderSampler created directly with ``random_state=0``.
    """
    def undersample(X, y, ratio, random_state):
        # Parameter names must match the keys supplied through kw_args.
        return RandomUnderSampler(
            ratio=ratio, random_state=random_state
        ).fit_sample(X, y)

    forwarded = {'ratio': 'auto', 'random_state': 0}
    X_res, y_res = FunctionSampler(
        func=undersample, kw_args=forwarded
    ).fit_sample(X, y)

    # Reference result obtained without going through FunctionSampler.
    reference = RandomUnderSampler(random_state=0)
    X_res_2, y_res_2 = reference.fit_sample(X, y)

    assert_allclose_dense_sparse(X_res, X_res_2)
    assert_array_equal(y_res, y_res_2)
def test_function_sampler_identity(X, y):
    """Check that a FunctionSampler built with no arguments is a no-op.

    The resampled data and targets are asserted to equal the inputs.
    """
    X_out, y_out = FunctionSampler().fit_sample(X, y)

    assert_allclose_dense_sparse(X_out, X)
    assert_array_equal(y_out, y)
# Assemble the testing set: moons samples are labelled 1, blobs samples 0.
X_test = np.vstack([moons, blobs])
moons_labels = np.ones(moons.shape[0], dtype=np.int8)
blobs_labels = np.zeros(blobs.shape[0], dtype=np.int8)
y_test = np.hstack([moons_labels, blobs_labels])

plot_scatter(X_test, y_test, 'Testing dataset')


def outlier_rejection(X, y):
    """Drop the samples that an IsolationForest does not predict as 1.

    The forest is fitted on ``X`` and then used to predict on the same
    ``X``; only the rows (and matching targets) predicted as 1 are
    returned.
    """
    forest = IsolationForest(max_samples=100,
                             contamination=0.4,
                             random_state=rng)
    forest.fit(X)
    inlier_mask = forest.predict(X) == 1
    return X[inlier_mask], y[inlier_mask]


# Apply the rejection step on its own to visualise the retained samples.
reject_sampler = FunctionSampler(func=outlier_rejection)
X_inliers, y_inliers = reject_sampler.fit_sample(X_train, y_train)
plot_scatter(X_inliers, y_inliers, 'Training data without outliers')

# Classifier trained after the outlier-rejection step.
pipe = make_pipeline(
    FunctionSampler(func=outlier_rejection),
    LogisticRegression(random_state=rng),
)
y_pred = pipe.fit(X_train, y_train).predict(X_test)
print(classification_report(y_test, y_pred))

# Same classifier trained directly on the full training set, for comparison.
clf = LogisticRegression(random_state=rng)
y_pred = clf.fit(X_train, y_train).predict(X_test)
print(classification_report(y_test, y_pred))

plt.show()