def test_pipeline_score_samples_pca_lof():
    """Check ``score_samples`` on an under-sampler -> PCA -> LOF pipeline.

    The pipeline is fitted on a synthetic imbalanced two-class problem.
    Its ``score_samples`` output must have one value per input sample and
    must match the result of applying the steps by hand: resample, fit PCA
    and LOF on the resampled data, then score the PCA-transformed ``X``.
    """
    X, y = make_classification(
        n_classes=2,
        class_sep=2,
        weights=[0.3, 0.7],
        n_informative=3,
        n_redundant=1,
        flip_y=0,
        n_features=20,
        n_clusters_per_class=1,
        n_samples=500,
        random_state=0,
    )
    # Test that the score_samples method is implemented on a pipeline.
    # Test that the score_samples method on pipeline yields same results as
    # applying transform and score_samples steps separately.
    rus = RandomUnderSampler(random_state=42)
    pca = PCA(svd_solver="full", n_components="mle", whiten=True)
    lof = LocalOutlierFactor(novelty=True)
    pipe = Pipeline([("rus", rus), ("pca", pca), ("lof", lof)])
    pipe.fit(X, y)

    # Snapshot the pipeline output *before* refitting anything below. The
    # pipeline was built from these very `pca` / `lof` objects, so (assuming
    # the pipeline does not clone its steps on fit -- confirm) scoring after
    # the manual refit would compare the refit estimators with themselves.
    pipe_scores = pipe.score_samples(X)

    # Check the shapes
    assert pipe_scores.shape == (X.shape[0],)

    # Check the values: redo the pipeline's steps by hand. The sampler is
    # seeded, so this resampling is expected to reproduce the one used
    # during ``pipe.fit``.
    X_res, _ = rus.fit_resample(X, y)
    lof.fit(pca.fit_transform(X_res))
    assert_allclose(pipe_scores, lof.score_samples(pca.transform(X)))
# NOTE(review): a test with this exact name is also defined earlier in this
# file. If both live in the same module, this later definition replaces the
# earlier one and only one of them is collected -- confirm which is intended.
def test_pipeline_score_samples_pca_lof():
    """Check ``score_samples`` on an under-sampler -> PCA -> LOF pipeline.

    The pipeline is fitted on an imbalanced version of the iris data set.
    Its ``score_samples`` output must have one value per input sample and
    must match the result of applying the steps by hand: resample, fit PCA
    and LOF on the resampled data, then score the PCA-transformed ``X``.
    """
    X, y = load_iris(return_X_y=True)
    sampling_strategy = {0: 50, 1: 30, 2: 20}
    # Seeded so the imbalanced subset is the same on every run.
    X, y = make_imbalance(
        X, y, sampling_strategy=sampling_strategy, random_state=0
    )
    # Test that the score_samples method is implemented on a pipeline.
    # Test that the score_samples method on pipeline yields same results as
    # applying transform and score_samples steps separately.
    # Seeded so the manual resampling below can reproduce the one performed
    # inside ``pipe.fit``.
    rus = RandomUnderSampler(random_state=42)
    pca = PCA(svd_solver='full', n_components='mle', whiten=True)
    lof = LocalOutlierFactor(novelty=True)
    pipe = Pipeline([('rus', rus), ('pca', pca), ('lof', lof)])
    pipe.fit(X, y)

    # Snapshot the pipeline output *before* refitting anything below. The
    # pipeline was built from these very `pca` / `lof` objects, so (assuming
    # the pipeline does not clone its steps on fit -- confirm) scoring after
    # the manual refit would compare the refit estimators with themselves.
    pipe_scores = pipe.score_samples(X)

    # Check the shapes
    assert pipe_scores.shape == (X.shape[0],)

    # Check the values: the pipeline fits PCA and LOF on the under-sampled
    # data, so the reference must be fitted on the resampled data as well,
    # not on the full ``X``.
    X_res, _ = rus.fit_resample(X, y)
    lof.fit(pca.fit_transform(X_res))
    assert_allclose(pipe_scores, lof.score_samples(pca.transform(X)))