Ejemplo n.º 1
0
def test_pipeline_score_samples_pca_lof():
    """Check ``score_samples`` on a sampler -> PCA -> LOF pipeline.

    Two things are verified:

    * the pipeline exposes ``score_samples`` and returns one score per row;
    * the scores equal those obtained by resampling, transforming and
      scoring with the individual steps by hand.
    """
    # Two-class toy dataset generated with class weights 0.3 / 0.7.
    dataset_params = dict(
        n_samples=500,
        n_features=20,
        n_informative=3,
        n_redundant=1,
        n_classes=2,
        n_clusters_per_class=1,
        weights=[0.3, 0.7],
        class_sep=2,
        flip_y=0,
        random_state=0,
    )
    X, y = make_classification(**dataset_params)

    sampler = RandomUnderSampler(random_state=42)
    reducer = PCA(n_components="mle", svd_solver="full", whiten=True)
    detector = LocalOutlierFactor(novelty=True)
    steps = [("rus", sampler), ("pca", reducer), ("lof", detector)]
    pipe = Pipeline(steps)
    pipe.fit(X, y)

    # Shape check: one score for every sample passed in.
    n_rows = X.shape[0]
    assert pipe.score_samples(X).shape == (n_rows,)

    # Value check: redo the pipeline's work step by step. The sampler is
    # re-run with its fixed seed so the reference estimators are fitted on
    # resampled data, as in the pipeline.
    X_res, _ = sampler.fit_resample(X, y)
    detector.fit(reducer.fit_transform(X_res))
    piped_scores = pipe.score_samples(X)
    manual_scores = detector.score_samples(reducer.transform(X))
    assert_allclose(piped_scores, manual_scores)
Ejemplo n.º 2
0
def test_pipeline_score_samples_pca_lof():
    """Check ``score_samples`` on a sampler -> PCA -> LOF pipeline.

    Test that the score_samples method is implemented on a pipeline, and
    that it yields the same results as applying the resampling, transform
    and score_samples steps separately.
    """
    X, y = load_iris(return_X_y=True)
    sampling_strategy = {0: 50, 1: 30, 2: 20}
    # Seeded so the imbalanced subset is the same on every run.
    X, y = make_imbalance(
        X, y, sampling_strategy=sampling_strategy, random_state=42
    )

    # Fixed seed: the reference computation below re-runs the sampler and
    # must be reproducible with respect to the pipeline's own fit.
    rus = RandomUnderSampler(random_state=42)
    pca = PCA(svd_solver='full', n_components='mle', whiten=True)
    lof = LocalOutlierFactor(novelty=True)
    pipe = Pipeline([('rus', rus), ('pca', pca), ('lof', lof)])
    pipe.fit(X, y)

    # Check the shapes
    assert pipe.score_samples(X).shape == (X.shape[0], )

    # Check the values. The reference estimators are fitted on the
    # under-sampled data (not the full X), because that is what the
    # pipeline feeds to PCA and LOF during ``fit``.
    X_res, _ = rus.fit_resample(X, y)
    lof.fit(pca.fit_transform(X_res))
    assert_allclose(pipe.score_samples(X), lof.score_samples(pca.transform(X)))