Ejemplo n.º 1
0
def test_pipeline_new_with_params():
    """Verify ``Pipeline.new_with_params`` gives an unfitted, re-parameterized copy.

    A pipeline ending in a 4-cluster KMeans is fitted and used for
    prediction.  ``new_with_params`` is then called with overrides for the
    ``kmeans`` and ``pca`` steps; the result must raise ``NotFittedError``
    on ``predict`` until it is fitted, after which it must expose 7
    cluster centers.
    """
    step_specs = [
        steps.SelectCanvas('band_1'),
        steps.Flatten(),
        ('pca', steps.Transform(IncrementalPCA(n_components=3))),
        ('kmeans', KMeans(n_clusters=4)),
    ]
    original = Pipeline(step_specs)

    # Fit and predict on independently generated random stores.
    train_store = random_elm_store()
    original.fit(train_store)
    predict_store = random_elm_store()
    original.predict(predict_store)

    # The final step's estimator should reflect the configured cluster count.
    last_estimator = original.steps[-1][-1]
    assert last_estimator.cluster_centers_.shape[0] == 4

    overridden = original.new_with_params(kmeans__n_clusters=7,
                                          pca__n_components=2)

    # The re-parameterized pipeline must not be usable before fitting.
    with pytest.raises(NotFittedError):
        overridden.predict(random_elm_store())

    overridden.fit(random_elm_store())
    refit_estimator = overridden.steps[-1][-1]
    assert refit_estimator.cluster_centers_.shape[0] == 7
Ejemplo n.º 2
0
def test_poly():
    """Exercise progressively longer prefixes of ``flat_poly_var_kmeans``.

    Pipelines built from the first one, two and three steps are run through
    ``fit_transform`` and their outputs compared; the full pipeline is then
    checked via ``fit``, ``predict`` and ``fit_transform``.
    """
    step_specs = flat_poly_var_kmeans

    # First step only: the transformed X is expected to carry a ``flat``
    # attribute.
    one_step = Pipeline(step_specs[:1])
    base, _, _ = one_step.fit_transform(**data_source)
    assert hasattr(base, 'flat')

    # First two steps: the second step must add columns.
    two_steps = Pipeline(step_specs[:2])
    widened, _, _ = two_steps.fit_transform(**data_source)
    base_cols = base.flat.shape[1]
    widened_cols = widened.flat.shape[1]
    assert widened_cols > base_cols

    # First three steps: fit_transform still returns a tuple.
    three_steps = Pipeline(step_specs[:3])
    selected = three_steps.fit_transform(**data_source)
    assert isinstance(selected, tuple)

    # Whole pipeline, through KMeans.
    full = Pipeline(step_specs)

    # fit is expected to hand back a Pipeline instance (self after fitting).
    fitted = full.fit(**data_source)
    assert isinstance(fitted, Pipeline)
    final_estimator = fitted.steps[-1][-1]
    assert isinstance(final_estimator, KMeans)
    n_centers = fitted._estimator.cluster_centers_.shape[0]
    configured_clusters = fitted.get_params()['kmeans__n_clusters']
    assert n_centers == configured_clusters

    # predict is expected to return the KMeans predict output.
    labels = full.predict(**data_source)

    # With KMeans last, fit_transform is expected to return the KMeans
    # transform (distances to the cluster centers) as the first element
    # of a 3-tuple.
    result = full.fit_transform(**data_source)
    assert isinstance(result, tuple)
    assert len(result) == 3
    distances, _, _ = result
    assert distances.shape[0] == labels.size