Esempio n. 1
0
def test_transform_meta_override():
    """Check that an explicit ``transform_meta`` makes ``transform`` work.

    A fitted ``OneHotEncoder`` is wrapped in ``ParallelPostFit`` twice:
    first without ``transform_meta``, where transforming the dask
    DataFrame is expected to raise ``ValueError``, and then with an
    explicit ``transform_meta``, where the output must match what the
    unwrapped encoder produces on the in-memory data.
    """
    X = pd.DataFrame({"cat_s": ["a", "b", "c", "d"]})
    dd_X = dd.from_pandas(X, npartitions=2)

    # NOTE(review): newer scikit-learn releases name this argument
    # ``sparse_output`` -- confirm against the version pinned for the suite.
    base = OneHotEncoder(sparse=False)
    # X is already a DataFrame, so it is passed to fit as-is.
    base.fit(X)

    # Failure when not providing transform_meta,
    # because of the value-dependent model.
    wrap = ParallelPostFit(base)
    with pytest.raises(ValueError):
        wrap.transform(dd_X)

    # One float64 row with four columns, matching the four categories in X.
    wrap = ParallelPostFit(
        base,
        transform_meta=np.array([[0, 0, 0, 0]], dtype=np.float64),
    )
    result = wrap.transform(dd_X)
    expected = base.transform(X)
    assert_eq_ar(result, expected)
Esempio n. 2
0
def test_transform(kind):
    """Check that ``ParallelPostFit.transform`` matches the plain estimator.

    Parameters
    ----------
    kind : str
        Selects the container type of the inputs. ``"numpy"`` computes
        the generated data up front; ``"dask.dataframe"`` converts it with
        ``dd.from_dask_array``. Any other value leaves the output of
        ``make_classification(chunks=100)`` untouched.
    """
    X, y = make_classification(chunks=100)

    if kind == "numpy":
        X, y = dask.compute(X, y)
    elif kind == "dask.dataframe":
        X = dd.from_dask_array(X)
        y = dd.from_dask_array(y)

    # Materialise the inputs once and reuse them for fitting and for the
    # reference transform, instead of recomputing them for every call.
    X_local, y_local = dask.compute(X, y)

    base = PCA(random_state=0)
    wrap = ParallelPostFit(PCA(random_state=0))

    base.fit(X_local, y_local)
    wrap.fit(X_local, y_local)

    assert_estimator_equal(wrap.estimator, base)

    # ``result`` is the wrapper under test, ``expected`` the plain estimator
    # applied to the materialised data.
    result = wrap.transform(X)
    expected = base.transform(X_local)
    assert_eq_ar(result, expected)