Esempio n. 1
0
def test_predict(kind):
    X, y = make_classification(chunks=100)

    if kind == "numpy":
        X, y = dask.compute(X, y)
    elif kind == "dask.dataframe":
        X = dd.from_dask_array(X)
        y = dd.from_dask_array(y)

    base = LogisticRegression(random_state=0, n_jobs=1, solver="lbfgs")
    wrap = ParallelPostFit(
        LogisticRegression(random_state=0, n_jobs=1, solver="lbfgs"))

    base.fit(*dask.compute(X, y))
    wrap.fit(*dask.compute(X, y))

    assert_estimator_equal(wrap.estimator, base)

    result = wrap.predict(X)
    expected = base.predict(X)
    assert_eq_ar(result, expected)

    result = wrap.predict_proba(X)
    expected = base.predict_proba(X)
    assert_eq_ar(result, expected)

    result = wrap.predict_log_proba(X)
    expected = base.predict_log_proba(X)
    assert_eq_ar(result, expected)
Esempio n. 2
0
def test_multiclass():
    X, y = sklearn.datasets.make_classification(n_classes=3, n_informative=4)
    X = da.from_array(X, chunks=50)
    y = da.from_array(y, chunks=50)

    clf = ParallelPostFit(
        LogisticRegression(random_state=0,
                           n_jobs=1,
                           solver="lbfgs",
                           multi_class="auto"))

    clf.fit(*dask.compute(X, y))
    result = clf.predict(X)
    expected = clf.estimator.predict(X)

    assert isinstance(result, da.Array)
    assert_eq_ar(result, expected)

    result = clf.predict_proba(X)
    expected = clf.estimator.predict_proba(X)

    assert isinstance(result, da.Array)
    assert_eq_ar(result, expected)

    result = clf.predict_log_proba(X)
    expected = clf.estimator.predict_log_proba(X)
    assert_eq_ar(result, expected)