예제 #1
0
def test_multiclass():
    """Check three-class predictions of ``ParallelPostFit`` on dask arrays.

    A wrapped ``LogisticRegression`` is fitted on chunked inputs; both
    ``predict`` and ``predict_proba`` must return ``da.Array`` objects that
    match the output of the underlying estimator.
    """
    features, labels = sklearn.datasets.make_classification(
        n_classes=3, n_informative=4
    )
    X = da.from_array(features, chunks=50)
    y = da.from_array(labels, chunks=50)

    # ``multi_class`` is only forwarded when the SK_GE_020 flag is set.
    extra = {"multi_class": "auto"} if SK_GE_020 else {}
    estimator = LogisticRegression(
        random_state=0, n_jobs=1, solver="lbfgs", **extra
    )
    clf = ParallelPostFit(estimator)
    clf.fit(X, y)

    for method in ("predict", "predict_proba"):
        result = getattr(clf, method)(X)
        expected = getattr(clf.estimator, method)(X)

        assert isinstance(result, da.Array)
        assert_eq_ar(result, expected)
def test_predict(kind):
    """Compare ``ParallelPostFit`` predictions with an unwrapped estimator.

    ``kind`` selects the input container: ``"numpy"`` computes the generated
    data eagerly, ``"dask.dataframe"`` converts it with
    ``dd.from_dask_array``, and any other value keeps the collections
    returned by ``make_classification(chunks=100)`` unchanged.

    NOTE(review): ``kind`` is presumably supplied by a parametrize decorator
    or fixture that is not visible in this excerpt -- confirm.
    """
    X, y = make_classification(chunks=100)

    if kind == "dask.dataframe":
        X, y = dd.from_dask_array(X), dd.from_dask_array(y)
    elif kind == "numpy":
        X, y = dask.compute(X, y)

    # Both estimators are built from the same parameters so that their
    # fitted state can be compared directly.
    lr_params = {"random_state": 0, "n_jobs": 1, "solver": "lbfgs"}
    base = LogisticRegression(**lr_params)
    wrap = ParallelPostFit(LogisticRegression(**lr_params))

    base.fit(X, y)
    wrap.fit(X, y)
    assert_estimator_equal(wrap.estimator, base)

    for method in ("predict", "predict_proba"):
        result = getattr(wrap, method)(X)
        expected = getattr(base, method)(X)
        assert_eq_ar(result, expected)
예제 #3
0
def test_sparse_inputs():
    """Predict through ``ParallelPostFit`` on a dask array wrapping a CSR matrix."""
    sparse_X = csr_matrix((3, 4))
    labels = np.asarray([0, 0, 1], dtype=np.int32)

    # The wrapped estimator is fitted beforehand on the sparse matrix itself.
    base = SGDClassifier(tol=1e-3).fit(sparse_X, labels)
    wrap = ParallelPostFit(base)

    # Chunk the 3x4 input as one row per block.
    chunked = da.from_array(sparse_X, chunks=(1, 4))

    result = wrap.predict(chunked).compute()
    expected = base.predict(sparse_X)
    assert_eq_ar(result, expected)
예제 #4
0
def test_multiclass():
    """Check three-class ``predict``/``predict_proba`` output of ``ParallelPostFit``.

    Each method must return a ``da.Array`` that matches what the underlying
    estimator produces for the same input.
    """
    X, y = make_classification(chunks=50, n_classes=3, n_informative=4)

    clf = ParallelPostFit(LogisticRegression(random_state=0))
    clf.fit(X, y)

    def check(method):
        # Wrapper output first, then the reference from the inner estimator.
        result = getattr(clf, method)(X)
        expected = getattr(clf.estimator, method)(X)

        assert isinstance(result, da.Array)
        assert_eq_ar(result, expected)

    check("predict")
    check("predict_proba")
예제 #5
0
def test_transform_meta_override():
    """Check that ``transform_meta`` lets a value-dependent transformer run.

    Without the override, ``transform`` on a dask dataframe is expected to
    raise ``ValueError``; with an explicit meta array the result must match
    the plain estimator's output.
    """
    frame = pd.DataFrame({"cat_s": ["a", "b", "c", "d"]})
    dd_X = dd.from_pandas(frame, npartitions=2)

    # NOTE(review): newer scikit-learn releases renamed ``sparse`` to
    # ``sparse_output`` -- confirm against the pinned version.
    base = OneHotEncoder(sparse=False)
    base.fit(pd.DataFrame(frame))

    # Failure when not providing transform_meta, because the model is
    # value dependent.
    wrap = ParallelPostFit(base)
    with pytest.raises(ValueError):
        wrap.transform(dd_X)

    # Success when the output meta is supplied explicitly: a single
    # float64 row with four columns.
    meta = np.array([[0, 0, 0, 0]], dtype=np.float64)
    wrap = ParallelPostFit(base, transform_meta=meta)

    result = wrap.transform(dd_X)
    expected = base.transform(frame)
    assert_eq_ar(result, expected)
예제 #6
0
def test_transform(kind):
    """Compare ``ParallelPostFit.transform`` with an unwrapped ``PCA``.

    ``kind`` selects the input container: ``"numpy"`` computes the generated
    data eagerly, ``"dask.dataframe"`` converts it with
    ``dd.from_dask_array``, and any other value keeps the collections
    returned by ``make_classification(chunks=100)`` unchanged.

    Both estimators are fitted on the materialized data; the wrapper then
    transforms the (possibly lazy) ``X`` while the baseline transforms the
    materialized copy.

    NOTE(review): ``kind`` is presumably supplied by a parametrize decorator
    or fixture that is not visible in this excerpt -- confirm.
    """
    X, y = make_classification(chunks=100)

    if kind == "numpy":
        X, y = dask.compute(X, y)
    elif kind == "dask.dataframe":
        X = dd.from_dask_array(X)
        y = dd.from_dask_array(y)

    base = PCA(random_state=0)
    wrap = ParallelPostFit(PCA(random_state=0))

    # Materialize once and reuse, instead of recomputing the same graph for
    # every fit/transform call.
    X_local, y_local = dask.compute(X, y)

    base.fit(X_local, y_local)
    wrap.fit(X_local, y_local)

    assert_estimator_equal(wrap.estimator, base)

    # ``result`` is the wrapper under test and ``expected`` is the baseline,
    # matching the naming used by the sibling tests.
    result = wrap.transform(X)
    expected = base.transform(X_local)
    assert_eq_ar(result, expected)
예제 #7
0
def test_predict_meta_override():
    """Check that ``predict_meta`` lets a value-dependent model predict.

    Without the override, ``predict`` on the dask dataframe is expected to
    raise ``ValueError``; with an explicit meta array the result must match
    the plain estimator's predictions.
    """
    frame = pd.DataFrame({"c_0": [1, 2, 3, 4]})
    target = np.array([1, 2, 3, 4])

    base = CategoricalNB()
    base.fit(pd.DataFrame(frame), target)

    dd_X = dd.from_pandas(frame, npartitions=2)
    # Swap in a meta frame holding 5, a value absent from the training data.
    dd_X._meta = pd.DataFrame({"c_0": [5]})

    # Failure when not providing predict_meta, because the model is
    # value dependent.
    wrap = ParallelPostFit(base)
    with pytest.raises(ValueError):
        wrap.predict(dd_X)

    # Success when providing the meta override.
    meta = np.array([1])
    wrap = ParallelPostFit(base, predict_meta=meta)

    result = wrap.predict(dd_X)
    expected = base.predict(frame)
    assert_eq_ar(result, expected)
예제 #8
0
def test_multiclass():
    """Check three-class prediction methods of ``ParallelPostFit``.

    The wrapped ``LogisticRegression`` is fitted on computed data and then
    queried with dask arrays. ``predict`` and ``predict_proba`` must return
    ``da.Array`` objects equal to the inner estimator's output;
    ``predict_log_proba`` is only compared for equality.
    """
    features, labels = sklearn.datasets.make_classification(
        n_classes=3, n_informative=4
    )
    X = da.from_array(features, chunks=50)
    y = da.from_array(labels, chunks=50)

    estimator = LogisticRegression(
        random_state=0, n_jobs=1, solver="lbfgs", multi_class="auto"
    )
    clf = ParallelPostFit(estimator)

    # Fit on the materialized data; the checks below use the dask arrays.
    clf.fit(*dask.compute(X, y))

    for method in ("predict", "predict_proba"):
        result = getattr(clf, method)(X)
        expected = getattr(clf.estimator, method)(X)

        assert isinstance(result, da.Array)
        assert_eq_ar(result, expected)

    log_result = clf.predict_log_proba(X)
    log_expected = clf.estimator.predict_log_proba(X)
    assert_eq_ar(log_result, log_expected)