Esempio n. 1
0
def test_fit():
    """Check that ``fit`` followed by ``predict`` matches the estimator's own predictions.

    An ``SGDClassifier`` is trained through ``fit`` on the module-level ``X``
    and ``Y``. The blockwise ``predict`` output on ``Z`` must have chunks
    ``((2, 2),)`` and, once computed, equal what the fitted estimator
    predicts directly on ``z``.

    NOTE(review): ``X``, ``Y``, ``Z`` and ``z`` are defined outside this
    function; presumably ``Z`` is the chunked counterpart of ``z`` -- confirm
    against the module-level fixtures.
    """
    labels = np.array([-1, 0, 1])

    # Run on the single-threaded scheduler for the duration of the test.
    with dask.config.set(scheduler="single-threaded"):
        model = fit(SGDClassifier(max_iter=5, tol=1e-3), X, Y, classes=labels)

        expected = model.predict(z)
        actual = predict(model, Z)

        assert actual.chunks == ((2, 2), )
        assert actual.compute().tolist() == expected.tolist()
Esempio n. 2
0
def test_dataframes():
    """Check that ``fit`` and ``predict`` accept dask DataFrame inputs.

    A ten-row pandas frame is split into two partitions. The classifier is
    trained on the dask column selection ``[["x"]]`` with ``y`` as the target,
    and the ``predict`` result is compared against the fitted estimator's
    predictions on the in-memory pandas column selection.
    """
    frame = pd.DataFrame({"x": range(10), "y": [0, 1] * 5})
    partitioned = dd.from_pandas(frame, npartitions=2)
    features = partitioned[["x"]]

    with dask.config.set(scheduler="single-threaded"):
        model = fit(
            SGDClassifier(max_iter=5, tol=1e-3),
            features,
            partitioned.y,
            classes=[0, 1],
        )

        expected = model.predict(frame[["x"]])
        actual = predict(model, features)

        da.utils.assert_eq(expected, actual)
Esempio n. 3
0
def test_fit_shuffle_blocks():
    """Exercise the ``random_state`` / ``shuffle_blocks`` arguments of ``fit``.

    Three things are asserted:

    1. With ``shuffle_blocks`` left at its default, fitting with seeds 0 and
       42 produces different coefficients.
    2. With ``shuffle_blocks=False``, the same two seeds produce coefficients
       that are ``np.allclose``.
    3. Passing a ``da.random.RandomState`` as ``random_state`` raises
       ``ValueError`` whose message matches "cannot be used to seed".
    """
    n_rows = 10
    labels = [0, 1]
    seeds = (0, 42)

    # chunks=1 puts every row of the (n_rows, 1) array in its own block.
    features = da.from_array(1 + np.arange(n_rows).reshape(-1, 1), chunks=1)
    targets = da.from_array(np.ones(n_rows), chunks=1)

    # The estimator itself is seeded and does not shuffle, so the seed passed
    # to ``fit`` is the only thing varied between the runs below.
    template = SGDClassifier(
        max_iter=5,
        random_state=0,
        fit_intercept=False,
        shuffle=False,
        tol=1e-3,
    )

    shuffled = [
        fit(clone(template), features, targets, random_state=seed, classes=labels)
        for seed in seeds
    ]
    assert all(len(model.coef_) == 1 for model in shuffled)
    assert not np.allclose(shuffled[0].coef_, shuffled[1].coef_)

    features, targets = make_classification(random_state=0, chunks=20)
    unshuffled = [
        fit(
            clone(template),
            features,
            targets,
            random_state=seed,
            classes=labels,
            shuffle_blocks=False,
        )
        for seed in seeds
    ]
    assert np.allclose(unshuffled[0].coef_, unshuffled[1].coef_)

    with pytest.raises(ValueError, match="cannot be used to seed"):
        fit(
            template,
            features,
            targets,
            classes=np.array([-1, 0, 1]),
            shuffle_blocks=True,
            random_state=da.random.RandomState(42),
        )
Esempio n. 4
0
def test_no_compute():
    """Check that ``fit(..., compute=False)`` returns a ``Delayed`` object.

    NOTE(review): ``X`` and ``Y`` are module-level fixtures defined outside
    this function.
    """
    lazy = fit(
        SGDClassifier(max_iter=5, tol=1e-3),
        X,
        Y,
        classes=np.array([-1, 0, 1]),
        compute=False,
    )
    assert isinstance(lazy, Delayed)
Esempio n. 5
0
def test_no_partial_fit_raises():
    """Check that ``fit`` rejects a ``RandomForestClassifier``.

    The call must raise ``ValueError`` with a message that mentions
    "RandomForestClassifier". Going by the test name, this is presumably
    because that estimator has no ``partial_fit`` method.
    """
    features, targets = make_classification(chunks=50)
    estimator = RandomForestClassifier()

    with pytest.raises(ValueError, match="RandomForestClassifier"):
        fit(estimator, features, targets)
Esempio n. 6
0
def test_bag():
    """Check that ``fit`` accepts a dask bag and a ``None`` target.

    A ``HashingVectorizer`` is passed through ``fit`` with a two-partition
    bag. The second dimension of its ``transform`` output must equal the
    vectorizer's ``n_features``.
    """
    bag = db.from_sequence(range(10), npartitions=2)
    hasher = fit(dask_ml.feature_extraction.text.HashingVectorizer(), bag, None)

    transformed = hasher.transform(bag)
    assert transformed.shape[1] == hasher.n_features