def test_fit():
    """Check that ``fit`` + ``predict`` agree with the estimator's own ``predict``.

    An ``SGDClassifier`` is trained through ``fit`` on the module-level
    ``X``/``Y`` inputs, then ``predict`` is applied to ``Z`` and compared with
    calling the fitted estimator's ``predict`` directly on ``z``.

    NOTE(review): ``X``, ``Y``, ``Z`` and ``z`` are defined outside this
    block; presumably ``Z`` is the chunked counterpart of ``z`` -- confirm
    against the module-level fixtures.
    """
    # Everything runs under the single-threaded scheduler, including the
    # final ``compute()`` call.
    with dask.config.set(scheduler="single-threaded"):
        class_labels = np.array([-1, 0, 1])
        model = SGDClassifier(max_iter=5, tol=1e-3)
        model = fit(model, X, Y, classes=class_labels)

        expected = model.predict(z)
        actual = predict(model, Z)

        # The lazy result must carry exactly this chunk layout.
        assert actual.chunks == ((2, 2),)
        assert actual.compute().tolist() == expected.tolist()
def test_dataframes():
    """Check that ``fit`` and ``predict`` accept dask dataframe inputs.

    Builds a ten-row pandas frame with an ``x`` feature column and an
    alternating 0/1 ``y`` column, wraps it in a two-partition dask dataframe,
    trains an ``SGDClassifier`` through ``fit`` and verifies that ``predict``
    on the dask frame matches the estimator's ``predict`` on the pandas frame.
    """
    frame = pd.DataFrame({"x": range(10), "y": [0, 1] * 5})
    dask_frame = dd.from_pandas(frame, npartitions=2)
    feature_cols = ["x"]  # list selector keeps the selection two-dimensional

    # Training, prediction and the comparison all run under the
    # single-threaded scheduler.
    with dask.config.set(scheduler="single-threaded"):
        model = SGDClassifier(max_iter=5, tol=1e-3)
        model = fit(model, dask_frame[feature_cols], dask_frame.y, classes=[0, 1])

        expected = model.predict(frame[feature_cols])
        actual = predict(model, dask_frame[feature_cols])

        da.utils.assert_eq(expected, actual)