Esempio n. 1
0
def test_ols(nrows, ncols, n_parts, fit_intercept, normalize, datatype,
             delayed, client):
    """Check that the Dask LinearRegression reproduces its training targets.

    A regression dataset is generated, prepared for the given client, used
    to fit the estimator, and then predicted on again; the test passes when
    the mean squared error between the targets and the predictions is
    below 1e-6.

    Parameters
    ----------
    nrows, ncols
        Dataset dimensions; converted with ``int()`` before use.
    n_parts
        Forwarded to ``_prep_training_data``.
    fit_intercept, normalize
        Forwarded to the estimator constructor.
    datatype
        Forwarded to ``make_regression_dataset``.
    delayed
        Forwarded to ``predict`` as its ``delayed`` keyword.
    client
        Object used to run the serializer import and to prepare the
        training data.
    """
    def _load_serializers():
        # Imported only for its side effects, hence the NOQA marker.
        import cuml.comm.serialize  # NOQA

    # Execute the import through the client before touching the estimator.
    client.run(_load_serializers)

    from cuml.dask.linear_model import LinearRegression as cumlOLS_dask

    informative_cols = 5
    n_rows, n_cols = int(nrows), int(ncols)
    X, y = make_regression_dataset(datatype, n_rows, n_cols,
                                   informative_cols)

    train_X, train_y = _prep_training_data(client, X, y, n_parts)

    model = cumlOLS_dask(fit_intercept=fit_intercept, normalize=normalize)
    model.fit(train_X, train_y)

    # Predict on the training set and bring the result back to host memory.
    prediction = model.predict(train_X, delayed=delayed)
    predicted_values = prediction.compute().to_pandas().values

    mse = mean_squared_error(y, predicted_values)

    assert mse < 1e-6
Esempio n. 2
0
def test_ols(cluster):
    """Fit the Dask LinearRegression on loaded data and check the fit error.

    A client is opened on ``cluster``, data is obtained from ``load_data``,
    moved into cudf objects, partitioned with dask_cudf (one partition per
    entry reported by ``client.has_what()``), and used to fit and predict
    with the estimator. The test passes when the mean squared error between
    the targets and the predictions is below 1e-6.

    Parameters
    ----------
    cluster
        Cluster object the client connects to. Both the client and the
        cluster are closed when the test finishes, whether or not it
        succeeded.
    """

    client = Client(cluster)

    try:

        import dask_cudf

        import cudf
        import numpy as np

        from cuml.dask.linear_model import LinearRegression as cumlOLS_dask

        nrows = 2**8
        ncols = 399

        X, y = load_data(nrows, ncols)

        X_cudf = cudf.DataFrame.from_pandas(X)
        # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values``
        # yields the same NumPy array on both old and new pandas versions.
        y_cudf = np.array(y.values)
        # Keep only the first target column as a 1-D series.
        y_cudf = y_cudf[:, 0]
        y_cudf = cudf.Series(y_cudf)

        # One partition per worker entry known to the client.
        n_workers = len(client.has_what())

        X_df = dask_cudf.from_cudf(X_cudf, npartitions=n_workers).persist()
        y_df = dask_cudf.from_cudf(y_cudf, npartitions=n_workers).persist()

        lr = cumlOLS_dask()

        lr.fit(X_df, y_df)

        ret = lr.predict(X_df)

        # ``to_pandas().values`` replaces ``to_array()``, which is no longer
        # available in newer cudf releases.
        error_cuml = mean_squared_error(y, ret.compute().to_pandas().values)

        assert error_cuml < 1e-6

    finally:
        client.close()
        cluster.close()
Esempio n. 3
0
def test_ols(nrows,
             ncols,
             n_parts,
             fit_intercept,
             normalize,
             datatype,
             client=None):
    """Check that the Dask LinearRegression reproduces its training targets.

    A regression dataset is generated, prepared for the client, used to fit
    the estimator, and then predicted on again; the test passes when the
    mean squared error between the targets and the predictions is below
    1e-6.

    Parameters
    ----------
    nrows, ncols
        Dataset dimensions; converted with ``int()`` before use.
    n_parts
        Forwarded to ``_prep_training_data``. When greater than 2 the
        estimator is fitted with ``force_colocality=True``.
    fit_intercept, normalize
        Forwarded to the estimator constructor.
    datatype
        Forwarded to ``make_regression_dataset``.
    client
        Optional client to run against. When ``None`` a ``LocalCUDACluster``
        and a ``Client`` are created for the duration of the test and closed
        afterwards. A client supplied by the caller is left open, since the
        caller owns it.
    """

    # Track what this test creates so cleanup never touches an unbound name
    # (previously ``cluster.close()`` raised UnboundLocalError whenever a
    # client was passed in, masking the real test outcome).
    cluster = None
    owned_client = None

    try:
        if client is None:
            cluster = LocalCUDACluster()
            client = owned_client = Client(cluster)

        from cuml.dask.linear_model import LinearRegression as cumlOLS_dask

        n_info = 5
        # Builtin ``int`` replaces the ``np.int`` alias, which was
        # deprecated in NumPy 1.20 and removed in NumPy 1.24.
        nrows = int(nrows)
        ncols = int(ncols)
        X, y = make_regression_dataset(datatype, nrows, ncols, n_info)

        X_df, y_df = _prep_training_data(client, X, y, n_parts)

        lr = cumlOLS_dask(fit_intercept=fit_intercept, normalize=normalize)

        if n_parts > 2:
            lr.fit(X_df, y_df, force_colocality=True)
        else:
            lr.fit(X_df, y_df)

        ret = lr.predict(X_df)

        error_cuml = mean_squared_error(y, ret.compute().to_pandas().values)

        assert error_cuml < 1e-6

    finally:
        # Tear down only the resources created above, client first.
        if owned_client is not None:
            owned_client.close()
        if cluster is not None:
            cluster.close()