コード例 #1
0
def test_lasso_default(dtype, nrows, column_info, n_parts, cluster):
    """Fit a default-configured ``Lasso`` on synthetic data and check R^2.

    A client is opened on ``cluster``, a regression problem is generated
    with ``make_regression``, a ``Lasso`` built with only ``client`` set is
    fitted on it, and the predictions on the training data must reach an
    R^2 score of at least 0.99. The client is always closed, even when the
    test body fails.

    Parameters (presumably supplied by pytest fixtures/parametrization,
    which are not visible in this block):

    dtype : forwarded to ``make_regression`` as ``dtype``.
    nrows : number of samples to generate.
    column_info : pair ``(ncols, n_info)`` giving the number of features
        and the number of informative features.
    n_parts : number of partitions requested from ``make_regression``.
    cluster : object handed to ``Client`` to open a connection.
    """

    client = Client(cluster)
    ncols, n_info = column_info

    try:

        # Forward ``n_parts`` so the partition count this test receives is
        # actually exercised, matching the other default-estimator test
        # in this file; it was previously accepted but ignored.
        X, y = make_regression(n_samples=nrows,
                               n_features=ncols,
                               n_informative=n_info,
                               n_parts=n_parts,
                               client=client,
                               dtype=dtype)

        # Block until the generated features are ready before fitting.
        wait(X)

        lasso = Lasso(client=client)

        lasso.fit(X, y)

        y_hat = lasso.predict(X)

        assert r2_score(y.compute(), y_hat.compute()) >= 0.99

    finally:
        client.close()
コード例 #2
0
def test_elastic_net(dtype, alpha, algorithm, nrows, column_info, n_parts,
                     client, delayed):
    """Fit an explicitly configured ``ElasticNet`` and check its R^2 score.

    Synthetic regression data is generated with ``make_regression``, the
    model is fitted on it, and predictions on the training data are scored
    with ``r2_score``. The minimum accepted score is 0.96 when ``alpha`` is
    0.2 and 0.80 for every other ``alpha``.

    ``column_info`` is unpacked as ``(n_features, n_informative)``;
    ``delayed`` is forwarded to ``predict``.
    """
    feature_count, informative_count = column_info

    X, y = make_regression(
        n_samples=nrows,
        n_features=feature_count,
        n_informative=informative_count,
        n_parts=n_parts,
        client=client,
        dtype=dtype,
    )

    model = ElasticNet(
        alpha=np.array([alpha]),
        fit_intercept=True,
        normalize=False,
        max_iter=1000,
        selection=algorithm,
        tol=1e-10,
        client=client,
    )
    model.fit(X, y)

    predictions = model.predict(X, delayed=delayed)

    # based on differences with scikit-learn 0.22
    min_score = 0.96 if alpha == 0.2 else 0.80

    score = r2_score(y.compute(), predictions.compute())
    assert score >= min_score
コード例 #3
0
def test_lasso(dtype, alpha, algorithm,
               nrows, column_info, n_parts, delayed, cluster):
    """Fit an explicitly configured ``Lasso`` and require R^2 >= 0.99.

    A client is opened on ``cluster`` and closed again in ``finally`` so
    it is released whether or not the assertions pass. ``column_info`` is
    unpacked as ``(n_features, n_informative)``; ``delayed`` is forwarded
    to ``predict``.
    """
    client = Client(cluster)
    feature_count, informative_count = column_info

    try:
        X, y = make_regression(
            n_samples=nrows,
            n_features=feature_count,
            n_informative=informative_count,
            n_parts=n_parts,
            client=client,
            dtype=dtype,
        )

        # Block until the generated features are ready before fitting.
        wait(X)

        model = Lasso(
            alpha=np.array([alpha]),
            fit_intercept=True,
            normalize=False,
            max_iter=1000,
            selection=algorithm,
            tol=1e-10,
            client=client,
        )
        model.fit(X, y)

        predictions = model.predict(X, delayed=delayed)

        score = r2_score(y.compute(), predictions.compute())
        assert score >= 0.99
    finally:
        client.close()
コード例 #4
0
def test_elastic_net_default(dtype, nrows, column_info, n_parts, cluster):
    """Fit a default-configured ``ElasticNet`` and require R^2 >= 0.96.

    A client is opened on ``cluster`` and closed again in ``finally``.
    The estimator is built with only ``client`` set, so every other
    hyper-parameter keeps the estimator's own default. ``column_info`` is
    unpacked as ``(n_features, n_informative)``.
    """
    client = Client(cluster)
    feature_count, informative_count = column_info

    try:
        X, y = make_regression(
            n_samples=nrows,
            n_features=feature_count,
            n_informative=informative_count,
            n_parts=n_parts,
            client=client,
            dtype=dtype,
        )

        # Block until the generated features are ready before fitting.
        wait(X)

        model = ElasticNet(client=client)
        model.fit(X, y)

        predictions = model.predict(X)

        score = r2_score(y.compute(), predictions.compute())
        assert score >= 0.96
    finally:
        client.close()
コード例 #5
0
ファイル: test_datasets.py プロジェクト: thomcom/cuml
def test_make_regression(n_samples, n_features, n_informative, n_targets, bias,
                         effective_rank, tail_strength, noise, shuffle, coef,
                         random_state, n_parts, cluster):
    """Check shapes, chunking and coefficients of dask ``make_regression``.

    The generated feature array must have shape
    ``(n_samples, n_features)``, be split into ``n_parts`` chunks along
    axis 0 and a single chunk along axis 1. The target array must have
    shape ``(n_samples, n_targets)`` when ``n_targets > 1`` and
    ``(n_samples,)`` otherwise. When ``coef`` is truthy the returned
    coefficients are additionally checked for shape, chunking, number of
    non-zero entries per column, and for the standard deviation of the
    residual ``values - (out @ coefs + bias)`` lying within 0.15 of 1.0.

    A client is opened on ``cluster`` and closed again in ``finally``.

    NOTE(review): ``tail_strength`` is accepted but is not forwarded to
    ``make_regression`` in this test.
    """
    dask_client = Client(cluster)
    try:
        from cuml.dask.datasets import make_regression

        result = make_regression(
            n_samples=n_samples,
            n_features=n_features,
            n_informative=n_informative,
            n_targets=n_targets,
            bias=bias,
            effective_rank=effective_rank,
            noise=noise,
            shuffle=shuffle,
            coef=coef,
            random_state=random_state,
            n_parts=n_parts,
        )

        # The coefficients are only part of the result when requested.
        if coef:
            out, values, coefs = result
        else:
            out, values = result

        multi_target = n_targets > 1

        assert out.shape == (n_samples, n_features), "out shape mismatch"

        expected_values_shape = \
            (n_samples, n_targets) if multi_target else (n_samples, )
        assert values.shape == expected_values_shape, "values shape mismatch"

        row_chunks, col_chunks = out.chunks[0], out.chunks[1]
        assert len(row_chunks) == n_parts
        assert len(col_chunks) == 1

        # Everything below validates the coefficients only.
        if not coef:
            return

        if multi_target:
            expected_coefs_shape, single_chunk_axis = \
                (n_features, n_targets), 1
        else:
            expected_coefs_shape, single_chunk_axis = (n_features, ), 0

        assert coefs.shape == expected_coefs_shape, "coefs shape mismatch"
        assert len(coefs.chunks[single_chunk_axis]) == 1

        # Lazy graphs: non-zero count per column, and residual spread.
        nonzero_per_column = da.sum(coefs != 0.0, axis=0)
        informative_ok = da.all(nonzero_per_column == n_informative)

        residual = values - (da.dot(out, coefs) + bias)
        residual_std = da.std(residual, axis=0)

        # Evaluate both graphs in a single compute call.
        informative_ok, residual_std = da.compute(informative_ok,
                                                  residual_std)

        tolerance = 1.5 * 10**(-1.)
        deviation = cp.abs(1.0 - residual_std)
        congruent = cp.all(deviation < tolerance)

        assert informative_ok, \
            "Unexpected number of informative features"

        assert congruent, "Unexpectedly incongruent outputs"

    finally:
        dask_client.close()
コード例 #6
0
def make_dataset(datatype, nrows, ncols, n_info):
    """Build a regression dataset and return its train/test pieces.

    Data is generated with ``make_regression`` using ``random_state=0``,
    cast to ``datatype``, and split with ``train_size=0.8``.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test)``; the test targets from the split
        are discarded.
    """
    features, targets = make_regression(
        n_samples=nrows,
        n_features=ncols,
        n_informative=n_info,
        random_state=0,
    )

    features = features.astype(datatype)
    targets = targets.astype(datatype)

    split = train_test_split(features, targets, train_size=0.8)
    X_train, X_test, y_train, _ = split

    return X_train, y_train, X_test
コード例 #7
0
ファイル: test_datasets.py プロジェクト: rapidsai/cuml
def test_make_regression(n_samples, n_features, n_informative,
                         n_targets, bias, effective_rank,
                         tail_strength, noise, shuffle,
                         coef, n_parts, order,
                         use_full_low_rank, client):
    """Check shapes, chunking, coefficients and layout of ``make_regression``.

    The generated feature array must have shape
    ``(n_samples, n_features)``, be split into ``n_parts`` chunks along
    axis 0 and a single chunk along axis 1. The target array must have
    shape ``(n_samples, n_targets)`` when ``n_targets > 1`` and
    ``(n_samples,)`` otherwise. When ``coef`` is truthy the returned
    coefficients are checked for shape, chunking, number of non-zero
    entries per column, and for the standard deviation of the residual
    ``values - (out @ coefs + bias)`` lying within 0.15 of 1.0.

    Finally, the result of ``gpu_futures[0][1]`` from a
    ``DistributedDataHandler`` (presumably the first partition - confirm
    against that class) is checked for the contiguity flag matching
    ``order`` (``'F'`` or ``'C'``); any other ``order`` skips that check.

    NOTE(review): ``tail_strength`` is accepted but is not forwarded to
    ``make_regression`` in this test.
    """
    from cuml.dask.datasets import make_regression

    result = make_regression(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=n_informative,
        n_targets=n_targets,
        bias=bias,
        effective_rank=effective_rank,
        noise=noise,
        shuffle=shuffle,
        coef=coef,
        n_parts=n_parts,
        use_full_low_rank=use_full_low_rank,
        order=order,
    )

    # The coefficients are only part of the result when requested.
    if coef:
        out, values, coefs = result
    else:
        out, values = result

    multi_target = n_targets > 1

    assert out.shape == (n_samples, n_features), "out shape mismatch"

    expected_values_shape = \
        (n_samples, n_targets) if multi_target else (n_samples,)
    assert values.shape == expected_values_shape, "values shape mismatch"

    row_chunks, col_chunks = out.chunks[0], out.chunks[1]
    assert len(row_chunks) == n_parts
    assert len(col_chunks) == 1

    if coef:
        if multi_target:
            expected_coefs_shape, single_chunk_axis = \
                (n_features, n_targets), 1
        else:
            expected_coefs_shape, single_chunk_axis = (n_features,), 0

        assert coefs.shape == expected_coefs_shape, "coefs shape mismatch"
        assert len(coefs.chunks[single_chunk_axis]) == 1

        # Lazy graphs: non-zero count per column, and residual spread.
        nonzero_per_column = da.sum(coefs != 0.0, axis=0)
        informative_ok = da.all(nonzero_per_column == n_informative)

        residual = values - (da.dot(out, coefs) + bias)
        residual_std = da.std(residual, axis=0)

        # Evaluate both graphs in a single compute call.
        informative_ok, residual_std = da.compute(informative_ok,
                                                  residual_std)

        tolerance = 1.5 * 10**(-1.)
        deviation = cp.abs(1.0 - residual_std)
        congruent = cp.all(deviation < tolerance)

        assert informative_ok, \
            "Unexpected number of informative features"

        assert congruent, "Unexpectedly incongruent outputs"

    data_handler = DistributedDataHandler.create(data=(out, values),
                                                 client=client)
    out_part, value_part = data_handler.gpu_futures[0][1].result()

    if coef:
        coefs_handler = DistributedDataHandler.create(data=coefs,
                                                      client=client)
        coefs_part = coefs_handler.gpu_futures[0][1].result()

    # Map the requested order onto the array flag that must be set;
    # any other order value has no layout expectation.
    if order == 'F':
        layout_flag = 'F_CONTIGUOUS'
    elif order == 'C':
        layout_flag = 'C_CONTIGUOUS'
    else:
        return

    assert out_part.flags[layout_flag]
    if multi_target:
        assert value_part.flags[layout_flag]
        if coef:
            assert coefs_part.flags[layout_flag]