Ejemplo n.º 1
0
def test_lars_copy_X(datatype):
    """Fitting with copy_X=True must leave the caller's input matrix intact."""
    features, target = load_boston(return_X_y=True)
    features = cp.asarray(features, dtype=datatype, order='F')
    target = cp.asarray(target, dtype=datatype, order='F')

    # Snapshot the data before fitting so we can detect in-place edits.
    snapshot = cp.copy(features)
    model = cuLars(precompute=False, copy_X=True)
    model.fit(features, target)
    # Every element must be unchanged after the fit.
    assert cp.all(snapshot == features)
Ejemplo n.º 2
0
def test_lars_collinear(datatype, nrows, column_info, precompute):
    """LARS should still fit well when features are exactly duplicated."""
    ncols, n_info = column_info

    X_train, X_test, y_train, y_test = make_regression_dataset(
        datatype, nrows, ncols, n_info)

    # Append copies of up to 100 leading columns to create exact collinearity.
    dup = min(ncols, 100)
    X_train = np.concatenate((X_train, X_train[:, :dup]), axis=1)
    X_test = np.concatenate((X_test, X_test[:, :dup]), axis=1)

    model = cuLars(precompute=precompute, n_nonzero_coefs=ncols + dup)
    model.fit(X_train, y_train)

    # The fit must generalize despite the redundant columns.
    for X, y in ((X_train, y_train), (X_test, y_test)):
        assert model.score(X, y) > 0.85
Ejemplo n.º 3
0
def test_lars_model(datatype, nrows, column_info, precompute, normalize):
    """Compare cuML LARS accuracy against scikit-learn on generated data."""
    ncols, n_info = column_info
    X_train, X_test, y_train, y_test = make_regression_dataset(
        datatype, nrows, ncols, n_info)

    if precompute == 'precompute' or not normalize:
        # Apply normalization manually, because the solver expects normalized
        # input data in these configurations.
        X_train, y_train, x_mean, x_scale, y_mean = \
            normalize_data(X_train, y_train)
        X_test = (X_test - x_mean) / x_scale
        y_test = y_test - y_mean

    if precompute == 'precompute':
        # Replace the sentinel string with an explicit Gram matrix.
        precompute = np.dot(X_train.T, X_train)

    params = {'precompute': precompute, 'normalize': normalize}

    # Fit cuML's LARS and record its train/test scores.
    culars = cuLars(**params)
    culars.fit(X_train, y_train)
    cu_score_train = culars.score(X_train, y_train)
    cu_score_test = culars.score(X_test, y_test)

    if nrows >= 500000:
        # Too large for a scikit-learn reference run; check absolute score.
        assert cu_score_test > 0.95
    else:
        # Fit the scikit-learn reference model with identical parameters.
        sklars = skLars(**params)
        sklars.fit(X_train, y_train)
        # Tolerance covers the 95% confidence interval around the
        # scikit-learn accuracy, with a 0.1% floor.
        accuracy_target = sklars.score(X_test, y_test)
        tol = 1.96 * np.sqrt(accuracy_target * (1.0 - accuracy_target) / 100.0)
        tol = max(tol, 0.001)
        print(cu_score_train, cu_score_test, accuracy_target, tol)
        assert cu_score_train >= sklars.score(X_train, y_train) - tol
        assert cu_score_test >= accuracy_target - tol
Ejemplo n.º 4
0
def test_lars_attributes(datatype, params):
    """Check cuML LARS fitted attributes against the scikit-learn reference.

    Compares score, iteration count, the regularization path
    (``alphas_``, ``active_``) and — when the iteration count is capped via
    ``n_nonzero_coefs`` — the coefficients, coefficient path and intercept.
    """
    X, y = load_boston(return_X_y=True)
    X = X.astype(datatype)
    y = y.astype(datatype)

    culars = cuLars(**params)
    culars.fit(X, y)

    sklars = skLars(**params)
    sklars.fit(X, y)

    assert culars.score(X, y) >= sklars.score(X, y) - 0.01

    # With n_nonzero_coefs both solvers take exactly the same number of
    # steps; otherwise allow a small difference in iteration count.
    limit_max_iter = "n_nonzero_coefs" in params
    n_iter_tol = 0 if limit_max_iter else 2

    assert abs(culars.n_iter_ - sklars.n_iter_) <= n_iter_tol

    # Bug fix: use get() instead of pop(). The params dict belongs to the
    # caller (a pytest parametrization); pop() mutated it, removing
    # "fit_intercept" for any later use of the same dict.
    tol = 1e-4 if params.get("fit_intercept", True) else 1e-1
    n = min(culars.n_iter_, sklars.n_iter_)
    assert array_equal(culars.alphas_[:n],
                       sklars.alphas_[:n],
                       unit_tol=tol,
                       total_tol=1e-4)
    assert array_equal(culars.active_[:n], sklars.active_[:n])

    if limit_max_iter:
        assert array_equal(culars.coef_, sklars.coef_)

        if hasattr(sklars, 'coef_path_'):
            assert array_equal(culars.coef_path_,
                               sklars.coef_path_[sklars.active_],
                               unit_tol=1e-3)

        # Compare intercepts relatively when sklearn's is non-negligible.
        intercept_diff = abs(culars.intercept_ - sklars.intercept_)
        if abs(sklars.intercept_) > 1e-6:
            intercept_diff /= sklars.intercept_
            assert intercept_diff <= 1e-3
Ejemplo n.º 5
0
def test_lars_collinear(datatype, nrows, column_info, precompute):
    """LARS should still fit well when features are exactly duplicated."""
    ncols, n_info = column_info

    # Stress-sized problem: shrink it or skip on small-memory GPUs.
    if nrows == 500000 and ncols == 1000 and pytest.max_gpu_memory < 32:
        if not pytest.adapt_stress_test:
            pytest.skip("Insufficient GPU memory for this test."
                        "Re-run with 'CUML_ADAPT_STRESS_TESTS=True'")
        nrows = nrows * pytest.max_gpu_memory // 32

    X_train, X_test, y_train, y_test = make_regression_dataset(
        datatype, nrows, ncols, n_info)

    # Append copies of up to 100 leading columns to create exact collinearity.
    dup = min(ncols, 100)
    X_train = np.concatenate((X_train, X_train[:, :dup]), axis=1)
    X_test = np.concatenate((X_test, X_test[:, :dup]), axis=1)

    model = cuLars(precompute=precompute, n_nonzero_coefs=ncols + dup)
    model.fit(X_train, y_train)

    # The fit must generalize despite the redundant columns.
    for X, y in ((X_train, y_train), (X_test, y_test)):
        assert model.score(X, y) > 0.85