Code example #1
0
File: test_lars.py  Project: daxiongshu/cuml
def test_lars_model(datatype, nrows, column_info, precompute, normalize):
    """Compare cuML LARS fit quality against scikit-learn on synthetic data."""
    ncols, n_info = column_info
    X_train, X_test, y_train, y_test = make_regression_dataset(
        datatype, nrows, ncols, n_info)

    # The solver expects normalized input data, so normalize manually
    # whenever the Gram matrix will be precomputed or the estimator's own
    # normalization is switched off.
    if not normalize or precompute == 'precompute':
        X_train, y_train, x_mean, x_scale, y_mean = \
            normalize_data(X_train, y_train)
        X_test = (X_test - x_mean) / x_scale
        y_test = y_test - y_mean

    if precompute == 'precompute':
        # Replace the sentinel string with the actual Gram matrix X^T X.
        precompute = np.dot(X_train.T, X_train)

    params = {'precompute': precompute, 'normalize': normalize}

    # Fit cuML's LARS and score on both splits.
    culars = cuLars(**params)
    culars.fit(X_train, y_train)

    cu_score_train = culars.score(X_train, y_train)
    cu_score_test = culars.score(X_test, y_test)

    if nrows >= 500000:
        # Too large to cross-check with scikit-learn; use a fixed bar.
        assert cu_score_test > 0.95
    else:
        # Fit the scikit-learn reference model with identical parameters.
        sklars = skLars(**params)
        sklars.fit(X_train, y_train)
        accuracy_target = sklars.score(X_test, y_test)
        # Tolerance spans the 95% confidence interval around the
        # scikit-learn accuracy, floored at 0.1%.
        tol = max(
            1.96 * np.sqrt(accuracy_target * (1.0 - accuracy_target) / 100.0),
            0.001)
        print(cu_score_train, cu_score_test, accuracy_target, tol)
        assert cu_score_train >= sklars.score(X_train, y_train) - tol
        assert cu_score_test >= accuracy_target - tol
Code example #2
0
File: test_lars.py  Project: daxiongshu/cuml
def test_lars_attributes(datatype, params):
    """Check cuML LARS fitted attributes (n_iter_, alphas_, active_, coef_,
    intercept_) against the scikit-learn reference implementation."""
    X, y = load_boston(return_X_y=True)
    X = X.astype(datatype)
    y = y.astype(datatype)

    culars = cuLars(**params)
    culars.fit(X, y)

    sklars = skLars(**params)
    sklars.fit(X, y)

    assert culars.score(X, y) >= sklars.score(X, y) - 0.01

    # When the iteration count is pinned via n_nonzero_coefs, both solvers
    # must stop after exactly the same number of steps; otherwise allow a
    # small difference.
    limit_max_iter = "n_nonzero_coefs" in params
    n_iter_tol = 0 if limit_max_iter else 2

    assert abs(culars.n_iter_ - sklars.n_iter_) <= n_iter_tol

    # Without an intercept the regularization path can diverge more, so
    # relax the per-element tolerance. Use get() instead of pop() so the
    # (potentially shared, parametrized) params dict is not mutated.
    tol = 1e-4 if params.get("fit_intercept", True) else 1e-1
    n = min(culars.n_iter_, sklars.n_iter_)
    assert array_equal(culars.alphas_[:n],
                       sklars.alphas_[:n],
                       unit_tol=tol,
                       total_tol=1e-4)
    assert array_equal(culars.active_[:n], sklars.active_[:n])

    if limit_max_iter:
        assert array_equal(culars.coef_, sklars.coef_)

        if hasattr(sklars, 'coef_path_'):
            assert array_equal(culars.coef_path_,
                               sklars.coef_path_[sklars.active_],
                               unit_tol=1e-3)

        intercept_diff = abs(culars.intercept_ - sklars.intercept_)
        if abs(sklars.intercept_) > 1e-6:
            # Fix: normalize by |intercept|. The previous code divided by the
            # signed intercept, so a negative reference value flipped the
            # sign of the relative difference and the assertion below passed
            # vacuously.
            intercept_diff /= abs(sklars.intercept_)
            assert intercept_diff <= 1e-3