def test_lars_model(datatype, nrows, column_info, precompute, normalize):
    """Compare cuML LARS scores with scikit-learn on a synthetic regression.

    Parameters
    ----------
    datatype
        Forwarded to ``make_regression_dataset``.
    nrows
        Number of rows requested from ``make_regression_dataset``. When it
        is 500000 or more, the scikit-learn comparison is skipped and only
        a minimum cuML test score is required.
    column_info
        Pair unpacked as ``(number of columns, n_info)`` and forwarded to
        ``make_regression_dataset``.
    precompute
        Forwarded to both estimators, except for the string
        ``'precompute'``, which is replaced by the matrix ``X_train.T @
        X_train`` computed here.
    normalize
        Forwarded to both estimators.
    """
    n_cols, n_informative = column_info
    X_train, X_test, y_train, y_test = make_regression_dataset(
        datatype, nrows, n_cols, n_informative)

    wants_explicit_gram = precompute == 'precompute'

    if wants_explicit_gram or not normalize:
        # The solver expects normalized input data, so normalize by hand
        # and apply the training statistics to the test split as well.
        X_train, y_train, x_mean, x_scale, y_mean = normalize_data(
            X_train, y_train)
        X_test = (X_test - x_mean) / x_scale
        y_test = y_test - y_mean

    if wants_explicit_gram:
        precompute = np.dot(X_train.T, X_train)

    params = dict(precompute=precompute, normalize=normalize)

    # Build and fit cuML's LARS, then score it on both splits.
    culars = cuLars(**params)
    culars.fit(X_train, y_train)
    cu_score_train = culars.score(X_train, y_train)
    cu_score_test = culars.score(X_test, y_test)

    if nrows >= 500000:
        # Largest configuration: no scikit-learn reference is fitted, only
        # an absolute quality bar is enforced.
        assert cu_score_test > 0.95
        return

    # Reference scikit-learn model fitted on the same data.
    sklars = skLars(**params)
    sklars.fit(X_train, y_train)

    # Tolerance covers the 95% confidence interval around the scikit-learn
    # accuracy, with a floor of 0.1%.
    accuracy_target = sklars.score(X_test, y_test)
    interval = 1.96 * np.sqrt(
        accuracy_target * (1.0 - accuracy_target) / 100.0)
    tol = 0.001 if interval < 0.001 else interval

    print(cu_score_train, cu_score_test, accuracy_target, tol)
    assert cu_score_train >= sklars.score(X_train, y_train) - tol
    assert cu_score_test >= accuracy_target - tol
def test_lars_attributes(datatype, params):
    """Check that fitted cuML LARS attributes match scikit-learn's.

    Both estimators are built from ``params`` and fitted on the data
    returned by ``load_boston``, cast to ``datatype``. The test then
    compares the score, ``n_iter_``, ``alphas_``, ``active_``, ``coef_``
    (only when ``n_nonzero_coefs`` is given), ``coef_path_`` (when the
    scikit-learn model exposes it) and ``intercept_``.

    Parameters
    ----------
    datatype
        dtype the feature matrix and target are cast to via ``astype``.
    params : dict
        Keyword arguments passed unchanged to both ``cuLars`` and
        ``skLars``. The dict is only read, never modified.
    """
    # NOTE(review): if this is sklearn.datasets.load_boston, it is removed
    # in recent scikit-learn releases -- confirm the pinned version.
    X, y = load_boston(return_X_y=True)
    X = X.astype(datatype)
    y = y.astype(datatype)

    culars = cuLars(**params)
    culars.fit(X, y)

    sklars = skLars(**params)
    sklars.fit(X, y)

    assert culars.score(X, y) >= sklars.score(X, y) - 0.01

    # With an explicit cap on non-zero coefficients the iteration counts
    # must match exactly; otherwise a small difference is allowed.
    limit_max_iter = "n_nonzero_coefs" in params
    n_iter_tol = 0 if limit_max_iter else 2
    assert abs(culars.n_iter_ - sklars.n_iter_) <= n_iter_tol

    # Use .get() rather than .pop(): the dict belongs to the caller
    # (presumably a pytest parametrization, where the same object is reused
    # by later invocations), so removing the key here would silently change
    # the configuration of subsequent runs.
    tol = 1e-4 if params.get("fit_intercept", True) else 1e-1

    # Compare only the steps both solvers actually performed.
    n = min(culars.n_iter_, sklars.n_iter_)
    assert array_equal(culars.alphas_[:n], sklars.alphas_[:n],
                       unit_tol=tol, total_tol=1e-4)
    assert array_equal(culars.active_[:n], sklars.active_[:n])

    if limit_max_iter:
        assert array_equal(culars.coef_, sklars.coef_)

    if hasattr(sklars, 'coef_path_'):
        # The scikit-learn path is indexed by its active set before being
        # compared with the cuML path.
        assert array_equal(culars.coef_path_,
                           sklars.coef_path_[sklars.active_],
                           unit_tol=1e-3)

    # Relative difference when the reference intercept is not ~0, absolute
    # difference otherwise. Divide by the magnitude: dividing by a negative
    # intercept would yield a negative ratio that always passes the check.
    intercept_diff = abs(culars.intercept_ - sklars.intercept_)
    if abs(sklars.intercept_) > 1e-6:
        intercept_diff /= abs(sklars.intercept_)
    assert intercept_diff <= 1e-3