def test_lasso_default(datatype, nrows, column_info):
    """Compare a default-constructed cuLasso against a default sklearn Lasso.

    Data comes from ``make_regression`` (seeded with ``random_state=0``),
    is cast to ``datatype`` and split 80/20 into train and test parts.
    Both estimators are fitted on the training part; the cuML R^2 score on
    the held-out part may be at most 0.07 below the scikit-learn score.

    ``column_info`` is a ``(n_features, n_informative)`` pair.
    """
    n_features, n_informative = column_info

    features, target = make_regression(
        n_samples=nrows,
        n_features=n_features,
        n_informative=n_informative,
        random_state=0,
    )
    features = features.astype(datatype)
    target = target.astype(datatype)

    train_X, test_X, train_y, test_y = train_test_split(
        features, target, train_size=0.8, random_state=0
    )

    # cuML estimator, all defaults.
    gpu_model = cuLasso()
    gpu_model.fit(train_X, train_y)
    assert gpu_model.coef_ is not None
    gpu_score = r2_score(test_y, gpu_model.predict(test_X))

    # scikit-learn reference, all defaults.
    cpu_model = Lasso()
    cpu_model.fit(train_X, train_y)
    cpu_score = r2_score(test_y, cpu_model.predict(test_X))

    assert gpu_score >= cpu_score - 0.07
def test_lasso(datatype, X_type, alpha, algorithm, nrows, column_info):
    """Compare cuLasso against sklearn Lasso for explicit hyper-parameters.

    Data comes from ``make_regression`` (seeded with ``random_state=0``),
    is cast to ``datatype`` and split 80/20 into train and test parts.
    cuLasso is always fitted and scored. The scikit-learn reference is only
    fitted, and the R^2 comparison only made, when ``nrows < 500000``; in
    that case the cuML score may be at most 0.07 below the reference.

    ``column_info`` is a ``(n_features, n_informative)`` pair and
    ``algorithm`` is forwarded as the ``selection`` argument of both
    estimators. ``X_type`` is accepted but not used by this test body.
    """
    n_features, n_informative = column_info

    features, target = make_regression(
        n_samples=nrows,
        n_features=n_features,
        n_informative=n_informative,
        random_state=0,
    )
    features = features.astype(datatype)
    target = target.astype(datatype)

    train_X, test_X, train_y, test_y = train_test_split(
        features, target, train_size=0.8, random_state=0
    )

    gpu_model = cuLasso(
        alpha=np.array([alpha]),
        fit_intercept=True,
        normalize=False,
        max_iter=1000,
        selection=algorithm,
        tol=1e-10,
    )
    gpu_model.fit(train_X, train_y)
    assert gpu_model.coef_ is not None
    gpu_score = r2_score(test_y, gpu_model.predict(test_X))

    # Large problems are only exercised on the cuML side; there is no
    # scikit-learn reference to compare against in that case.
    if nrows >= 500000:
        return

    cpu_model = Lasso(
        alpha=np.array([alpha]),
        fit_intercept=True,
        normalize=False,
        max_iter=1000,
        selection=algorithm,
        tol=1e-10,
    )
    cpu_model.fit(train_X, train_y)
    cpu_score = r2_score(test_y, cpu_model.predict(test_X))

    assert gpu_score >= cpu_score - 0.07
def test_lasso_predict_convert_dtype(train_dtype, test_dtype):
    """Fit cuLasso on ``train_dtype`` data and predict on ``test_dtype`` data.

    This is a smoke test: it makes no assertion on the predictions and
    passes as long as ``fit`` and ``predict`` complete without raising.
    """
    features, target = make_regression(
        n_samples=50,
        n_features=10,
        n_informative=5,
        random_state=0,
    )
    features = features.astype(train_dtype)
    target = target.astype(train_dtype)

    # The held-out targets are not needed; only the test features are used.
    train_X, test_X, train_y, _unused_test_y = train_test_split(
        features, target, train_size=0.8, random_state=0
    )

    model = cuLasso()
    model.fit(train_X, train_y)

    # Predict with inputs cast to a (possibly) different dtype than training.
    model.predict(test_X.astype(test_dtype))
def test_lasso(datatype, X_type, alpha, algorithm, nrows, ncols, n_info):
    """Compare cuLasso predictions with sklearn Lasso predictions.

    Data comes from ``make_regression`` (seeded with ``random_state=0``).
    The first 80% of the rows are used for training and the remainder for
    prediction, all cast to ``datatype``.

    ``X_type`` selects the container handed to cuLasso:

    * ``'dataframe'`` - the data is wrapped in pandas DataFrames and then
      converted to a ``cudf.DataFrame`` / ``cudf.Series`` for cuLasso;
      scikit-learn receives the pandas objects.
    * ``'ndarray'`` - both estimators receive the NumPy arrays directly.

    The scikit-learn reference is only fitted, and the predictions only
    compared (``array_equal`` with tolerance ``1e-1``), when
    ``nrows < 500000``.

    Raises
    ------
    ValueError
        If ``X_type`` is neither ``'dataframe'`` nor ``'ndarray'``.
    """
    # Fail fast on an unknown container type. Without this check neither
    # branch below would run, leaving ``cu_predict`` unbound: the test would
    # then pass vacuously for large ``nrows`` or die with a NameError at the
    # final comparison for small ``nrows``.
    if X_type not in ('dataframe', 'ndarray'):
        raise ValueError("Unsupported X_type: %r" % (X_type,))

    train_rows = np.int32(nrows * 0.8)
    X, y = make_regression(
        n_samples=nrows,
        n_features=ncols,
        n_informative=n_info,
        random_state=0,
    )

    # Positional split: leading rows train, trailing rows test.
    X_test = np.asarray(X[train_rows:, 0:], dtype=datatype)
    X_train = np.asarray(X[0:train_rows, :], dtype=datatype)
    y_train = np.asarray(y[0:train_rows, ], dtype=datatype)

    cu_lasso = cuLasso(
        alpha=np.array([alpha]),
        fit_intercept=True,
        normalize=False,
        max_iter=1000,
        selection=algorithm,
        tol=1e-10,
    )

    if X_type == 'dataframe':
        # Rebind the training/test data to pandas objects; the scikit-learn
        # reference below is fitted on these same pandas objects.
        y_train = pd.DataFrame({'fea0': y_train[0:, ]})
        X_train = pd.DataFrame(
            {'fea%d' % i: X_train[0:, i] for i in range(X_train.shape[1])}
        )
        X_test = pd.DataFrame(
            {'fea%d' % i: X_test[0:, i] for i in range(X_test.shape[1])}
        )

        X_cudf = cudf.DataFrame.from_pandas(X_train)
        X_cudf_test = cudf.DataFrame.from_pandas(X_test)
        # The single target column is passed to cuLasso as a cudf Series.
        y_cudf = cudf.Series(y_train.values[:, 0])

        cu_lasso.fit(X_cudf, y_cudf)
        cu_predict = cu_lasso.predict(X_cudf_test)
    else:
        # X_type == 'ndarray' (guaranteed by the validation above).
        cu_lasso.fit(X_train, y_train)
        cu_predict = cu_lasso.predict(X_test)

    if nrows < 500000:
        sk_lasso = Lasso(
            alpha=np.array([alpha]),
            fit_intercept=True,
            normalize=False,
            max_iter=1000,
            selection=algorithm,
            tol=1e-10,
        )
        sk_lasso.fit(X_train, y_train)
        sk_predict = sk_lasso.predict(X_test)
        assert array_equal(sk_predict, cu_predict, 1e-1, with_sign=True)
X_train_np = X_train.toarray() else: X_train_np = X_train if args.densify_all: X_train = X_train_np if args.test == 'ridge': sk = Ridge(fit_intercept=False, alpha=regularizer, max_iter=1000000, tol=1e-06) cu = cuRidge(fit_intercept=False, alpha=regularizer, solver='eig') elif args.test == 'lasso': sk = Lasso(fit_intercept=False, alpha=regularizer / X_train.shape[0]) cu = cuLasso(fit_intercept=False, alpha=regularizer / X_train.shape[0]) elif args.test == 'logistic': sk = Logistic(fit_intercept=False, C=regularizer, dual=True, solver='liblinear') cu = cuLogistic(fit_intercept=False, C=regularizer * X_train.shape[0], max_iter=100000, tol=1e-8) else: raise ("Invalid test") if args.densify_sk: X_train_sk = X_train_np else: