import numpy as np
from numpy.testing import assert_almost_equal, assert_equal

# SparseLasso and DenseLasso are the sparse- and dense-input Lasso
# implementations under test (in older scikit-learn these were separate
# classes); a sketch of the make_sparse_data helper follows this test.


def test_sparse_lasso_not_as_toy_dataset():
    n_samples, n_features, max_iter = 100, 100, 1000
    n_informative = 10
    X, y = make_sparse_data(n_samples, n_features, n_informative)

    # Integer division keeps the slice indices valid under Python 3.
    X_train, X_test = X[n_samples // 2:], X[:n_samples // 2]
    y_train, y_test = y[n_samples // 2:], y[:n_samples // 2]

    s_clf = SparseLasso(alpha=0.1, fit_intercept=False, max_iter=max_iter,
                        tol=1e-7)
    s_clf.fit(X_train, y_train)
    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert s_clf.score(X_test, y_test) > 0.85

    # Check that convergence matches the dense implementation.
    d_clf = DenseLasso(alpha=0.1, fit_intercept=False, max_iter=max_iter,
                       tol=1e-7)
    d_clf.fit(X_train, y_train)
    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert d_clf.score(X_test, y_test) > 0.85

    # Check that the coefficients are sparse: only the informative
    # features should be non-zero.
    assert_equal(np.sum(s_clf.coef_ != 0.0), n_informative)
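# Both variants of test_sparse_lasso_not_as_toy_dataset rely on a
# make_sparse_data helper that is not defined in this file. A minimal
# sketch, loosely modeled on the scikit-learn test utility of the same
# name, is given below; the signature, defaults, sparsity level and the
# noiseless target are all assumptions, not the original helper.
import scipy.sparse as sp


def make_sparse_data(n_samples=100, n_features=100, n_informative=10,
                     seed=42):
    """Build a sparse regression problem in which only the first
    ``n_informative`` features carry signal, so a well-tuned Lasso fit
    should recover roughly that many non-zero coefficients."""
    rng = np.random.RandomState(seed)
    # Dense Gaussian design with roughly half the entries zeroed out,
    # stored in CSC format (the layout coordinate descent prefers).
    X = rng.randn(n_samples, n_features)
    X[rng.uniform(size=X.shape) < 0.5] = 0.0
    X = sp.csc_matrix(X)
    # Sparse ground-truth weights: signal in the first features only.
    w = np.zeros(n_features)
    w[:n_informative] = rng.randn(n_informative)
    y = X @ w
    return X, y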
import pandas as pd
from scipy.sparse import csr_matrix

import h2o4gpu


def test_fit_simple_backupsklearn(backend='auto'):
    """Fit h2o4gpu's Lasso on a small dataset and, when the scikit-learn
    backup solver is in use, check that the wrapper reproduces
    scikit-learn's coefficients, intercept and iteration count to
    numerical tolerance."""
    df = pd.read_csv("./open_data/simple.txt", delim_whitespace=True)
    X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C')

    Solver = h2o4gpu.Lasso
    enet = Solver(glm_stop_early=False, backend=backend)
    print("h2o4gpu fit()")
    enet.fit(X, y)
    print("h2o4gpu predict()")
    print(enet.predict(X))
    print("h2o4gpu score()")
    print(enet.score(X, y))

    enet_wrapper = Solver(positive=True, random_state=1234, backend=backend)
    print("h2o4gpu scikit wrapper fit()")
    enet_wrapper.fit(X, y)
    print("h2o4gpu scikit wrapper predict()")
    print(enet_wrapper.predict(X))
    print("h2o4gpu scikit wrapper score()")
    print(enet_wrapper.score(X, y))

    # Lasso comes from the public sklearn.linear_model namespace (the
    # coordinate_descent submodule became private in scikit-learn 0.22).
    from sklearn.linear_model import Lasso
    enet_sk = Lasso(positive=True, random_state=1234)
    print("Scikit fit()")
    enet_sk.fit(X, y)
    print("Scikit predict()")
    print(enet_sk.predict(X))
    print("Scikit score()")
    print(enet_sk.score(X, y))

    enet_sk_coef = csr_matrix(enet_sk.coef_, dtype=np.float32).toarray()
    enet_sk_sparse_coef = csr_matrix(enet_sk.sparse_coef_,
                                     dtype=np.float32).toarray()

    # The exact comparison against scikit-learn only makes sense when the
    # scikit-learn backup solver was actually used.
    if backend != 'h2o4gpu':
        print(enet_sk.coef_)
        print(enet_sk.sparse_coef_)
        print(enet_sk_coef)
        print(enet_sk_sparse_coef)
        print(enet_wrapper.coef_)
        print(enet_wrapper.sparse_coef_)
        print(enet_sk.intercept_)
        print(enet_wrapper.intercept_)
        print(enet_sk.n_iter_)
        print(enet_wrapper.n_iter_)
        print(enet_wrapper.time_prepare)
        print(enet_wrapper.time_upload_data)
        print(enet_wrapper.time_fitonly)

        assert np.allclose(enet_wrapper.coef_, enet_sk_coef)
        assert np.allclose(enet_wrapper.intercept_, enet_sk.intercept_)
        assert np.allclose(enet_wrapper.n_iter_, enet_sk.n_iter_)
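# The test above reads ./open_data/simple.txt from the h2o4gpu test data.
# If that file is missing, a stand-in with the shape the test expects
# (whitespace-delimited, header row, last column as the target) can be
# generated as below; the sizes, column names and noise level are purely
# illustrative assumptions, not the real dataset.
import os


def write_stand_in_simple_txt(path="./open_data/simple.txt"):
    os.makedirs(os.path.dirname(path), exist_ok=True)
    rng = np.random.RandomState(0)
    X = rng.rand(100, 5).astype(np.float32)
    # Linear target with mild noise so a Lasso fit is well-behaved.
    y = (X @ rng.rand(5) + 0.01 * rng.randn(100)).astype(np.float32)
    frame = pd.DataFrame(np.column_stack([X, y]),
                         columns=["x%d" % i for i in range(5)] + ["y"])
    # Single-space separation satisfies delim_whitespace=True above.
    frame.to_csv(path, sep=" ", index=False)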
# Same imports and make_sparse_data helper as the first variant above;
# this version exercises the unified Lasso class on both sparse and
# densified inputs.
from sklearn.linear_model import Lasso


def test_sparse_lasso_not_as_toy_dataset():
    n_samples = 100
    max_iter = 1000
    n_informative = 10
    X, y = make_sparse_data(n_samples=n_samples, n_informative=n_informative)

    # Integer division keeps the slice indices valid under Python 3.
    X_train, X_test = X[n_samples // 2:], X[:n_samples // 2]
    y_train, y_test = y[n_samples // 2:], y[:n_samples // 2]

    s_clf = Lasso(alpha=0.1, fit_intercept=False, max_iter=max_iter, tol=1e-7)
    s_clf.fit(X_train, y_train)
    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert s_clf.score(X_test, y_test) > 0.85

    # Check that convergence matches a fit on the densified data
    # (toarray() rather than todense(), since newer scikit-learn
    # versions reject np.matrix input).
    d_clf = Lasso(alpha=0.1, fit_intercept=False, max_iter=max_iter, tol=1e-7)
    d_clf.fit(X_train.toarray(), y_train)
    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert d_clf.score(X_test, y_test) > 0.85

    # Check that the coefficients are sparse: only the informative
    # features should be non-zero.
    assert_equal(np.sum(s_clf.coef_ != 0.0), n_informative)
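# The "same convergence as the dense version" comment can be checked more
# directly by comparing the fitted coefficients themselves. A standalone
# sketch of that check follows; the data setup and the tolerance are
# illustrative, not part of the original tests.
from scipy.sparse import random as sparse_random


def check_sparse_dense_coef_agreement():
    rng = np.random.RandomState(42)
    X_sp = sparse_random(60, 30, density=0.2, format="csc", random_state=rng)
    y = X_sp @ rng.randn(30)

    common = dict(alpha=0.1, fit_intercept=False, max_iter=1000, tol=1e-7)
    sparse_fit = Lasso(**common).fit(X_sp, y)
    dense_fit = Lasso(**common).fit(X_sp.toarray(), y)

    # Both code paths should land on (numerically) the same solution.
    assert np.allclose(sparse_fit.coef_, dense_fit.coef_, atol=1e-5)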