def test_ridge(): # Ridge regression convergence test using score # TODO: for this test to be robust, we should use a dataset instead # of np.random. rng = np.random.RandomState(0) alpha = 1.0 for solver in ("svd", "sparse_cg", "cholesky", "lsqr"): # With more samples than features n_samples, n_features = 6, 5 y = rng.randn(n_samples) X = rng.randn(n_samples, n_features) ridge = Ridge(alpha=alpha, solver=solver) ridge.fit(X, y) assert_equal(ridge.coef_.shape, (X.shape[1], )) assert_greater(ridge.score(X, y), 0.47) if solver == "cholesky": # Currently the only solver to support sample_weight. ridge.fit(X, y, sample_weight=np.ones(n_samples)) assert_greater(ridge.score(X, y), 0.47) # With more features than samples n_samples, n_features = 5, 10 y = rng.randn(n_samples) X = rng.randn(n_samples, n_features) ridge = Ridge(alpha=alpha, solver=solver) ridge.fit(X, y) assert_greater(ridge.score(X, y), .9) if solver == "cholesky": # Currently the only solver to support sample_weight. ridge.fit(X, y, sample_weight=np.ones(n_samples)) assert_greater(ridge.score(X, y), 0.9)
def test_ridge(): """Ridge regression convergence test using score TODO: for this test to be robust, we should use a dataset instead of np.random. """ alpha = 1.0 for solver in ("sparse_cg", "dense_cholesky", "lsqr"): # With more samples than features n_samples, n_features = 6, 5 y = rng.randn(n_samples) X = rng.randn(n_samples, n_features) ridge = Ridge(alpha=alpha, solver=solver) ridge.fit(X, y) assert_equal(ridge.coef_.shape, (X.shape[1], )) assert_greater(ridge.score(X, y), 0.47) ridge.fit(X, y, sample_weight=np.ones(n_samples)) assert_greater(ridge.score(X, y), 0.47) # With more features than samples n_samples, n_features = 5, 10 y = rng.randn(n_samples) X = rng.randn(n_samples, n_features) ridge = Ridge(alpha=alpha, solver=solver) ridge.fit(X, y) assert_greater(ridge.score(X, y), .9) ridge.fit(X, y, sample_weight=np.ones(n_samples)) assert_greater(ridge.score(X, y), 0.9)
def test_ridge(solver): # Ridge regression convergence test using score # TODO: for this test to be robust, we should use a dataset instead # of np.random. rng = np.random.RandomState(0) alpha = 1.0 # With more samples than features n_samples, n_features = 6, 5 y = rng.randn(n_samples) X = rng.randn(n_samples, n_features) ridge = Ridge(alpha=alpha, solver=solver) ridge.fit(X, y) assert ridge.coef_.shape == (X.shape[1], ) assert ridge.score(X, y) > 0.47 if solver in ("cholesky", "sag"): # Currently the only solvers to support sample_weight. ridge.fit(X, y, sample_weight=np.ones(n_samples)) assert ridge.score(X, y) > 0.47 # With more features than samples n_samples, n_features = 5, 10 y = rng.randn(n_samples) X = rng.randn(n_samples, n_features) ridge = Ridge(alpha=alpha, solver=solver) ridge.fit(X, y) assert ridge.score(X, y) > .9 if solver in ("cholesky", "sag"): # Currently the only solvers to support sample_weight. ridge.fit(X, y, sample_weight=np.ones(n_samples)) assert ridge.score(X, y) > 0.9
def test_ridge(): """Ridge regression convergence test using score TODO: for this test to be robust, we should use a dataset instead of np.random. """ alpha = 1.0 # With more samples than features n_samples, n_features = 6, 5 y = np.random.randn(n_samples) X = np.random.randn(n_samples, n_features) ridge = Ridge(alpha=alpha) ridge.fit(X, y) assert_equal(ridge.coef_.shape, (X.shape[1],)) assert ridge.score(X, y) > 0.5 ridge.fit(X, y, sample_weight=np.ones(n_samples)) assert ridge.score(X, y) > 0.5 # With more features than samples n_samples, n_features = 5, 10 y = np.random.randn(n_samples) X = np.random.randn(n_samples, n_features) ridge = Ridge(alpha=alpha) ridge.fit(X, y) assert ridge.score(X, y) > 0.9 ridge.fit(X, y, sample_weight=np.ones(n_samples)) assert ridge.score(X, y) > 0.9
def test_ridge(): """Ridge regression convergence test using score TODO: for this test to be robust, we should use a dataset instead of np.random. """ alpha = 1.0 # With more samples than features n_samples, n_features = 6, 5 y = np.random.randn(n_samples) X = np.random.randn(n_samples, n_features) ridge = Ridge(alpha=alpha) ridge.fit(X, y) assert_equal(ridge.coef_.shape, (X.shape[1], )) assert_true(ridge.score(X, y) > 0.5) ridge.fit(X, y, sample_weight=np.ones(n_samples)) assert_true(ridge.score(X, y) > 0.5) # With more features than samples n_samples, n_features = 5, 10 y = np.random.randn(n_samples) X = np.random.randn(n_samples, n_features) ridge = Ridge(alpha=alpha) ridge.fit(X, y) assert_true(ridge.score(X, y) > .9) ridge.fit(X, y, sample_weight=np.ones(n_samples)) assert_true(ridge.score(X, y) > 0.9)
def _test_tolerance(filter_): ridge = Ridge(tol=1e-5) ridge.fit(filter_(X_diabetes), y_diabetes) score = ridge.score(filter_(X_diabetes), y_diabetes) ridge2 = Ridge(tol=1e-3) ridge2.fit(filter_(X_diabetes), y_diabetes) score2 = ridge2.score(filter_(X_diabetes), y_diabetes) assert_true(score >= score2)
def _test_tolerance(filter_): ridge = Ridge(tol=1e-5, fit_intercept=False) ridge.fit(filter_(X_diabetes), y_diabetes) score = ridge.score(filter_(X_diabetes), y_diabetes) ridge2 = Ridge(tol=1e-3, fit_intercept=False) ridge2.fit(filter_(X_diabetes), y_diabetes) score2 = ridge2.score(filter_(X_diabetes), y_diabetes) assert score >= score2
def test_ridge_singular(): # test on a singular matrix rng = np.random.RandomState(0) n_samples, n_features = 6, 6 y = rng.randn(n_samples // 2) y = np.concatenate((y, y)) X = rng.randn(n_samples // 2, n_features) X = np.concatenate((X, X), axis=0) ridge = Ridge(alpha=0) ridge.fit(X, y) assert_greater(ridge.score(X, y), 0.9)
def test_fit_simple_backupsklearn(): df = pd.read_csv("./open_data/simple.txt", delim_whitespace=True) X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C') y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C') Solver = h2o4gpu.Ridge enet = Solver(glm_stop_early=False) print("h2o4gpu fit()") enet.fit(X, y) print("h2o4gpu predict()") print(enet.predict(X)) print("h2o4gpu score()") print(enet.score(X, y)) enet_wrapper = Solver(normalize=True, random_state=1234) print("h2o4gpu scikit wrapper fit()") enet_wrapper.fit(X, y) print("h2o4gpu scikit wrapper predict()") print(enet_wrapper.predict(X)) print("h2o4gpu scikit wrapper score()") print(enet_wrapper.score(X, y)) from sklearn.linear_model.ridge import Ridge enet_sk = Ridge(normalize=True, random_state=1234) print("Scikit fit()") enet_sk.fit(X, y) print("Scikit predict()") print(enet_sk.predict(X)) print("Scikit score()") print(enet_sk.score(X, y)) enet_sk_coef = csr_matrix(enet_sk.coef_, dtype=np.float32).toarray() print(enet_sk.coef_) print(enet_sk_coef) print(enet_wrapper.coef_) print(enet_sk.intercept_) print(enet_wrapper.intercept_) print(enet_sk.n_iter_) print(enet_wrapper.n_iter_) print("Coeffs, intercept, and n_iters should match") assert np.allclose(enet_wrapper.coef_, enet_sk_coef) assert np.allclose(enet_wrapper.intercept_, enet_sk.intercept_)
def test_fit_simple_backupsklearn(): df = pd.read_csv("./open_data/simple.txt", delim_whitespace=True) X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C') y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C') Solver = h2o4gpu.Ridge enet = Solver(glm_stop_early=False) print("h2o4gpu fit()") enet.fit(X, y) print("h2o4gpu predict()") print(enet.predict(X)) print("h2o4gpu score()") print(enet.score(X,y)) enet_wrapper = Solver(normalize=True, random_state=1234) print("h2o4gpu scikit wrapper fit()") enet_wrapper.fit(X, y) print("h2o4gpu scikit wrapper predict()") print(enet_wrapper.predict(X)) print("h2o4gpu scikit wrapper score()") print(enet_wrapper.score(X, y)) from sklearn.linear_model.ridge import Ridge enet_sk = Ridge(normalize=True, random_state=1234) print("Scikit fit()") enet_sk.fit(X, y) print("Scikit predict()") print(enet_sk.predict(X)) print("Scikit score()") print(enet_sk.score(X, y)) enet_sk_coef = csr_matrix(enet_sk.coef_, dtype=np.float32).toarray() print(enet_sk.coef_) print(enet_sk_coef) print(enet_wrapper.coef_) print(enet_sk.intercept_) print(enet_wrapper.intercept_) print(enet_sk.n_iter_) print(enet_wrapper.n_iter_) print("Coeffs, intercept, and n_iters should match") assert np.allclose(enet_wrapper.coef_, enet_sk_coef) assert np.allclose(enet_wrapper.intercept_, enet_sk.intercept_)
def _test_ridge_diabetes(filter_): ridge = Ridge(fit_intercept=False) ridge.fit(filter_(X_diabetes), y_diabetes) return np.round(ridge.score(filter_(X_diabetes), y_diabetes), 5)