def test_dtype_match_cholesky():
    # Test different alphas in cholesky solver to ensure full coverage.
    # This test is separated from test_dtype_match for clarity.
    rng = np.random.RandomState(0)
    alpha = (1.0, 0.5)

    n_samples, n_features, n_target = 6, 7, 2
    X_64 = rng.randn(n_samples, n_features)
    y_64 = rng.randn(n_samples, n_target)
    X_32 = X_64.astype(np.float32)
    y_32 = y_64.astype(np.float32)

    # Check type consistency 32 bits
    ridge_32 = Ridge(alpha=alpha, solver='cholesky')
    ridge_32.fit(X_32, y_32)
    coef_32 = ridge_32.coef_

    # Check type consistency 64 bits
    ridge_64 = Ridge(alpha=alpha, solver='cholesky')
    ridge_64.fit(X_64, y_64)
    coef_64 = ridge_64.coef_

    # Do all the checks at once; this way it is easier to debug
    assert coef_32.dtype == X_32.dtype
    assert coef_64.dtype == X_64.dtype
    assert ridge_32.predict(X_32).dtype == X_32.dtype
    assert ridge_64.predict(X_64).dtype == X_64.dtype
    assert_almost_equal(ridge_32.coef_, ridge_64.coef_, decimal=5)

def test_ridge_individual_penalties():
    # Tests the ridge object using individual penalties
    rng = np.random.RandomState(42)

    n_samples, n_features, n_targets = 20, 10, 5
    X = rng.randn(n_samples, n_features)
    y = rng.randn(n_samples, n_targets)

    penalties = np.arange(n_targets)

    coef_cholesky = np.array([
        Ridge(alpha=alpha, solver="cholesky").fit(X, target).coef_
        for alpha, target in zip(penalties, y.T)])

    coefs_indiv_pen = [
        Ridge(alpha=penalties, solver=solver, tol=1e-8).fit(X, y).coef_
        for solver in ['svd', 'sparse_cg', 'lsqr', 'cholesky', 'sag', 'saga']]
    for coef_indiv_pen in coefs_indiv_pen:
        assert_array_almost_equal(coef_cholesky, coef_indiv_pen)

    # Test error is raised when number of targets and penalties do not match.
    ridge = Ridge(alpha=penalties[:-1])
    assert_raises(ValueError, ridge.fit, X, y)

def test_ridge_fit_intercept_sparse_error(solver):
    X, y = _make_sparse_offset_regression(n_features=20, random_state=0)
    X_csr = sp.csr_matrix(X)
    sparse_ridge = Ridge(solver=solver)
    err_msg = "solver='{}' does not support".format(solver)
    with pytest.raises(ValueError, match=err_msg):
        sparse_ridge.fit(X_csr, y)

def test_dtype_match(solver):
    rng = np.random.RandomState(0)
    alpha = 1.0

    n_samples, n_features = 6, 5
    X_64 = rng.randn(n_samples, n_features)
    y_64 = rng.randn(n_samples)
    X_32 = X_64.astype(np.float32)
    y_32 = y_64.astype(np.float32)

    tol = 2 * np.finfo(np.float32).resolution

    # Check type consistency 32 bits
    ridge_32 = Ridge(alpha=alpha, solver=solver, max_iter=500, tol=tol)
    ridge_32.fit(X_32, y_32)
    coef_32 = ridge_32.coef_

    # Check type consistency 64 bits
    ridge_64 = Ridge(alpha=alpha, solver=solver, max_iter=500, tol=tol)
    ridge_64.fit(X_64, y_64)
    coef_64 = ridge_64.coef_

    # Do all the checks at once for easier debugging
    assert coef_32.dtype == X_32.dtype
    assert coef_64.dtype == X_64.dtype
    assert ridge_32.predict(X_32).dtype == X_32.dtype
    assert ridge_64.predict(X_64).dtype == X_64.dtype
    assert_allclose(ridge_32.coef_, ridge_64.coef_, rtol=1e-4, atol=5e-4)

def test_solver_consistency(solver, proportion_nonzero, n_samples, dtype,
                            sparse_X, seed):
    alpha = 1.
    noise = 50. if proportion_nonzero > .9 else 500.
    X, y = _make_sparse_offset_regression(
        bias=10, n_features=30, proportion_nonzero=proportion_nonzero,
        noise=noise, random_state=seed, n_samples=n_samples)
    svd_ridge = Ridge(solver='svd', normalize=True, alpha=alpha).fit(X, y)
    X = X.astype(dtype, copy=False)
    y = y.astype(dtype, copy=False)
    if sparse_X:
        X = sp.csr_matrix(X)
    if solver == 'ridgecv':
        ridge = RidgeCV(alphas=[alpha], normalize=True)
    else:
        ridge = Ridge(solver=solver, tol=1e-10, normalize=True, alpha=alpha)
    ridge.fit(X, y)
    assert_allclose(ridge.coef_, svd_ridge.coef_, atol=1e-3, rtol=1e-3)
    assert_allclose(ridge.intercept_, svd_ridge.intercept_,
                    atol=1e-3, rtol=1e-3)

def _test_tolerance(filter_):
    ridge = Ridge(tol=1e-5, fit_intercept=False)
    ridge.fit(filter_(X_diabetes), y_diabetes)
    score = ridge.score(filter_(X_diabetes), y_diabetes)

    ridge2 = Ridge(tol=1e-3, fit_intercept=False)
    ridge2.fit(filter_(X_diabetes), y_diabetes)
    score2 = ridge2.score(filter_(X_diabetes), y_diabetes)

    assert score >= score2

def test_ridge_singular():
    # test on a singular matrix
    rng = np.random.RandomState(0)
    n_samples, n_features = 6, 6
    y = rng.randn(n_samples // 2)
    y = np.concatenate((y, y))
    X = rng.randn(n_samples // 2, n_features)
    X = np.concatenate((X, X), axis=0)

    ridge = Ridge(alpha=0)
    ridge.fit(X, y)
    assert ridge.score(X, y) > 0.9

def test_sparse_design_with_sample_weights():
    # Sample weights must work with sparse matrices
    n_sampless = [2, 3]
    n_featuress = [3, 2]

    rng = np.random.RandomState(42)

    sparse_matrix_converters = [sp.coo_matrix,
                                sp.csr_matrix,
                                sp.csc_matrix,
                                sp.lil_matrix,
                                sp.dok_matrix]

    sparse_ridge = Ridge(alpha=1., fit_intercept=False)
    dense_ridge = Ridge(alpha=1., fit_intercept=False)

    for n_samples, n_features in zip(n_sampless, n_featuress):
        X = rng.randn(n_samples, n_features)
        y = rng.randn(n_samples)
        sample_weights = rng.randn(n_samples) ** 2 + 1
        for sparse_converter in sparse_matrix_converters:
            X_sparse = sparse_converter(X)
            sparse_ridge.fit(X_sparse, y, sample_weight=sample_weights)
            dense_ridge.fit(X, y, sample_weight=sample_weights)

            assert_array_almost_equal(sparse_ridge.coef_, dense_ridge.coef_,
                                      decimal=6)

def test_ridge_sample_weights():
    # TODO: loop over sparse data as well
    # Note: parametrizing this test with pytest results in failed
    # assertions, meaning that it is not extremely robust

    rng = np.random.RandomState(0)
    # materialize the grid: product() returns a one-shot iterator that would
    # otherwise be exhausted after the first (n_samples, n_features) pair
    param_grid = list(product((1.0, 1e-2), (True, False),
                              ('svd', 'cholesky', 'lsqr', 'sparse_cg')))

    for n_samples, n_features in ((6, 5), (5, 10)):

        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = 1.0 + rng.rand(n_samples)

        for (alpha, intercept, solver) in param_grid:

            # Ridge with explicit sample_weight
            est = Ridge(alpha=alpha, fit_intercept=intercept,
                        solver=solver, tol=1e-6)
            est.fit(X, y, sample_weight=sample_weight)
            coefs = est.coef_
            inter = est.intercept_

            # Closed form of the weighted regularized least square
            # theta = (X^T W X + alpha I)^(-1) * X^T W y
            W = np.diag(sample_weight)
            if intercept is False:
                X_aug = X
                I = np.eye(n_features)
            else:
                dummy_column = np.ones(shape=(n_samples, 1))
                X_aug = np.concatenate((dummy_column, X), axis=1)
                I = np.eye(n_features + 1)
                I[0, 0] = 0

            cf_coefs = linalg.solve(X_aug.T.dot(W).dot(X_aug) + alpha * I,
                                    X_aug.T.dot(W).dot(y))

            if intercept is False:
                assert_array_almost_equal(coefs, cf_coefs)
            else:
                assert_array_almost_equal(coefs, cf_coefs[1:])
                assert_almost_equal(inter, cf_coefs[0])

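# Illustrative sketch (not part of the original suite): for
# fit_intercept=False, the weighted ridge problem checked above is equivalent
# to rescaling each row of X and y by sqrt(sample_weight) and fitting an
# unweighted ridge, which is the identity behind the closed form used in
# test_ridge_sample_weights. The helper name below is hypothetical.
def _sketch_sqrt_weight_rescaling():
    rng = np.random.RandomState(0)
    X = rng.randn(6, 5)
    y = rng.randn(6)
    sample_weight = 1.0 + rng.rand(6)

    # weighted fit
    coef_weighted = Ridge(alpha=1.0, fit_intercept=False).fit(
        X, y, sample_weight=sample_weight).coef_

    # equivalent unweighted fit on sqrt(w)-rescaled data
    sqrt_w = np.sqrt(sample_weight)
    coef_rescaled = Ridge(alpha=1.0, fit_intercept=False).fit(
        X * sqrt_w[:, np.newaxis], y * sqrt_w).coef_

    assert_allclose(coef_weighted, coef_rescaled)
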
def test_ridge_sag_with_X_fortran():
    # check that Fortran arrays are converted when using the SAG solver
    X, y = make_regression(random_state=42)
    # make X Fortran-ordered, then slice so that neither X nor y is a
    # contiguous C-ordered array
    X = np.asfortranarray(X)
    X = X[::2, :]
    y = y[::2]
    Ridge(solver='sag').fit(X, y)

def _test_ridge_cv_normalize(filter_):
    ridge_cv = RidgeCV(normalize=True, cv=3)
    ridge_cv.fit(filter_(10. * X_diabetes), y_diabetes)

    gs = GridSearchCV(Ridge(normalize=True, solver='sparse_cg'), cv=3,
                      param_grid={'alpha': ridge_cv.alphas})
    gs.fit(filter_(10. * X_diabetes), y_diabetes)
    assert gs.best_estimator_.alpha == ridge_cv.alpha_

def test_ridge_gcv_sample_weights(
        gcv_mode, X_constructor, fit_intercept, n_features, y_shape, noise):
    alphas = [1e-3, .1, 1., 10., 1e3]
    rng = np.random.RandomState(0)
    n_targets = y_shape[-1] if len(y_shape) == 2 else 1
    X, y = _make_sparse_offset_regression(
        n_samples=11, n_features=n_features, n_targets=n_targets,
        random_state=0, shuffle=False, noise=noise)
    y = y.reshape(y_shape)

    sample_weight = 3 * rng.randn(len(X))
    sample_weight = (sample_weight - sample_weight.min() + 1).astype(int)
    indices = np.repeat(np.arange(X.shape[0]), sample_weight)
    sample_weight = sample_weight.astype(float)
    X_tiled, y_tiled = X[indices], y[indices]

    cv = GroupKFold(n_splits=X.shape[0])
    splits = cv.split(X_tiled, y_tiled, groups=indices)
    kfold = RidgeCV(
        alphas=alphas, cv=splits, scoring='neg_mean_squared_error',
        fit_intercept=fit_intercept)
    # ignore warning from GridSearchCV: DeprecationWarning: The default of the
    # `iid` parameter will change from True to False in version 0.22 and will
    # be removed in 0.24
    with ignore_warnings(category=DeprecationWarning):
        kfold.fit(X_tiled, y_tiled)

    ridge_reg = Ridge(alpha=kfold.alpha_, fit_intercept=fit_intercept)
    splits = cv.split(X_tiled, y_tiled, groups=indices)
    predictions = cross_val_predict(ridge_reg, X_tiled, y_tiled, cv=splits)
    kfold_errors = (y_tiled - predictions) ** 2
    kfold_errors = [
        np.sum(kfold_errors[indices == i], axis=0)
        for i in np.arange(X.shape[0])]
    kfold_errors = np.asarray(kfold_errors)

    X_gcv = X_constructor(X)
    gcv_ridge = RidgeCV(
        alphas=alphas, store_cv_values=True,
        gcv_mode=gcv_mode, fit_intercept=fit_intercept)
    gcv_ridge.fit(X_gcv, y, sample_weight=sample_weight)
    if len(y_shape) == 2:
        gcv_errors = gcv_ridge.cv_values_[:, :, alphas.index(kfold.alpha_)]
    else:
        gcv_errors = gcv_ridge.cv_values_[:, alphas.index(kfold.alpha_)]

    assert kfold.alpha_ == pytest.approx(gcv_ridge.alpha_)
    assert_allclose(gcv_errors, kfold_errors, rtol=1e-3)
    assert_allclose(gcv_ridge.coef_, kfold.coef_, rtol=1e-3)
    assert_allclose(gcv_ridge.intercept_, kfold.intercept_, rtol=1e-3)

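# Illustrative sketch (not part of the original suite): the test above builds
# its cross-validation reference by repeating rows with np.repeat, relying on
# the fact that an integer sample weight k is equivalent to repeating the
# corresponding sample k times. The helper name below is hypothetical.
def _sketch_integer_weights_equal_repetition():
    rng = np.random.RandomState(0)
    X = rng.randn(8, 3)
    y = rng.randn(8)
    weights = rng.randint(1, 4, size=8)

    weighted = Ridge(alpha=1.0).fit(X, y, sample_weight=weights.astype(float))

    indices = np.repeat(np.arange(X.shape[0]), weights)
    repeated = Ridge(alpha=1.0).fit(X[indices], y[indices])

    assert_allclose(weighted.coef_, repeated.coef_)
    assert_allclose(weighted.intercept_, repeated.intercept_)
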
def _test_multi_ridge_diabetes(filter_):
    # simulate several responses
    Y = np.vstack((y_diabetes, y_diabetes)).T
    n_features = X_diabetes.shape[1]

    ridge = Ridge(fit_intercept=False)
    ridge.fit(filter_(X_diabetes), Y)
    assert ridge.coef_.shape == (2, n_features)
    Y_pred = ridge.predict(filter_(X_diabetes))
    ridge.fit(filter_(X_diabetes), y_diabetes)
    y_pred = ridge.predict(filter_(X_diabetes))
    assert_array_almost_equal(np.vstack((y_pred, y_pred)).T,
                              Y_pred, decimal=3)

def test_n_iter():
    # Test that self.n_iter_ is correct.
    n_targets = 2
    X, y = X_diabetes, y_diabetes
    y_n = np.tile(y, (n_targets, 1)).T

    for max_iter in range(1, 4):
        for solver in ('sag', 'saga', 'lsqr'):
            reg = Ridge(solver=solver, max_iter=max_iter, tol=1e-12)
            reg.fit(X, y_n)
            assert_array_equal(reg.n_iter_, np.tile(max_iter, n_targets))

    for solver in ('sparse_cg', 'svd', 'cholesky'):
        reg = Ridge(solver=solver, max_iter=1, tol=1e-1)
        reg.fit(X, y_n)
        assert reg.n_iter_ is None

def test_ridge_fit_intercept_sparse_sag():
    X, y = _make_sparse_offset_regression(
        n_features=5, n_samples=20, random_state=0, X_offset=5.)
    X_csr = sp.csr_matrix(X)

    params = dict(alpha=1., solver='sag', fit_intercept=True,
                  tol=1e-10, max_iter=100000)
    dense_ridge = Ridge(**params)
    sparse_ridge = Ridge(**params)
    dense_ridge.fit(X, y)
    with pytest.warns(None) as record:
        sparse_ridge.fit(X_csr, y)
    assert len(record) == 0
    assert np.allclose(dense_ridge.intercept_, sparse_ridge.intercept_,
                       rtol=1e-4)
    assert np.allclose(dense_ridge.coef_, sparse_ridge.coef_, rtol=1e-4)
    with pytest.warns(UserWarning, match='"sag" solver requires.*'):
        Ridge(solver='sag').fit(X_csr, y)

def test_ridge_shapes():
    # Test shape of coef_ and intercept_
    rng = np.random.RandomState(0)
    n_samples, n_features = 5, 10
    X = rng.randn(n_samples, n_features)
    y = rng.randn(n_samples)
    Y1 = y[:, np.newaxis]
    Y = np.c_[y, 1 + y]

    ridge = Ridge()

    ridge.fit(X, y)
    assert ridge.coef_.shape == (n_features,)
    assert ridge.intercept_.shape == ()

    ridge.fit(X, Y1)
    assert ridge.coef_.shape == (1, n_features)
    assert ridge.intercept_.shape == (1, )

    ridge.fit(X, Y)
    assert ridge.coef_.shape == (2, n_features)
    assert ridge.intercept_.shape == (2, )

def test_ridge_intercept():
    # Test intercept with multiple targets, GH issue #708
    rng = np.random.RandomState(0)
    n_samples, n_features = 5, 10
    X = rng.randn(n_samples, n_features)
    y = rng.randn(n_samples)
    Y = np.c_[y, 1. + y]

    ridge = Ridge()

    ridge.fit(X, y)
    intercept = ridge.intercept_

    ridge.fit(X, Y)
    assert_almost_equal(ridge.intercept_[0], intercept)
    assert_almost_equal(ridge.intercept_[1], intercept + 1.)

def test_toy_ridge_object():
    # Test the Ridge object on a toy regression problem
    # TODO: test also n_samples > n_features
    X = np.array([[1], [2]])
    Y = np.array([1, 2])
    reg = Ridge(alpha=0.0)
    reg.fit(X, Y)
    X_test = [[1], [2], [3], [4]]
    assert_almost_equal(reg.predict(X_test), [1., 2, 3, 4])

    assert len(reg.coef_.shape) == 1
    assert type(reg.intercept_) == np.float64

    Y = np.vstack((Y, Y)).T

    reg.fit(X, Y)
    X_test = [[1], [2], [3], [4]]

    assert len(reg.coef_.shape) == 2
    assert type(reg.intercept_) == np.ndarray

def test_ridgecv_sample_weight():
    rng = np.random.RandomState(0)
    alphas = (0.1, 1.0, 10.0)

    # There are different algorithms for n_samples > n_features
    # and the opposite, so test them both.
    for n_samples, n_features in ((6, 5), (5, 10)):
        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = 1.0 + rng.rand(n_samples)

        cv = KFold(5)
        ridgecv = RidgeCV(alphas=alphas, cv=cv)
        ridgecv.fit(X, y, sample_weight=sample_weight)

        # Check using GridSearchCV directly
        parameters = {'alpha': alphas}
        gs = GridSearchCV(Ridge(), parameters, cv=cv)
        gs.fit(X, y, sample_weight=sample_weight)

        assert ridgecv.alpha_ == gs.best_estimator_.alpha
        assert_array_almost_equal(ridgecv.coef_, gs.best_estimator_.coef_)

def test_ridge_fit_intercept_sparse(solver):
    X, y = _make_sparse_offset_regression(n_features=20, random_state=0)
    X_csr = sp.csr_matrix(X)

    # for now only sparse_cg can correctly fit an intercept with sparse X
    # with default tol and max_iter.
    # sag is tested separately in test_ridge_fit_intercept_sparse_sag
    # because it requires more iterations and should raise a warning if the
    # default max_iter is used.
    # other solvers raise an exception, as checked in
    # test_ridge_fit_intercept_sparse_error
    #
    # "auto" should switch to "sparse_cg" when X is sparse,
    # so the reference we use for both ("auto" and "sparse_cg") is
    # Ridge(solver="sparse_cg"), fitted using the dense representation (note
    # that "sparse_cg" can fit sparse or dense data)
    dense_ridge = Ridge(solver='sparse_cg')
    sparse_ridge = Ridge(solver=solver)
    dense_ridge.fit(X, y)
    with pytest.warns(None) as record:
        sparse_ridge.fit(X_csr, y)
    assert len(record) == 0
    assert np.allclose(dense_ridge.intercept_, sparse_ridge.intercept_)
    assert np.allclose(dense_ridge.coef_, sparse_ridge.coef_)

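# Illustrative sketch (not part of the original suite): the comment above
# notes that solver="auto" should fall back to "sparse_cg" on sparse input;
# a direct consequence is that both settings produce the same fit on sparse
# data. The helper name below is hypothetical.
def _sketch_auto_solver_on_sparse_input():
    X, y = _make_sparse_offset_regression(n_features=20, random_state=0)
    X_csr = sp.csr_matrix(X)
    auto_ridge = Ridge(solver='auto').fit(X_csr, y)
    cg_ridge = Ridge(solver='sparse_cg').fit(X_csr, y)
    assert_allclose(auto_ridge.coef_, cg_ridge.coef_, rtol=1e-6)
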
def test_ridge_vs_lstsq():
    # On alpha=0., Ridge and OLS yield the same solution.
    rng = np.random.RandomState(0)
    # we need more samples than features
    n_samples, n_features = 5, 4
    y = rng.randn(n_samples)
    X = rng.randn(n_samples, n_features)

    ridge = Ridge(alpha=0., fit_intercept=False)
    ols = LinearRegression(fit_intercept=False)

    ridge.fit(X, y)
    ols.fit(X, y)
    assert_almost_equal(ridge.coef_, ols.coef_)

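# Illustrative sketch (not part of the original suite): the same alpha=0
# equivalence can be checked against numpy's least-squares solver directly.
# The helper name below is hypothetical.
def _sketch_ridge_alpha0_vs_numpy_lstsq():
    rng = np.random.RandomState(0)
    X = rng.randn(5, 4)  # full column rank, more samples than features
    y = rng.randn(5)
    coef_ridge = Ridge(alpha=0., fit_intercept=False).fit(X, y).coef_
    coef_lstsq = np.linalg.lstsq(X, y, rcond=None)[0]
    assert_almost_equal(coef_ridge, coef_lstsq)
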
def test_raises_value_error_if_sample_weights_greater_than_1d():
    # Sample weights must be either scalar or 1D
    n_sampless = [2, 3]
    n_featuress = [3, 2]

    rng = np.random.RandomState(42)

    for n_samples, n_features in zip(n_sampless, n_featuress):
        X = rng.randn(n_samples, n_features)
        y = rng.randn(n_samples)
        sample_weights_OK = rng.randn(n_samples) ** 2 + 1
        sample_weights_OK_1 = 1.
        sample_weights_OK_2 = 2.
        sample_weights_not_OK = sample_weights_OK[:, np.newaxis]
        sample_weights_not_OK_2 = sample_weights_OK[np.newaxis, :]

        ridge = Ridge(alpha=1)

        # make sure the "OK" sample weights actually work
        ridge.fit(X, y, sample_weights_OK)
        ridge.fit(X, y, sample_weights_OK_1)
        ridge.fit(X, y, sample_weights_OK_2)

        def fit_ridge_not_ok():
            ridge.fit(X, y, sample_weights_not_OK)

        def fit_ridge_not_ok_2():
            ridge.fit(X, y, sample_weights_not_OK_2)

        assert_raise_message(ValueError,
                             "Sample weights must be 1D array or scalar",
                             fit_ridge_not_ok)

        assert_raise_message(ValueError,
                             "Sample weights must be 1D array or scalar",
                             fit_ridge_not_ok_2)

def test_ridge_sparse_svd():
    X = sp.csc_matrix(rng.rand(100, 10))
    y = rng.rand(100)
    ridge = Ridge(solver='svd', fit_intercept=False)
    assert_raises(TypeError, ridge.fit, X, y)

def test_ridge(solver):
    # Ridge regression convergence test using score
    # TODO: for this test to be robust, we should use a dataset instead
    # of np.random.
    rng = np.random.RandomState(0)
    alpha = 1.0

    # With more samples than features
    n_samples, n_features = 6, 5
    y = rng.randn(n_samples)
    X = rng.randn(n_samples, n_features)

    ridge = Ridge(alpha=alpha, solver=solver)
    ridge.fit(X, y)
    assert ridge.coef_.shape == (X.shape[1], )
    assert ridge.score(X, y) > 0.47

    if solver in ("cholesky", "sag"):
        # Currently the only solvers to support sample_weight.
        ridge.fit(X, y, sample_weight=np.ones(n_samples))
        assert ridge.score(X, y) > 0.47

    # With more features than samples
    n_samples, n_features = 5, 10
    y = rng.randn(n_samples)
    X = rng.randn(n_samples, n_features)
    ridge = Ridge(alpha=alpha, solver=solver)
    ridge.fit(X, y)
    assert ridge.score(X, y) > .9

    if solver in ("cholesky", "sag"):
        # Currently the only solvers to support sample_weight.
        ridge.fit(X, y, sample_weight=np.ones(n_samples))
        assert ridge.score(X, y) > 0.9

def _test_ridge_diabetes(filter_):
    ridge = Ridge(fit_intercept=False)
    ridge.fit(filter_(X_diabetes), y_diabetes)
    return np.round(ridge.score(filter_(X_diabetes), y_diabetes), 5)

    }, {
        'name': 'RandomForest',
        'instance': RandomForestRegressor(),
        'complexity_label': 'estimators',
        'complexity_computer': lambda clf: clf.n_estimators,
    }, {
        'name': 'SVR',
        'instance': SVR(kernel='rbf'),
        'complexity_label': 'support vectors',
        'complexity_computer': lambda clf: len(clf.support_vectors_),
    },
    ]
}
benchmark(configuration)

# benchmark n_features influence on prediction speed
percentile = 90
percentiles = n_feature_influence({'ridge': Ridge()},
                                  configuration['n_train'],
                                  configuration['n_test'],
                                  [100, 250, 500], percentile)
plot_n_features_influence(percentiles, percentile)

# benchmark throughput
throughputs = benchmark_throughputs(configuration)
plot_benchmark_throughput(throughputs, configuration)

stop_time = time.time()
print("example run in %.2fs" % (stop_time - start_time))

def test_sparse_cg_max_iter():
    reg = Ridge(solver="sparse_cg", max_iter=1)
    reg.fit(X_diabetes, y_diabetes)
    assert reg.coef_.shape[0] == X_diabetes.shape[1]