def test_return_std(): # Test return_std option for both Bayesian regressors def f(X): return np.dot(X, w) + b def f_noise(X, noise_mult): return f(X) + np.random.randn(X.shape[0]) * noise_mult d = 5 n_train = 50 n_test = 10 w = np.array([1.0, 0.0, 1.0, -1.0, 0.0]) b = 1.0 X = np.random.random((n_train, d)) X_test = np.random.random((n_test, d)) for decimal, noise_mult in enumerate([1, 0.1, 0.01]): y = f_noise(X, noise_mult) m1 = BayesianRidge() m1.fit(X, y) y_mean1, y_std1 = m1.predict(X_test, return_std=True) assert_array_almost_equal(y_std1, noise_mult, decimal=decimal) m2 = ARDRegression() m2.fit(X, y) y_mean2, y_std2 = m2.predict(X_test, return_std=True) assert_array_almost_equal(y_std2, noise_mult, decimal=decimal)
def test_bayesian_ridge_scores(): """Check scores attribute shape""" X, y = diabetes.data, diabetes.target clf = BayesianRidge(compute_score=True) clf.fit(X, y) assert clf.scores_.shape == (clf.n_iter_ + 1, )
def test_toy_bayesian_ridge_object(): # Test BayesianRidge on toy X = np.array([[1], [2], [6], [8], [10]]) Y = np.array([1, 2, 6, 8, 10]) clf = BayesianRidge(compute_score=True) clf.fit(X, Y) # Check that the model could approximately learn the identity function test = [[1], [3], [4]] assert_array_almost_equal(clf.predict(test), [1, 3, 4], 2)
def test_bayesian_initial_params(): # Test BayesianRidge with initial values (alpha_init, lambda_init) X = np.vander(np.linspace(0, 4, 5), 4) y = np.array([0., 1., 0., -1., 0.]) # y = (x^3 - 6x^2 + 8x) / 3 # In this case, starting from the default initial values will increase # the bias of the fitted curve. So, lambda_init should be small. reg = BayesianRidge(alpha_init=1., lambda_init=1e-3) # Check the R2 score nearly equals to one. r2 = reg.fit(X, y).score(X, y) assert_almost_equal(r2, 1.)
def test_n_iter(): """Check value of n_iter.""" X = np.array([[1], [2], [6], [8], [10]]) y = np.array([1, 2, 6, 8, 10]) clf = BayesianRidge(n_iter=0) msg = "n_iter should be greater than or equal to 1." assert_raise_message(ValueError, msg, clf.fit, X, y)
def test_bayesian_ridge_score_values(): """Check value of score on toy example. Compute log marginal likelihood with equation (36) in Sparse Bayesian Learning and the Relevance Vector Machine (Tipping, 2001): - 0.5 * (log |Id/alpha + X.X^T/lambda| + y^T.(Id/alpha + X.X^T/lambda).y + n * log(2 * pi)) + lambda_1 * log(lambda) - lambda_2 * lambda + alpha_1 * log(alpha) - alpha_2 * alpha and check equality with the score computed during training. """ X, y = diabetes.data, diabetes.target n_samples = X.shape[0] # check with initial values of alpha and lambda (see code for the values) eps = np.finfo(np.float64).eps alpha_ = 1. / (np.var(y) + eps) lambda_ = 1. # value of the parameters of the Gamma hyperpriors alpha_1 = 0.1 alpha_2 = 0.1 lambda_1 = 0.1 lambda_2 = 0.1 # compute score using formula of docstring score = lambda_1 * log(lambda_) - lambda_2 * lambda_ score += alpha_1 * log(alpha_) - alpha_2 * alpha_ M = 1. / alpha_ * np.eye(n_samples) + 1. / lambda_ * np.dot(X, X.T) M_inv = pinvh(M) score += -0.5 * (fast_logdet(M) + np.dot(y.T, np.dot(M_inv, y)) + n_samples * log(2 * np.pi)) # compute score with BayesianRidge clf = BayesianRidge(alpha_1=alpha_1, alpha_2=alpha_2, lambda_1=lambda_1, lambda_2=lambda_2, n_iter=1, fit_intercept=False, compute_score=True) clf.fit(X, y) assert_almost_equal(clf.scores_[0], score, decimal=9)
def test_bayesian_ridge_parameter(): # Test correctness of lambda_ and alpha_ parameters (GitHub issue #8224) X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]]) y = np.array([1, 2, 3, 2, 0, 4, 5]).T # A Ridge regression model using an alpha value equal to the ratio of # lambda_ and alpha_ from the Bayesian Ridge model must be identical br_model = BayesianRidge(compute_score=True).fit(X, y) rr_model = Ridge(alpha=br_model.lambda_ / br_model.alpha_).fit(X, y) assert_array_almost_equal(rr_model.coef_, br_model.coef_) assert_almost_equal(rr_model.intercept_, br_model.intercept_)
def test_bayesian_sample_weights(): # Test correctness of the sample_weights method X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]]) y = np.array([1, 2, 3, 2, 0, 4, 5]).T w = np.array([4, 3, 3, 1, 1, 2, 3]).T # A Ridge regression model using an alpha value equal to the ratio of # lambda_ and alpha_ from the Bayesian Ridge model must be identical br_model = BayesianRidge(compute_score=True).fit(X, y, sample_weight=w) rr_model = Ridge(alpha=br_model.lambda_ / br_model.alpha_).fit( X, y, sample_weight=w) assert_array_almost_equal(rr_model.coef_, br_model.coef_) assert_almost_equal(rr_model.intercept_, br_model.intercept_)
def test_std_bayesian_ridge_ard_with_constant_input(): # Test BayesianRidge and ARDRegression standard dev. for edge case of # constant target vector # The standard dev. should be relatively small (< 0.01 is tested here) n_samples = 4 n_features = 5 random_state = check_random_state(42) constant_value = random_state.rand() X = random_state.random_sample((n_samples, n_features)) y = np.full(n_samples, constant_value, dtype=np.array(constant_value).dtype) expected_upper_boundary = 0.01 for clf in [BayesianRidge(), ARDRegression()]: _, y_std = clf.fit(X, y).predict(X, return_std=True) assert_array_less(y_std, expected_upper_boundary)
def test_prediction_bayesian_ridge_ard_with_constant_input(): # Test BayesianRidge and ARDRegression predictions for edge case of # constant target vectors n_samples = 4 n_features = 5 random_state = check_random_state(42) constant_value = random_state.rand() X = random_state.random_sample((n_samples, n_features)) y = np.full(n_samples, constant_value, dtype=np.array(constant_value).dtype) expected = np.full(n_samples, constant_value, dtype=np.array(constant_value).dtype) for clf in [BayesianRidge(), ARDRegression()]: y_pred = clf.fit(X, y).predict(X) assert_array_almost_equal(y_pred, expected)