Example #1
def test_return_std():
    # Test return_std option for both Bayesian regressors
    def f(X):
        return np.dot(X, w) + b

    def f_noise(X, noise_mult):
        return f(X) + np.random.randn(X.shape[0]) * noise_mult

    d = 5
    n_train = 50
    n_test = 10

    w = np.array([1.0, 0.0, 1.0, -1.0, 0.0])
    b = 1.0

    X = np.random.random((n_train, d))
    X_test = np.random.random((n_test, d))

    for decimal, noise_mult in enumerate([1, 0.1, 0.01]):
        y = f_noise(X, noise_mult)

        m1 = BayesianRidge()
        m1.fit(X, y)
        y_mean1, y_std1 = m1.predict(X_test, return_std=True)
        assert_array_almost_equal(y_std1, noise_mult, decimal=decimal)

        m2 = ARDRegression()
        m2.fit(X, y)
        y_mean2, y_std2 = m2.predict(X_test, return_std=True)
        assert_array_almost_equal(y_std2, noise_mult, decimal=decimal)
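For reference, here is a minimal standalone sketch of the return_std option exercised by the test above. The data and variable names are illustrative only and are not taken from the test:

import numpy as np
from sklearn.linear_model import BayesianRidge

rng = np.random.RandomState(0)
X = rng.rand(50, 5)
y = X @ np.array([1.0, 0.0, 1.0, -1.0, 0.0]) + 1.0 + 0.1 * rng.randn(50)

model = BayesianRidge().fit(X, y)
y_mean, y_std = model.predict(X[:10], return_std=True)

# One predictive mean and one predictive standard deviation per sample;
# with a noise level of 0.1, the std estimates should be of the same order.
print(y_mean.shape, y_std.shape)  # (10,) (10,)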
Example #3
class BayesianRidgeImpl():

    def __init__(self, n_iter=300, tol=0.001, alpha_1=1e-06, alpha_2=1e-06, lambda_1=1e-06, lambda_2=1e-06, compute_score=False, fit_intercept=True, normalize=False, copy_X=True, verbose=False):
        self._hyperparams = {
            'n_iter': n_iter,
            'tol': tol,
            'alpha_1': alpha_1,
            'alpha_2': alpha_2,
            'lambda_1': lambda_1,
            'lambda_2': lambda_2,
            'compute_score': compute_score,
            'fit_intercept': fit_intercept,
            'normalize': normalize,
            'copy_X': copy_X,
            'verbose': verbose}
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        if (y is not None):
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
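The wrapper above simply stores its hyperparameters and delegates fit and predict to the wrapped scikit-learn estimator. A minimal, hypothetical usage sketch follows; it assumes SKLModel was imported as sklearn.linear_model.BayesianRidge (which matches the constructor's keyword arguments) and an older scikit-learn release in which BayesianRidge still accepts n_iter and normalize:

import numpy as np
from sklearn.linear_model import BayesianRidge as SKLModel  # assumed alias

X = np.random.random((20, 3))
y = X @ np.array([1.0, -1.0, 0.5])

# BayesianRidgeImpl is the class defined above.
model = BayesianRidgeImpl(n_iter=500, tol=1e-4)
model.fit(X, y)
print(model.predict(X[:3]))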
Example #4
def test_bayesian_ridge_scores():
    """Check scores attribute shape"""
    X, y = diabetes.data, diabetes.target

    clf = BayesianRidge(compute_score=True)
    clf.fit(X, y)

    assert clf.scores_.shape == (clf.n_iter_ + 1, )
Example #6
def test_toy_bayesian_ridge_object():
    """
    Test BayesianRidge on toy
    """
    X = np.array([[1], [2], [6], [8], [10]])
    Y = np.array([1, 2, 6, 8, 10])
    clf = BayesianRidge(compute_score=True)
    clf.fit(X, Y)
    X_test = [[1], [3], [4]]
    assert(np.abs(clf.predict(X_test) - [1, 3, 4]).sum() < 1.e-2)  # identity
Example #7
def test_toy_bayesian_ridge_object():
    # Test BayesianRidge on toy
    X = np.array([[1], [2], [6], [8], [10]])
    Y = np.array([1, 2, 6, 8, 10])
    clf = BayesianRidge(compute_score=True)
    clf.fit(X, Y)

    # Check that the model could approximately learn the identity function
    test = [[1], [3], [4]]
    assert_array_almost_equal(clf.predict(test), [1, 3, 4], 2)
Example #8
def test_toy_bayesian_ridge_object():
    """
    Test BayesianRidge on toy
    """
    X = np.array([[1], [2], [6], [8], [10]])
    Y = np.array([1, 2, 6, 8, 10])
    clf = BayesianRidge(compute_score=True)
    clf.fit(X, Y)
    X_test = [[1], [3], [4]]
    assert (np.abs(clf.predict(X_test) - [1, 3, 4]).sum() < 1.e-2)  # identity
Example #10
def test_bayesian_initial_params():
    # Test BayesianRidge with initial values (alpha_init, lambda_init)
    X = np.vander(np.linspace(0, 4, 5), 4)
    y = np.array([0., 1., 0., -1., 0.])  # y = (x^3 - 6x^2 + 8x) / 3

    # In this case, starting from the default initial values will increase
    # the bias of the fitted curve. So, lambda_init should be small.
    reg = BayesianRidge(alpha_init=1., lambda_init=1e-3)
    # Check the R2 score nearly equals to one.
    r2 = reg.fit(X, y).score(X, y)
    assert_almost_equal(r2, 1.)
Example #11
def test_n_iter():
    """Check value of n_iter."""
    X = np.array([[1], [2], [6], [8], [10]])
    y = np.array([1, 2, 6, 8, 10])
    clf = BayesianRidge(n_iter=0)
    msg = "n_iter should be greater than or equal to 1."
    assert_raise_message(ValueError, msg, clf.fit, X, y)
Example #12
def fit(self, X, y=None):
    self._sklearn_model = SKLModel(**self._hyperparams)
    if (y is not None):
        self._sklearn_model.fit(X, y)
    else:
        self._sklearn_model.fit(X)
    return self
Example #13
def test_bayesian_ridge_score_values():
    """Check value of score on toy example.

    Compute log marginal likelihood with equation (36) in Sparse Bayesian
    Learning and the Relevance Vector Machine (Tipping, 2001):

    - 0.5 * (log |Id/alpha + X.X^T/lambda| +
             y^T.(Id/alpha + X.X^T/lambda)^-1.y + n * log(2 * pi))
    + lambda_1 * log(lambda) - lambda_2 * lambda
    + alpha_1 * log(alpha) - alpha_2 * alpha

    and check equality with the score computed during training.
    """

    X, y = diabetes.data, diabetes.target
    n_samples = X.shape[0]
    # check with initial values of alpha and lambda (see code for the values)
    eps = np.finfo(np.float64).eps
    alpha_ = 1. / (np.var(y) + eps)
    lambda_ = 1.

    # value of the parameters of the Gamma hyperpriors
    alpha_1 = 0.1
    alpha_2 = 0.1
    lambda_1 = 0.1
    lambda_2 = 0.1

    # compute score using formula of docstring
    score = lambda_1 * log(lambda_) - lambda_2 * lambda_
    score += alpha_1 * log(alpha_) - alpha_2 * alpha_
    M = 1. / alpha_ * np.eye(n_samples) + 1. / lambda_ * np.dot(X, X.T)
    M_inv = pinvh(M)
    score += -0.5 * (fast_logdet(M) + np.dot(y.T, np.dot(M_inv, y)) +
                     n_samples * log(2 * np.pi))

    # compute score with BayesianRidge
    clf = BayesianRidge(alpha_1=alpha_1,
                        alpha_2=alpha_2,
                        lambda_1=lambda_1,
                        lambda_2=lambda_2,
                        n_iter=1,
                        fit_intercept=False,
                        compute_score=True)
    clf.fit(X, y)

    assert_almost_equal(clf.scores_[0], score, decimal=9)
Example #14
def test_bayesian_ridge_score_values():
    """Check value of score on toy example.

    Compute log marginal likelihood with equation (36) in Sparse Bayesian
    Learning and the Relevance Vector Machine (Tipping, 2001):

    - 0.5 * (log |Id/alpha + X.X^T/lambda| +
             y^T.(Id/alpha + X.X^T/lambda)^-1.y + n * log(2 * pi))
    + lambda_1 * log(lambda) - lambda_2 * lambda
    + alpha_1 * log(alpha) - alpha_2 * alpha

    and check equality with the score computed during training.
    """

    X, y = diabetes.data, diabetes.target
    n_samples = X.shape[0]
    # check with initial values of alpha and lambda (see code for the values)
    eps = np.finfo(np.float64).eps
    alpha_ = 1. / (np.var(y) + eps)
    lambda_ = 1.

    # value of the parameters of the Gamma hyperpriors
    alpha_1 = 0.1
    alpha_2 = 0.1
    lambda_1 = 0.1
    lambda_2 = 0.1

    # compute score using formula of docstring
    score = lambda_1 * log(lambda_) - lambda_2 * lambda_
    score += alpha_1 * log(alpha_) - alpha_2 * alpha_
    M = 1. / alpha_ * np.eye(n_samples) + 1. / lambda_ * np.dot(X, X.T)
    M_inv = pinvh(M)
    score += - 0.5 * (fast_logdet(M) + np.dot(y.T, np.dot(M_inv, y)) +
                      n_samples * log(2 * np.pi))

    # compute score with BayesianRidge
    clf = BayesianRidge(alpha_1=alpha_1, alpha_2=alpha_2,
                        lambda_1=lambda_1, lambda_2=lambda_2,
                        n_iter=1, fit_intercept=False, compute_score=True)
    clf.fit(X, y)

    assert_almost_equal(clf.scores_[0], score, decimal=9)
Example #15
def test_bayesian_ridge_parameter():
    # Test correctness of lambda_ and alpha_ parameters (Github issue #8224)
    X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
    y = np.array([1, 2, 3, 2, 0, 4, 5]).T

    # A Ridge regression model using an alpha value equal to the ratio of
    # lambda_ and alpha_ from the Bayesian Ridge model must be identical
    br_model = BayesianRidge(compute_score=True).fit(X, y)
    rr_model = Ridge(alpha=br_model.lambda_ / br_model.alpha_).fit(X, y)
    assert_array_almost_equal(rr_model.coef_, br_model.coef_)
    assert_almost_equal(rr_model.intercept_, br_model.intercept_)
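The equivalence asserted above holds because BayesianRidge exposes the learned noise precision alpha_ and weight precision lambda_, and their ratio plays the role of the L2 penalty in an ordinary Ridge fit. A small illustrative sketch on random data (the names here are not taken from the test):

import numpy as np
from sklearn.linear_model import BayesianRidge, Ridge

rng = np.random.RandomState(0)
X = rng.rand(30, 4)
y = X @ np.array([1.0, -2.0, 0.0, 0.5]) + 0.1 * rng.randn(30)

br = BayesianRidge().fit(X, y)
effective_alpha = br.lambda_ / br.alpha_  # implied L2 regularization strength
rr = Ridge(alpha=effective_alpha).fit(X, y)

# The coefficients should agree closely, mirroring the assertion above.
print(np.allclose(br.coef_, rr.coef_, atol=1e-4))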
Example #16
def test_bayesian_sample_weights():
    # Test correctness of the sample_weight parameter of fit
    X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
    y = np.array([1, 2, 3, 2, 0, 4, 5]).T
    w = np.array([4, 3, 3, 1, 1, 2, 3]).T

    # A Ridge regression model using an alpha value equal to the ratio of
    # lambda_ and alpha_ from the Bayesian Ridge model must be identical
    br_model = BayesianRidge(compute_score=True).fit(X, y, sample_weight=w)
    rr_model = Ridge(alpha=br_model.lambda_ / br_model.alpha_).fit(
        X, y, sample_weight=w)
    assert_array_almost_equal(rr_model.coef_, br_model.coef_)
    assert_almost_equal(rr_model.intercept_, br_model.intercept_)
Example #17
def test_prediction_bayesian_ridge_ard_with_constant_input():
    # Test BayesianRidge and ARDRegression predictions for edge case of
    # constant target vectors
    n_samples = 4
    n_features = 5
    random_state = check_random_state(42)
    constant_value = random_state.rand()
    X = random_state.random_sample((n_samples, n_features))
    y = np.full(n_samples, constant_value)
    expected = np.full(n_samples, constant_value)

    for clf in [BayesianRidge(), ARDRegression()]:
        y_pred = clf.fit(X, y).predict(X)
        assert_array_almost_equal(y_pred, expected)
Example #18
def __init__(self, n_iter=300, tol=0.001, alpha_1=1e-06, alpha_2=1e-06, lambda_1=1e-06, lambda_2=1e-06, compute_score=False, fit_intercept=True, normalize=False, copy_X=True, verbose=False):
    self._hyperparams = {
        'n_iter': n_iter,
        'tol': tol,
        'alpha_1': alpha_1,
        'alpha_2': alpha_2,
        'lambda_1': lambda_1,
        'lambda_2': lambda_2,
        'compute_score': compute_score,
        'fit_intercept': fit_intercept,
        'normalize': normalize,
        'copy_X': copy_X,
        'verbose': verbose}
    self._wrapped_model = SKLModel(**self._hyperparams)
Example #19
def test_std_bayesian_ridge_ard_with_constant_input():
    # Test BayesianRidge and ARDRegression standard dev. for edge case of
    # constant target vector
    # The standard dev. should be relatively small (< 0.01 is tested here)
    n_samples = 4
    n_features = 5
    random_state = check_random_state(42)
    constant_value = random_state.rand()
    X = random_state.random_sample((n_samples, n_features))
    y = np.full(n_samples, constant_value)
    expected_upper_boundary = 0.01

    for clf in [BayesianRidge(), ARDRegression()]:
        _, y_std = clf.fit(X, y).predict(X, return_std=True)
        assert_array_less(y_std, expected_upper_boundary)
Example #20
def test_bayesian_on_diabetes():
    # Test BayesianRidge on diabetes
    raise SkipTest("XFailed Test")
    diabetes = datasets.load_diabetes()
    X, y = diabetes.data, diabetes.target

    clf = BayesianRidge(compute_score=True)

    # Test with more samples than features
    clf.fit(X, y)
    # Test that scores are increasing at each iteration
    assert_array_equal(np.diff(clf.scores_) > 0, True)

    # Test with more features than samples
    X = X[:5, :]
    y = y[:5]
    clf.fit(X, y)
    # Test that scores are increasing at each iteration
    assert_array_equal(np.diff(clf.scores_) > 0, True)
Example #21
from sklearn.feature_selection.variance_threshold import VarianceThreshold

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)


clf_dict = {'ARDRegression':ARDRegression(),
			'AdaBoostClassifier':AdaBoostClassifier(),
			'AdaBoostRegressor':AdaBoostRegressor(),
			'AdditiveChi2Sampler':AdditiveChi2Sampler(),
			'AffinityPropagation':AffinityPropagation(),
			'AgglomerativeClustering':AgglomerativeClustering(),
			'BaggingClassifier':BaggingClassifier(),
			'BaggingRegressor':BaggingRegressor(),
			'BayesianGaussianMixture':BayesianGaussianMixture(),
			'BayesianRidge':BayesianRidge(),
			'BernoulliNB':BernoulliNB(),
			'BernoulliRBM':BernoulliRBM(),
			'Binarizer':Binarizer(),
			'Birch':Birch(),
			'CCA':CCA(),
			'CalibratedClassifierCV':CalibratedClassifierCV(),
			'DBSCAN':DBSCAN(),
			'DPGMM':DPGMM(),
			'DecisionTreeClassifier':DecisionTreeClassifier(),
			'DecisionTreeRegressor':DecisionTreeRegressor(),
			'DictionaryLearning':DictionaryLearning(),
			'ElasticNet':ElasticNet(),
			'ElasticNetCV':ElasticNetCV(),
			'EmpiricalCovariance':EmpiricalCovariance(),
			'ExtraTreeClassifier':ExtraTreeClassifier(),