Example #1
0
def test_return_std():
    # Test return_std option for both Bayesian regressors
    def f(X):
        return np.dot(X, w) + b

    def f_noise(X, noise_mult):
        return f(X) + np.random.randn(X.shape[0]) * noise_mult

    d = 5
    n_train = 50
    n_test = 10

    w = np.array([1.0, 0.0, 1.0, -1.0, 0.0])
    b = 1.0

    X = np.random.random((n_train, d))
    X_test = np.random.random((n_test, d))

    for decimal, noise_mult in enumerate([1, 0.1, 0.01]):
        y = f_noise(X, noise_mult)

        m1 = BayesianRidge()
        m1.fit(X, y)
        y_mean1, y_std1 = m1.predict(X_test, return_std=True)
        assert_array_almost_equal(y_std1, noise_mult, decimal=decimal)

        m2 = ARDRegression()
        m2.fit(X, y)
        y_mean2, y_std2 = m2.predict(X_test, return_std=True)
        assert_array_almost_equal(y_std2, noise_mult, decimal=decimal)
Example #2
0
def test_return_std():
    # Test return_std option for both Bayesian regressors
    def f(X):
        return np.dot(X, w) + b

    def f_noise(X, noise_mult):
        return f(X) + np.random.randn(X.shape[0]) * noise_mult

    d = 5
    n_train = 50
    n_test = 10

    w = np.array([1.0, 0.0, 1.0, -1.0, 0.0])
    b = 1.0

    X = np.random.random((n_train, d))
    X_test = np.random.random((n_test, d))

    for decimal, noise_mult in enumerate([1, 0.1, 0.01]):
        y = f_noise(X, noise_mult)

        m1 = BayesianRidge()
        m1.fit(X, y)
        y_mean1, y_std1 = m1.predict(X_test, return_std=True)
        assert_array_almost_equal(y_std1, noise_mult, decimal=decimal)

        m2 = ARDRegression()
        m2.fit(X, y)
        y_mean2, y_std2 = m2.predict(X_test, return_std=True)
        assert_array_almost_equal(y_std2, noise_mult, decimal=decimal)
Example #3
0
class ARDRegressionImpl():

    def __init__(self, n_iter=300, tol=0.001, alpha_1=1e-06, alpha_2=1e-06, lambda_1=1e-06, lambda_2=1e-06, compute_score=False, threshold_lambda=10000.0, fit_intercept=True, normalize=False, copy_X=True, verbose=False):
        self._hyperparams = {
            'n_iter': n_iter,
            'tol': tol,
            'alpha_1': alpha_1,
            'alpha_2': alpha_2,
            'lambda_1': lambda_1,
            'lambda_2': lambda_2,
            'compute_score': compute_score,
            'threshold_lambda': threshold_lambda,
            'fit_intercept': fit_intercept,
            'normalize': normalize,
            'copy_X': copy_X,
            'verbose': verbose}
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        if (y is not None):
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
Example #4
0
def test_toy_ard_object():
    # Test BayesianRegression ARD classifier
    X = np.array([[1], [2], [3]])
    Y = np.array([1, 2, 3])
    clf = ARDRegression(compute_score=True)
    clf.fit(X, Y)

    # Check that the model could approximately learn the identity function
    test = [[1], [3], [4]]
    assert_array_almost_equal(clf.predict(test), [1, 3, 4], 2)
Example #5
0
def test_toy_ard_object():
    """
    Test BayesianRegression ARD classifier
    """
    X = np.array([[1], [2], [3]])
    Y = np.array([1, 2, 3])
    clf = ARDRegression(compute_score=True)
    clf.fit(X, Y)
    test = [[1], [3], [4]]
    assert(np.abs(clf.predict(test) - [1, 3, 4]).sum() < 1.e-3)  # identity
Example #6
0
def test_toy_ard_object():
    """
    Test BayesianRegression ARD classifier
    """
    X = np.array([[1], [2], [3]])
    Y = np.array([1, 2, 3])
    clf = ARDRegression(compute_score=True)
    clf.fit(X, Y)
    test = [[1], [3], [4]]
    assert (np.abs(clf.predict(test) - [1, 3, 4]).sum() < 1.e-3)  # identity
Example #7
0
def test_update_of_sigma_in_ard():
    # Checks that `sigma_` is updated correctly after the last iteration
    # of the ARDRegression algorithm. See issue #10128.
    X = np.array([[1, 0],
                  [0, 0]])
    y = np.array([0, 0])
    clf = ARDRegression(n_iter=1)
    clf.fit(X, y)
    # With the inputs above, ARDRegression prunes one of the two coefficients
    # in the first iteration. Hence, the expected shape of `sigma_` is (1, 1).
    assert_equal(clf.sigma_.shape, (1, 1))
    # Ensure that no error is thrown at prediction stage
    clf.predict(X, return_std=True)
Example #8
0
def test_ard_accuracy_on_easy_problem():
    # Check that ARD converges with reasonable accuracy on an easy problem
    # (Github issue #14055)
    # This particular seed seems to converge poorly in the failure-case
    # (scipy==1.3.0, sklearn==0.21.2)
    seed = 45
    X = np.random.RandomState(seed=seed).normal(size=(250, 3))
    y = X[:, 1]

    regressor = ARDRegression()
    regressor.fit(X, y)

    abs_coef_error = np.abs(1 - regressor.coef_[1])
    # Expect an accuracy of better than 1E-4 in most cases -
    # Failure-case produces 0.16!
    assert abs_coef_error < 0.01
Example #9
0
 def __init__(self,
              n_iter=300,
              tol=0.001,
              alpha_1=1e-06,
              alpha_2=1e-06,
              lambda_1=1e-06,
              lambda_2=1e-06,
              compute_score=False,
              threshold_lambda=10000.0,
              fit_intercept=True,
              normalize=False,
              copy_X=True,
              verbose=False):
     self._hyperparams = {
         'n_iter': n_iter,
         'tol': tol,
         'alpha_1': alpha_1,
         'alpha_2': alpha_2,
         'lambda_1': lambda_1,
         'lambda_2': lambda_2,
         'compute_score': compute_score,
         'threshold_lambda': threshold_lambda,
         'fit_intercept': fit_intercept,
         'normalize': normalize,
         'copy_X': copy_X,
         'verbose': verbose
     }
     self._wrapped_model = Op(**self._hyperparams)
Example #10
0
 def fit(self, X, y=None):
     self._sklearn_model = SKLModel(**self._hyperparams)
     if (y is not None):
         self._sklearn_model.fit(X, y)
     else:
         self._sklearn_model.fit(X)
     return self
Example #11
0
def test_prediction_bayesian_ridge_ard_with_constant_input():
    # Test BayesianRidge and ARDRegression predictions for edge case of
    # constant target vectors
    n_samples = 4
    n_features = 5
    random_state = check_random_state(42)
    constant_value = random_state.rand()
    X = random_state.random_sample((n_samples, n_features))
    y = np.full(n_samples, constant_value)
    expected = np.full(n_samples, constant_value)

    for clf in [BayesianRidge(), ARDRegression()]:
        y_pred = clf.fit(X, y).predict(X)
        assert_array_almost_equal(y_pred, expected)
Example #12
0
def test_std_bayesian_ridge_ard_with_constant_input():
    # Test BayesianRidge and ARDRegression standard dev. for edge case of
    # constant target vector
    # The standard dev. should be relatively small (< 0.01 is tested here)
    n_samples = 4
    n_features = 5
    random_state = check_random_state(42)
    constant_value = random_state.rand()
    X = random_state.random_sample((n_samples, n_features))
    y = np.full(n_samples, constant_value)
    expected_upper_boundary = 0.01

    for clf in [BayesianRidge(), ARDRegression()]:
        _, y_std = clf.fit(X, y).predict(X, return_std=True)
        assert_array_less(y_std, expected_upper_boundary)
Example #13
0
def test_update_of_sigma_in_ard():
    # Checks that `sigma_` is updated correctly after the last iteration
    # of the ARDRegression algorithm. See issue #10128.
    X = np.array([[1, 0], [0, 0]])
    y = np.array([0, 0])
    clf = ARDRegression(n_iter=1)
    clf.fit(X, y)
    # With the inputs above, ARDRegression prunes one of the two coefficients
    # in the first iteration. Hence, the expected shape of `sigma_` is (1, 1).
    assert_equal(clf.sigma_.shape, (1, 1))
    # Ensure that no error is thrown at prediction stage
    clf.predict(X, return_std=True)
Example #14
0
    "Radius Neighbors",
    "MLP",
    "Decision Tree",
    "Extra Tree",
    "SVR"
]

classifiers = [
    RandomForestRegressor(n_estimators=200, n_jobs=5,
                          random_state=randomstate),
    ExtraTreesRegressor(n_estimators=200, n_jobs=5, random_state=randomstate),
    # GradientBoostingRegressor(random_state=randomstate),    # learning_rate is a hyper-parameter in the range (0.0, 1.0]
    # HistGradientBoostingClassifier(random_state=randomstate),    # learning_rate is a hyper-parameter in the range (0.0, 1.0]
    AdaBoostRegressor(n_estimators=200, random_state=randomstate),
    GaussianProcessRegressor(normalize_y=True),
    ARDRegression(),
    # HuberRegressor(),   # epsilon:  greater than 1.0, default 1.35
    LinearRegression(n_jobs=5),
    PassiveAggressiveRegressor(
        random_state=randomstate),  # C: 0.25, 0.5, 1, 5, 10
    SGDRegressor(random_state=randomstate),
    TheilSenRegressor(n_jobs=5, random_state=randomstate),
    RANSACRegressor(random_state=randomstate),
    KNeighborsRegressor(
        weights='distance'),  # n_neighbors: 3, 6, 9, 12, 15, 20
    RadiusNeighborsRegressor(weights='distance'),  # radius: 1, 2, 5, 10, 15
    MLPRegressor(max_iter=10000000, random_state=randomstate),
    DecisionTreeRegressor(
        random_state=randomstate),  # max_depth = 2, 3, 4, 6, 8
    ExtraTreeRegressor(random_state=randomstate),  # max_depth = 2, 3, 4, 6, 8
    SVR()  # C: 0.25, 0.5, 1, 5, 10
Example #15
0
from sklearn.random_projection import SparseRandomProjection
from sklearn.cluster.bicluster import SpectralBiclustering
from sklearn.cluster.spectral import SpectralClustering
from sklearn.cluster.bicluster import SpectralCoclustering
from sklearn.manifold.spectral_embedding_ import SpectralEmbedding
from sklearn.preprocessing.data import StandardScaler
from sklearn.manifold.t_sne import TSNE
from sklearn.linear_model.theil_sen import TheilSenRegressor
from sklearn.mixture.dpgmm import VBGMM
from sklearn.feature_selection.variance_threshold import VarianceThreshold

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)


clf_dict = {'ARDRegression':ARDRegression(),
			'AdaBoostClassifier':AdaBoostClassifier(),
			'AdaBoostRegressor':AdaBoostRegressor(),
			'AdditiveChi2Sampler':AdditiveChi2Sampler(),
			'AffinityPropagation':AffinityPropagation(),
			'AgglomerativeClustering':AgglomerativeClustering(),
			'BaggingClassifier':BaggingClassifier(),
			'BaggingRegressor':BaggingRegressor(),
			'BayesianGaussianMixture':BayesianGaussianMixture(),
			'BayesianRidge':BayesianRidge(),
			'BernoulliNB':BernoulliNB(),
			'BernoulliRBM':BernoulliRBM(),
			'Binarizer':Binarizer(),
			'Birch':Birch(),
			'CCA':CCA(),
			'CalibratedClassifierCV':CalibratedClassifierCV(),