def test_return_std():
    # Test return_std option for both Bayesian regressors
    def f(X):
        return np.dot(X, w) + b

    def f_noise(X, noise_mult):
        return f(X) + np.random.randn(X.shape[0]) * noise_mult

    d = 5
    n_train = 50
    n_test = 10

    w = np.array([1.0, 0.0, 1.0, -1.0, 0.0])
    b = 1.0

    X = np.random.random((n_train, d))
    X_test = np.random.random((n_test, d))

    for decimal, noise_mult in enumerate([1, 0.1, 0.01]):
        y = f_noise(X, noise_mult)

        m1 = BayesianRidge()
        m1.fit(X, y)
        y_mean1, y_std1 = m1.predict(X_test, return_std=True)
        assert_array_almost_equal(y_std1, noise_mult, decimal=decimal)

        m2 = ARDRegression()
        m2.fit(X, y)
        y_mean2, y_std2 = m2.predict(X_test, return_std=True)
        assert_array_almost_equal(y_std2, noise_mult, decimal=decimal)
class ARDRegressionImpl():

    def __init__(self, n_iter=300, tol=0.001, alpha_1=1e-06, alpha_2=1e-06,
                 lambda_1=1e-06, lambda_2=1e-06, compute_score=False,
                 threshold_lambda=10000.0, fit_intercept=True, normalize=False,
                 copy_X=True, verbose=False):
        self._hyperparams = {
            'n_iter': n_iter,
            'tol': tol,
            'alpha_1': alpha_1,
            'alpha_2': alpha_2,
            'lambda_1': lambda_1,
            'lambda_2': lambda_2,
            'compute_score': compute_score,
            'threshold_lambda': threshold_lambda,
            'fit_intercept': fit_intercept,
            'normalize': normalize,
            'copy_X': copy_X,
            'verbose': verbose}
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        if (y is not None):
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
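# A minimal usage sketch for the wrapper class above (not from the original
# source). It assumes SKLModel is bound to sklearn.linear_model.ARDRegression,
# e.g. `from sklearn.linear_model import ARDRegression as SKLModel`, and the
# synthetic data below is purely illustrative.
import numpy as np

X_demo = np.random.RandomState(0).normal(size=(20, 3))
y_demo = X_demo[:, 0] - 2.0 * X_demo[:, 2]

model = ARDRegressionImpl(n_iter=300, tol=1e-3)  # hyperparameters are forwarded to the wrapped estimator
model.fit(X_demo, y_demo)                        # delegates to SKLModel.fit
preds = model.predict(X_demo)                    # delegates to SKLModel.predict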
def test_toy_ard_object():
    # Test BayesianRegression ARD classifier
    X = np.array([[1], [2], [3]])
    Y = np.array([1, 2, 3])
    clf = ARDRegression(compute_score=True)
    clf.fit(X, Y)

    # Check that the model could approximately learn the identity function
    test = [[1], [3], [4]]
    assert_array_almost_equal(clf.predict(test), [1, 3, 4], 2)
def test_toy_ard_object():
    """
    Test BayesianRegression ARD classifier
    """
    X = np.array([[1], [2], [3]])
    Y = np.array([1, 2, 3])
    clf = ARDRegression(compute_score=True)
    clf.fit(X, Y)
    test = [[1], [3], [4]]
    assert(np.abs(clf.predict(test) - [1, 3, 4]).sum() < 1.e-3)  # identity
def test_update_of_sigma_in_ard():
    # Checks that `sigma_` is updated correctly after the last iteration
    # of the ARDRegression algorithm. See issue #10128.
    X = np.array([[1, 0],
                  [0, 0]])
    y = np.array([0, 0])
    clf = ARDRegression(n_iter=1)
    clf.fit(X, y)
    # With the inputs above, ARDRegression prunes one of the two coefficients
    # in the first iteration. Hence, the expected shape of `sigma_` is (1, 1).
    assert_equal(clf.sigma_.shape, (1, 1))
    # Ensure that no error is thrown at prediction stage
    clf.predict(X, return_std=True)
def test_ard_accuracy_on_easy_problem():
    # Check that ARD converges with reasonable accuracy on an easy problem
    # (Github issue #14055)
    # This particular seed seems to converge poorly in the failure-case
    # (scipy==1.3.0, sklearn==0.21.2)
    seed = 45
    X = np.random.RandomState(seed=seed).normal(size=(250, 3))
    y = X[:, 1]

    regressor = ARDRegression()
    regressor.fit(X, y)

    abs_coef_error = np.abs(1 - regressor.coef_[1])
    # Expect an accuracy of better than 1E-4 in most cases -
    # Failure-case produces 0.16!
    assert abs_coef_error < 0.01
def __init__(self, n_iter=300, tol=0.001, alpha_1=1e-06, alpha_2=1e-06,
             lambda_1=1e-06, lambda_2=1e-06, compute_score=False,
             threshold_lambda=10000.0, fit_intercept=True, normalize=False,
             copy_X=True, verbose=False):
    self._hyperparams = {
        'n_iter': n_iter,
        'tol': tol,
        'alpha_1': alpha_1,
        'alpha_2': alpha_2,
        'lambda_1': lambda_1,
        'lambda_2': lambda_2,
        'compute_score': compute_score,
        'threshold_lambda': threshold_lambda,
        'fit_intercept': fit_intercept,
        'normalize': normalize,
        'copy_X': copy_X,
        'verbose': verbose
    }
    self._wrapped_model = Op(**self._hyperparams)
def fit(self, X, y=None):
    self._sklearn_model = SKLModel(**self._hyperparams)
    if (y is not None):
        self._sklearn_model.fit(X, y)
    else:
        self._sklearn_model.fit(X)
    return self
def test_prediction_bayesian_ridge_ard_with_constant_input():
    # Test BayesianRidge and ARDRegression predictions for edge case of
    # constant target vectors
    n_samples = 4
    n_features = 5
    random_state = check_random_state(42)
    constant_value = random_state.rand()
    X = random_state.random_sample((n_samples, n_features))
    y = np.full(n_samples, constant_value)
    expected = np.full(n_samples, constant_value)

    for clf in [BayesianRidge(), ARDRegression()]:
        y_pred = clf.fit(X, y).predict(X)
        assert_array_almost_equal(y_pred, expected)
def test_std_bayesian_ridge_ard_with_constant_input():
    # Test BayesianRidge and ARDRegression standard dev. for edge case of
    # constant target vector
    # The standard dev. should be relatively small (< 0.01 is tested here)
    n_samples = 4
    n_features = 5
    random_state = check_random_state(42)
    constant_value = random_state.rand()
    X = random_state.random_sample((n_samples, n_features))
    y = np.full(n_samples, constant_value)
    expected_upper_boundary = 0.01

    for clf in [BayesianRidge(), ARDRegression()]:
        _, y_std = clf.fit(X, y).predict(X, return_std=True)
        assert_array_less(y_std, expected_upper_boundary)
"Radius Neighbors", "MLP", "Decision Tree", "Extra Tree", "SVR" ] classifiers = [ RandomForestRegressor(n_estimators=200, n_jobs=5, random_state=randomstate), ExtraTreesRegressor(n_estimators=200, n_jobs=5, random_state=randomstate), # GradientBoostingRegressor(random_state=randomstate), # learning_rate is a hyper-parameter in the range (0.0, 1.0] # HistGradientBoostingClassifier(random_state=randomstate), # learning_rate is a hyper-parameter in the range (0.0, 1.0] AdaBoostRegressor(n_estimators=200, random_state=randomstate), GaussianProcessRegressor(normalize_y=True), ARDRegression(), # HuberRegressor(), # epsilon: greater than 1.0, default 1.35 LinearRegression(n_jobs=5), PassiveAggressiveRegressor( random_state=randomstate), # C: 0.25, 0.5, 1, 5, 10 SGDRegressor(random_state=randomstate), TheilSenRegressor(n_jobs=5, random_state=randomstate), RANSACRegressor(random_state=randomstate), KNeighborsRegressor( weights='distance'), # n_neighbors: 3, 6, 9, 12, 15, 20 RadiusNeighborsRegressor(weights='distance'), # radius: 1, 2, 5, 10, 15 MLPRegressor(max_iter=10000000, random_state=randomstate), DecisionTreeRegressor( random_state=randomstate), # max_depth = 2, 3, 4, 6, 8 ExtraTreeRegressor(random_state=randomstate), # max_depth = 2, 3, 4, 6, 8 SVR() # C: 0.25, 0.5, 1, 5, 10
from sklearn.random_projection import SparseRandomProjection
from sklearn.cluster.bicluster import SpectralBiclustering
from sklearn.cluster.spectral import SpectralClustering
from sklearn.cluster.bicluster import SpectralCoclustering
from sklearn.manifold.spectral_embedding_ import SpectralEmbedding
from sklearn.preprocessing.data import StandardScaler
from sklearn.manifold.t_sne import TSNE
from sklearn.linear_model.theil_sen import TheilSenRegressor
from sklearn.mixture.dpgmm import VBGMM
from sklearn.feature_selection.variance_threshold import VarianceThreshold

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

clf_dict = {'ARDRegression': ARDRegression(),
            'AdaBoostClassifier': AdaBoostClassifier(),
            'AdaBoostRegressor': AdaBoostRegressor(),
            'AdditiveChi2Sampler': AdditiveChi2Sampler(),
            'AffinityPropagation': AffinityPropagation(),
            'AgglomerativeClustering': AgglomerativeClustering(),
            'BaggingClassifier': BaggingClassifier(),
            'BaggingRegressor': BaggingRegressor(),
            'BayesianGaussianMixture': BayesianGaussianMixture(),
            'BayesianRidge': BayesianRidge(),
            'BernoulliNB': BernoulliNB(),
            'BernoulliRBM': BernoulliRBM(),
            'Binarizer': Binarizer(),
            'Birch': Birch(),
            'CCA': CCA(),
            'CalibratedClassifierCV': CalibratedClassifierCV(),