Ejemplo n.º 1
0
 def test_baseboostcv_score(self):
     """Base-boosting CV on the benchmark task: score bounds and no incumbent."""
     X_train, X_test, y_train, y_test = load_benchmark(return_split=True)
     layers = {'regressors': ['ridge', 'lgbmregressor'],
               'final_regressor': 'ridge'}
     search_opts = {'init_guess': 1,
                    'opt_method': 'minimize',
                    'method': 'Nelder-Mead',
                    'tol': 1e-7,
                    'options': {"maxiter": 10000},
                    'niter': None,
                    'T': None,
                    'loss': 'lad',
                    'regularization': 0.1}
     boost_opts = {'n_regressors': 3,
                   'boosting_loss': 'ls',
                   'line_search_options': search_opts}
     reg = Regressor(regressor_choice='stackingregressor',
                     target_index=0,
                     stacking_options=dict(layers=layers),
                     base_boosting_options=boost_opts)
     y_pred = reg.baseboostcv(X_train, y_train).predict(X_test)
     score = reg.score(y_test, y_pred)
     # When boosting beats the incumbent, no fallback attribute is set.
     self.assertNotHasAttr(reg, 'return_incumbent_')
     self.assertGreaterEqual(score['mae'].values, 0.0)
     self.assertGreaterEqual(score['mse'].values, 0.0)
     self.assertLess(score['mae'].values, 2.0)
     self.assertLess(score['mse'].values, 6.2)
Ejemplo n.º 2
0
 def test_pipeline_clone_fit_score(self):
     """Cloning the pipeline must preserve its class, index, and error bounds."""
     X, y = load_boston(return_X_y=True)
     X, y = pd.DataFrame(X), pd.Series(y)
     X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                         random_state=42)
     union = FeatureUnion(transformer_list=[('pca', PCA(n_components=1)),
                                            ('svd', TruncatedSVD(n_components=2))],
                          n_jobs=-1)
     reg = Regressor(regressor_choice='xgbregressor',
                     pipeline_transform=('tr', union),
                     params={'n_estimators': 10,
                             'objective': 'reg:squarederror',
                             'booster': 'gbtree'})
     reg.get_pipeline(y=y_train)
     original_class = reg.pipe.__class__
     # Cloning yields an unfitted copy with identical hyperparameters.
     reg.pipe = clone(reg.pipe)
     y_pred = reg.fit(X_train, y_train).predict(X_test)
     score = reg.score(y_test, y_pred)
     self.assertEqual(original_class, reg.pipe.__class__)
     self.assertCountEqual(y_pred.index, y_test.index)
     self.assertGreaterEqual(score['mae'].values, 0.0)
     self.assertGreaterEqual(score['mse'].values, 0.0)
     self.assertLess(score['mae'].values, 11.0)
     self.assertLess(score['mse'].values, 232.0)
Ejemplo n.º 3
0
 def test_with_cv_pipeline_clone_fit_score(self):
     """Cloning a stacking-CV pipeline must preserve class and error bounds."""
     X, y = load_boston(return_X_y=True)
     X, y = pd.DataFrame(X), pd.Series(y)
     X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                         random_state=42)
     union = FeatureUnion(transformer_list=[('pca', PCA(n_components=1)),
                                            ('svd', TruncatedSVD(n_components=2))],
                          n_jobs=-1)
     layers = {'regressors': ['kneighborsregressor', 'bayesianridge'],
               'final_regressor': 'lasso'}
     reg = Regressor(regressor_choice='mlxtendstackingcvregressor',
                     pipeline_transform=('tr', union),
                     stacking_options=dict(layers=layers))
     reg.get_pipeline(y=y_train)
     original_class = reg.pipe.__class__
     # Cloning yields an unfitted copy with identical hyperparameters.
     reg.pipe = clone(reg.pipe)
     y_pred = reg.fit(X_train, y_train).predict(X_test)
     score = reg.score(y_test, y_pred)
     self.assertEqual(original_class, reg.pipe.__class__)
     self.assertCountEqual(y_pred.index, y_test.index)
     self.assertGreaterEqual(score['mae'].values, 0.0)
     self.assertGreaterEqual(score['mse'].values, 0.0)
     self.assertLess(score['mae'].values, 11.0)
     self.assertLess(score['mse'].values, 232.0)
Ejemplo n.º 4
0
 def test_benchmark(self):
     """Reproduce the incumbent state-of-the-art's benchmark test error."""
     _, X_test, _, y_test = load_benchmark(return_split=True)
     score = Regressor().score(y_test, X_test)
     # Mean error per metric, rounded to two decimals, must match exactly.
     for metric, expected in [('mae', 1.34), ('mse', 4.19), ('rmse', 1.88),
                              ('r2', 0.99), ('ev', 0.99)]:
         self.assertEqual(score[metric].mean().round(decimals=2), expected)
Ejemplo n.º 5
0
    def test_multioutput_regressor_fit_score(self):
        """Fit a multi-target ridge regressor and sanity-check its mean error."""
        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
        X, y = bunch['data'], bunch['target']
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            random_state=42)

        reg = Regressor(regressor_choice='ridge',
                        pipeline_transform='standardscaler')
        y_pred = reg.fit(X_train, y_train).predict(X_test)
        score = reg.score(y_test, y_pred).mean()
        self.assertCountEqual(y_pred.index, y_test.index)
        # Errors are nonnegative and below loose regression-quality bounds.
        for metric, bound in [('mae', 11.0), ('mse', 232.0)]:
            self.assertGreaterEqual(score[metric], 0.0)
            self.assertLess(score[metric], bound)
Ejemplo n.º 6
0
    def test_multioutput_regressorchain_fit_score(self):
        """Fit an XGBoost regressor chain on the Linnerud multi-target data."""
        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
        X, y = bunch['data'], bunch['target']
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            random_state=42)

        reg = Regressor(regressor_choice='xgbregressor',
                        pipeline_transform='standardscaler',
                        params={'n_estimators': 10,
                                'objective': 'reg:squarederror',
                                'booster': 'gbtree'},
                        chain_order=[0, 2, 1])
        y_pred = reg.fit(X_train, y_train).predict(X_test)
        score = reg.score(y_test, y_pred).mean()
        self.assertCountEqual(y_pred.index, y_test.index)
        # Errors are nonnegative and below loose regression-quality bounds.
        for metric, bound in [('mae', 14.0), ('mse', 430.0)]:
            self.assertGreaterEqual(score[metric], 0.0)
            self.assertLess(score[metric], bound)
Ejemplo n.º 7
0
    def test_multioutput_regressorchain_with_cv_fit_score(self):
        """Fit a stacking-CV regressor chain on the Linnerud multi-target data."""
        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
        X, y = bunch['data'], bunch['target']
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            random_state=42)

        layers = {'regressors': ['kneighborsregressor', 'bayesianridge'],
                  'final_regressor': 'lasso'}
        reg = Regressor(regressor_choice='mlxtendstackingcvregressor',
                        stacking_layer=layers,
                        pipeline_transform='standardscaler',
                        chain_order=[2, 0, 1])
        y_pred = reg.fit(X_train, y_train).predict(X_test)
        score = reg.score(y_test, y_pred).mean()
        self.assertCountEqual(y_pred.index, y_test.index)
        # Errors are nonnegative and below loose regression-quality bounds.
        for metric, bound in [('mae', 7.0), ('mse', 110.0)]:
            self.assertGreaterEqual(score[metric], 0.0)
            self.assertLess(score[metric], bound)
Ejemplo n.º 8
0
    def test_regressor_fit_score(self):
        """Fit a ridge regressor on Boston housing and sanity-check its error.

        Fix: the original called ``reg.fit(X_train, y_train)`` twice in a
        row; the first fit was immediately discarded by the second, doubling
        training time for no benefit. A single chained fit is kept.
        """
        X, y = load_boston(return_X_y=True)
        X, y = pd.DataFrame(X), pd.Series(y)
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            random_state=42)

        reg = Regressor(regressor_choice='ridge',
                        pipeline_transform='standardscaler')
        # fit returns the regressor itself, so predict chains directly.
        y_pred = reg.fit(X_train, y_train).predict(X_test)
        score = reg.score(y_test, y_pred)
        self.assertCountEqual(y_pred.index, y_test.index)
        self.assertGreaterEqual(score['mae'].values, 0.0)
        self.assertGreaterEqual(score['mse'].values, 0.0)
        self.assertLess(score['mae'].values, 3.1)
        self.assertLess(score['mse'].values, 23.0)
Ejemplo n.º 9
0
    def test_regressor_fit_score(self):
        """Fit an XGBoost regressor on Boston housing and sanity-check its error.

        Fix: the original called ``reg.fit(X_train, y_train)`` twice in a
        row; the first fit was immediately discarded by the second, doubling
        training time for no benefit. A single chained fit is kept.
        """
        X, y = load_boston(return_X_y=True)
        X, y = pd.DataFrame(X), pd.Series(y)
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            random_state=42)

        params = dict(n_estimators=10, objective='reg:squarederror', booster='gbtree')
        reg = Regressor(regressor_choice='xgbregressor', pipeline_transform='standardscaler',
                        params=params)
        # fit returns the regressor itself, so predict chains directly.
        y_pred = reg.fit(X_train, y_train).predict(X_test)
        score = reg.score(y_test, y_pred)
        self.assertCountEqual(y_pred.index, y_test.index)
        self.assertGreaterEqual(score['mae'].values, 0.0)
        self.assertGreaterEqual(score['mse'].values, 0.0)
        self.assertLess(score['mae'].values, 2.5)
        self.assertLess(score['mse'].values, 14.0)
Ejemplo n.º 10
0
    def test_stacking_regressor_with_cv_fit_score(self):
        """Fit a stacking-CV regressor on Boston housing and check its error.

        Fix: the original called ``reg.fit(X_train, y_train)`` twice in a
        row; the first fit was immediately discarded by the second, doubling
        training time for no benefit. A single chained fit is kept.
        """
        X, y = load_boston(return_X_y=True)
        X, y = pd.DataFrame(X), pd.Series(y)
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            random_state=42)

        stack = dict(regressors=['kneighborsregressor', 'bayesianridge'],
                     final_regressor='lasso')
        reg = Regressor(regressor_choice='mlxtendstackingcvregressor', stacking_layer=stack,
                        pipeline_transform='standardscaler')
        # fit returns the regressor itself, so predict chains directly.
        y_pred = reg.fit(X_train, y_train).predict(X_test)
        score = reg.score(y_test, y_pred)
        self.assertCountEqual(y_pred.index, y_test.index)
        self.assertGreaterEqual(score['mae'].values, 0.0)
        self.assertGreaterEqual(score['mse'].values, 0.0)
        self.assertLess(score['mae'].values, 2.7)
        self.assertLess(score['mse'].values, 19.0)
Ejemplo n.º 11
0
    def test_multioutput_regressor_fit_score(self):
        """Fit a multi-target LightGBM regressor and sanity-check its mean error."""
        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
        X, y = bunch['data'], bunch['target']
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            random_state=42)

        reg = Regressor(regressor_choice='lgbmregressor',
                        pipeline_transform='standardscaler',
                        params={'n_estimators': 3,
                                'objective': 'mean_squared_error'})
        y_pred = reg.fit(X_train, y_train).predict(X_test)
        score = reg.score(y_test, y_pred).mean()
        self.assertCountEqual(y_pred.index, y_test.index)
        # Errors are nonnegative and below loose regression-quality bounds.
        for metric, bound in [('mae', 8.1), ('mse', 122.5)]:
            self.assertGreaterEqual(score[metric], 0.0)
            self.assertLess(score[metric], bound)
Ejemplo n.º 12
0
    def test_multioutput_regressorchain_fit_score(self):
        """Fit a CatBoost regressor chain on the Linnerud multi-target data."""
        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
        X, y = bunch['data'], bunch['target']
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            random_state=42)

        reg = Regressor(regressor_choice='catboostregressor',
                        pipeline_transform='standardscaler',
                        params={'iterations': 10, 'loss_function': 'RMSE'},
                        chain_order=[0, 2, 1])
        y_pred = reg.fit(X_train, y_train).predict(X_test)
        score = reg.score(y_test, y_pred).mean()
        self.assertCountEqual(y_pred.index, y_test.index)
        # Errors are nonnegative and below loose regression-quality bounds.
        for metric, bound in [('mae', 11.0), ('mse', 240.0)]:
            self.assertGreaterEqual(score[metric], 0.0)
            self.assertLess(score[metric], bound)
Ejemplo n.º 13
0
    def test_regressor_fit_score(self):
        """Fit a CatBoost regressor on Boston housing and sanity-check its error.

        Fix: the original called ``reg.fit(X_train, y_train)`` twice in a
        row; the first fit was immediately discarded by the second, doubling
        training time for no benefit. A single chained fit is kept.
        """
        X, y = load_boston(return_X_y=True)
        X, y = pd.DataFrame(X), pd.Series(y)
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            random_state=42)

        params = dict(iterations=10, loss_function='RMSE')
        reg = Regressor(regressor_choice='catboostregressor',
                        pipeline_transform='standardscaler',
                        params=params)
        # fit returns the regressor itself, so predict chains directly.
        y_pred = reg.fit(X_train, y_train).predict(X_test)
        score = reg.score(y_test, y_pred)
        self.assertCountEqual(y_pred.index, y_test.index)
        self.assertGreaterEqual(score['mae'].values, 0.0)
        self.assertGreaterEqual(score['mse'].values, 0.0)
        self.assertLess(score['mae'].values, 2.7)
        self.assertLess(score['mse'].values, 18.0)
Ejemplo n.º 14
0
# Choose the underlying regressor to be the Sklearn
# histogram-based gradient boosting regressor.
regressor_choice = 'HistGradientBoostingRegressor'

# Choose the Sklearn QuantileTransformer as the data preprocessor.
# The output distribution is the Gaussian, e.g., 'normal'.
# The number of quantiles is the number of examples in y_train,
# e.g., 15.
pipeline_transform = 'quantilenormal'

# Make an instance of the Regressor object.
reg = Regressor(regressor_choice=regressor_choice,
                pipeline_transform=pipeline_transform)

# Greedily fit the additive model, then generate the test data
# predictions. Fix: the original called reg.fit twice back to back;
# the first fit was immediately discarded by the second, training
# the model twice for no benefit. fit returns the regressor itself,
# so a single chained call suffices.
y_pred = reg.fit(X_train, y_train).predict(X_test)

# Evaluate the test error, and store
# the results as a DataFrame
score = reg.score(y_test, y_pred)

# Print the mean absolute error, mean squared error,
# root mean squared error, R2, the expected variance,
# and the mean squared log error for each individual
# single-target regression subtask.
print(score)
Ejemplo n.º 15
0
"""
============================
Benchmark test error
============================

This example generates the incumbent state-of-the-art's
test error on the benchmark task. 
"""

# Author: Alex Wozniakowski <*****@*****.**>

from physlearn import Regressor
from physlearn.datasets import load_benchmark

# To comput the benchmark error, we only need the test data.
# We denote the initial prediction examples as X_test and
# the multi-targets as y_test. Both have the same shape,
# namely (41, 5).
_, X_test, _, y_test = load_benchmark(return_split=True)

# Here we make an instance of Regressor, so that we can
# automatically compute the test error as a DataFrame.
reg = Regressor()
test_error = reg.score(y_test, X_test)

print('The single-target test error:')
print(test_error.round(decimals=2))
print('The benchmark error:')
print(test_error.mean().round(decimals=2))