def test_baseboostcv_score(self):
    """Run base boosting with CV model selection on the benchmark task
    and sanity-check the resulting single-target error bounds."""
    X_train, X_test, y_train, y_test = load_benchmark(return_split=True)

    stack = {'regressors': ['ridge', 'lgbmregressor'],
             'final_regressor': 'ridge'}
    line_search_options = {'init_guess': 1,
                           'opt_method': 'minimize',
                           'method': 'Nelder-Mead',
                           'tol': 1e-7,
                           'options': {"maxiter": 10000},
                           'niter': None,
                           'T': None,
                           'loss': 'lad',
                           'regularization': 0.1}
    base_boosting_options = {'n_regressors': 3,
                             'boosting_loss': 'ls',
                             'line_search_options': line_search_options}

    reg = Regressor(regressor_choice='stackingregressor',
                    target_index=0,
                    stacking_options={'layers': stack},
                    base_boosting_options=base_boosting_options)

    y_pred = reg.baseboostcv(X_train, y_train).predict(X_test)
    score = reg.score(y_test, y_pred)

    # The boosted model should have been selected over the incumbent.
    # NOTE(review): assertNotHasAttr is not a stock unittest assertion
    # before Python 3.14 -- presumably a helper on this TestCase; confirm.
    self.assertNotHasAttr(reg, 'return_incumbent_')
    self.assertGreaterEqual(score['mae'].values, 0.0)
    self.assertGreaterEqual(score['mse'].values, 0.0)
    self.assertLess(score['mae'].values, 2.0)
    self.assertLess(score['mse'].values, 6.2)
def test_pipeline_clone_fit_score(self):
    """Clone the underlying pipeline, then check that fitting, predicting,
    and scoring still behave and the pipeline class is preserved."""
    # NOTE(review): load_boston was removed in scikit-learn 1.2 --
    # this test presumably requires an older pinned version; confirm.
    X, y = load_boston(return_X_y=True)
    X, y = pd.DataFrame(X), pd.Series(y)
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=42)

    transformer_list = [('pca', PCA(n_components=1)),
                        ('svd', TruncatedSVD(n_components=2))]
    union = FeatureUnion(transformer_list=transformer_list, n_jobs=-1)
    params = {'n_estimators': 10,
              'objective': 'reg:squarederror',
              'booster': 'gbtree'}

    reg = Regressor(regressor_choice='xgbregressor',
                    pipeline_transform=('tr', union),
                    params=params)
    reg.get_pipeline(y=y_train)

    _class_before_clone = reg.pipe.__class__
    reg.pipe = clone(reg.pipe)

    y_pred = reg.fit(X_train, y_train).predict(X_test)
    score = reg.score(y_test, y_pred)

    self.assertEqual(_class_before_clone, reg.pipe.__class__)
    self.assertCountEqual(y_pred.index, y_test.index)
    self.assertGreaterEqual(score['mae'].values, 0.0)
    self.assertGreaterEqual(score['mse'].values, 0.0)
    self.assertLess(score['mae'].values, 11.0)
    self.assertLess(score['mse'].values, 232.0)
def test_with_cv_pipeline_clone_fit_score(self):
    """Clone a pipeline wrapping the mlxtend stacking CV regressor, then
    check that fitting and scoring still behave after the clone."""
    # NOTE(review): load_boston was removed in scikit-learn 1.2 --
    # this test presumably requires an older pinned version; confirm.
    X, y = load_boston(return_X_y=True)
    X, y = pd.DataFrame(X), pd.Series(y)
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=42)

    transformer_list = [('pca', PCA(n_components=1)),
                        ('svd', TruncatedSVD(n_components=2))]
    union = FeatureUnion(transformer_list=transformer_list, n_jobs=-1)
    stack = {'regressors': ['kneighborsregressor', 'bayesianridge'],
             'final_regressor': 'lasso'}

    reg = Regressor(regressor_choice='mlxtendstackingcvregressor',
                    pipeline_transform=('tr', union),
                    stacking_options={'layers': stack})
    reg.get_pipeline(y=y_train)

    _class_before_clone = reg.pipe.__class__
    reg.pipe = clone(reg.pipe)

    y_pred = reg.fit(X_train, y_train).predict(X_test)
    score = reg.score(y_test, y_pred)

    self.assertEqual(_class_before_clone, reg.pipe.__class__)
    self.assertCountEqual(y_pred.index, y_test.index)
    self.assertGreaterEqual(score['mae'].values, 0.0)
    self.assertGreaterEqual(score['mse'].values, 0.0)
    self.assertLess(score['mae'].values, 11.0)
    self.assertLess(score['mse'].values, 232.0)
def test_benchmark(self):
    """Pin the incumbent state-of-the-art's mean error statistics
    on the benchmark task."""
    _, X_test, _, y_test = load_benchmark(return_split=True)
    reg = Regressor()
    score = reg.score(y_test, X_test)

    # Expected mean of each metric, rounded to two decimals.
    expected = {'mae': 1.34,
                'mse': 4.19,
                'rmse': 1.88,
                'r2': 0.99,
                'ev': 0.99}
    for metric, value in expected.items():
        self.assertEqual(score[metric].mean().round(decimals=2), value)
def test_multioutput_regressor_fit_score(self):
    """Fit a multi-target ridge pipeline on Linnerud and sanity-check
    the mean scores across the single-target subtasks."""
    # NOTE(review): another method later in this file reuses this exact
    # name; the later definition shadows this one, so only one runs.
    bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
    X, y = bunch['data'], bunch['target']
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=42)

    reg = Regressor(regressor_choice='ridge',
                    pipeline_transform='standardscaler')
    y_pred = reg.fit(X_train, y_train).predict(X_test)
    score = reg.score(y_test, y_pred).mean()

    self.assertCountEqual(y_pred.index, y_test.index)
    self.assertGreaterEqual(score['mae'], 0.0)
    self.assertGreaterEqual(score['mse'], 0.0)
    self.assertLess(score['mae'], 11.0)
    self.assertLess(score['mse'], 232.0)
def test_multioutput_regressorchain_fit_score(self):
    """Fit an XGBoost regressor chain on Linnerud and sanity-check
    the mean scores across the single-target subtasks."""
    # NOTE(review): another method later in this file reuses this exact
    # name; the later definition shadows this one, so only one runs.
    bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
    X, y = bunch['data'], bunch['target']
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=42)

    params = {'n_estimators': 10,
              'objective': 'reg:squarederror',
              'booster': 'gbtree'}
    reg = Regressor(regressor_choice='xgbregressor',
                    pipeline_transform='standardscaler',
                    params=params,
                    chain_order=[0, 2, 1])

    y_pred = reg.fit(X_train, y_train).predict(X_test)
    score = reg.score(y_test, y_pred).mean()

    self.assertCountEqual(y_pred.index, y_test.index)
    self.assertGreaterEqual(score['mae'], 0.0)
    self.assertGreaterEqual(score['mse'], 0.0)
    self.assertLess(score['mae'], 14.0)
    self.assertLess(score['mse'], 430.0)
def test_multioutput_regressorchain_with_cv_fit_score(self):
    """Fit a regressor chain built on the mlxtend stacking CV regressor
    and sanity-check the mean scores across the single-target subtasks."""
    bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
    X, y = bunch['data'], bunch['target']
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=42)

    stack = dict(regressors=['kneighborsregressor', 'bayesianridge'],
                 final_regressor='lasso')
    # Fixed: pass the stack via stacking_options=dict(layers=...),
    # consistent with every other Regressor construction in this file
    # (the original used a stray stacking_layer= keyword).
    reg = Regressor(regressor_choice='mlxtendstackingcvregressor',
                    stacking_options=dict(layers=stack),
                    pipeline_transform='standardscaler',
                    chain_order=[2, 0, 1])

    y_pred = reg.fit(X_train, y_train).predict(X_test)
    score = reg.score(y_test, y_pred).mean()

    self.assertCountEqual(y_pred.index, y_test.index)
    self.assertGreaterEqual(score['mae'], 0.0)
    self.assertGreaterEqual(score['mse'], 0.0)
    self.assertLess(score['mae'], 7.0)
    self.assertLess(score['mse'], 110.0)
def test_regressor_fit_score(self):
    """Fit a ridge pipeline on Boston housing and sanity-check scores.

    Fixed: the model was fit twice back-to-back (a bare reg.fit followed
    by reg.fit(...).predict(...)); it is now fit exactly once.
    """
    # NOTE(review): two other methods in this file share this exact name;
    # later definitions shadow earlier ones, so only one actually runs.
    # NOTE(review): load_boston was removed in scikit-learn 1.2.
    X, y = load_boston(return_X_y=True)
    X, y = pd.DataFrame(X), pd.Series(y)
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=42)

    reg = Regressor(regressor_choice='ridge',
                    pipeline_transform='standardscaler')
    y_pred = reg.fit(X_train, y_train).predict(X_test)
    score = reg.score(y_test, y_pred)

    self.assertCountEqual(y_pred.index, y_test.index)
    self.assertGreaterEqual(score['mae'].values, 0.0)
    self.assertGreaterEqual(score['mse'].values, 0.0)
    self.assertLess(score['mae'].values, 3.1)
    self.assertLess(score['mse'].values, 23.0)
def test_regressor_fit_score(self):
    """Fit an XGBoost pipeline on Boston housing and sanity-check scores.

    Fixed: the model was fit twice back-to-back (a bare reg.fit followed
    by reg.fit(...).predict(...)); it is now fit exactly once.
    """
    # NOTE(review): two other methods in this file share this exact name;
    # later definitions shadow earlier ones, so only one actually runs.
    # NOTE(review): load_boston was removed in scikit-learn 1.2.
    X, y = load_boston(return_X_y=True)
    X, y = pd.DataFrame(X), pd.Series(y)
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=42)

    params = dict(n_estimators=10,
                  objective='reg:squarederror',
                  booster='gbtree')
    reg = Regressor(regressor_choice='xgbregressor',
                    pipeline_transform='standardscaler',
                    params=params)
    y_pred = reg.fit(X_train, y_train).predict(X_test)
    score = reg.score(y_test, y_pred)

    self.assertCountEqual(y_pred.index, y_test.index)
    self.assertGreaterEqual(score['mae'].values, 0.0)
    self.assertGreaterEqual(score['mse'].values, 0.0)
    self.assertLess(score['mae'].values, 2.5)
    self.assertLess(score['mse'].values, 14.0)
def test_stacking_regressor_with_cv_fit_score(self):
    """Fit the mlxtend stacking CV regressor on Boston housing and
    sanity-check scores.

    Fixed: (1) the model was fit twice back-to-back; it is now fit once.
    (2) the stack is passed via stacking_options=dict(layers=...),
    consistent with the other Regressor constructions in this file
    (the original used a stray stacking_layer= keyword).
    """
    # NOTE(review): load_boston was removed in scikit-learn 1.2.
    X, y = load_boston(return_X_y=True)
    X, y = pd.DataFrame(X), pd.Series(y)
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=42)

    stack = dict(regressors=['kneighborsregressor', 'bayesianridge'],
                 final_regressor='lasso')
    reg = Regressor(regressor_choice='mlxtendstackingcvregressor',
                    stacking_options=dict(layers=stack),
                    pipeline_transform='standardscaler')
    y_pred = reg.fit(X_train, y_train).predict(X_test)
    score = reg.score(y_test, y_pred)

    self.assertCountEqual(y_pred.index, y_test.index)
    self.assertGreaterEqual(score['mae'].values, 0.0)
    self.assertGreaterEqual(score['mse'].values, 0.0)
    self.assertLess(score['mae'].values, 2.7)
    self.assertLess(score['mse'].values, 19.0)
def test_multioutput_regressor_fit_score(self):
    """Fit a multi-target LightGBM pipeline on Linnerud and sanity-check
    the mean scores across the single-target subtasks."""
    # NOTE(review): an earlier method in this file has this exact name;
    # this definition shadows it, so only one of them actually runs.
    bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
    X, y = bunch['data'], bunch['target']
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=42)

    params = {'n_estimators': 3, 'objective': 'mean_squared_error'}
    reg = Regressor(regressor_choice='lgbmregressor',
                    pipeline_transform='standardscaler',
                    params=params)

    y_pred = reg.fit(X_train, y_train).predict(X_test)
    score = reg.score(y_test, y_pred).mean()

    self.assertCountEqual(y_pred.index, y_test.index)
    self.assertGreaterEqual(score['mae'], 0.0)
    self.assertGreaterEqual(score['mse'], 0.0)
    self.assertLess(score['mae'], 8.1)
    self.assertLess(score['mse'], 122.5)
def test_multioutput_regressorchain_fit_score(self):
    """Fit a CatBoost regressor chain on Linnerud and sanity-check
    the mean scores across the single-target subtasks."""
    # NOTE(review): an earlier method in this file has this exact name;
    # this definition shadows it, so only one of them actually runs.
    bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
    X, y = bunch['data'], bunch['target']
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=42)

    params = {'iterations': 10, 'loss_function': 'RMSE'}
    reg = Regressor(regressor_choice='catboostregressor',
                    pipeline_transform='standardscaler',
                    params=params,
                    chain_order=[0, 2, 1])

    y_pred = reg.fit(X_train, y_train).predict(X_test)
    score = reg.score(y_test, y_pred).mean()

    self.assertCountEqual(y_pred.index, y_test.index)
    self.assertGreaterEqual(score['mae'], 0.0)
    self.assertGreaterEqual(score['mse'], 0.0)
    self.assertLess(score['mae'], 11.0)
    self.assertLess(score['mse'], 240.0)
def test_regressor_fit_score(self):
    """Fit a CatBoost pipeline on Boston housing and sanity-check scores.

    Fixed: the model was fit twice back-to-back (a bare reg.fit followed
    by reg.fit(...).predict(...)); it is now fit exactly once.
    """
    # NOTE(review): two other methods in this file share this exact name;
    # later definitions shadow earlier ones, so only one actually runs.
    # NOTE(review): load_boston was removed in scikit-learn 1.2.
    X, y = load_boston(return_X_y=True)
    X, y = pd.DataFrame(X), pd.Series(y)
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=42)

    params = dict(iterations=10, loss_function='RMSE')
    reg = Regressor(regressor_choice='catboostregressor',
                    pipeline_transform='standardscaler',
                    params=params)
    y_pred = reg.fit(X_train, y_train).predict(X_test)
    score = reg.score(y_test, y_pred)

    self.assertCountEqual(y_pred.index, y_test.index)
    self.assertGreaterEqual(score['mae'].values, 0.0)
    self.assertGreaterEqual(score['mse'].values, 0.0)
    self.assertLess(score['mae'].values, 2.7)
    self.assertLess(score['mse'].values, 18.0)
# Choose the underlying regressor to be the Sklearn
# histogram-based gradient boosting regressor.
regressor_choice = 'HistGradientBoostingRegressor'

# Choose the Sklearn QuantileTransformer as the data preprocessor.
# The output distribution is the Gaussian, e.g., 'normal'.
# The number of quantiles is the number of examples in y_train,
# e.g., 15.
pipeline_transform = 'quantilenormal'

# Make an instance of the Regressor object.
reg = Regressor(regressor_choice=regressor_choice,
                pipeline_transform=pipeline_transform)

# Greedily fit the additive model, then generate test data predictions.
# Fixed: the original fit the model twice in a row (a bare reg.fit
# followed by reg.fit(...).predict(...)); fit once, then predict.
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)

# Evaluate the test error, and store
# the results as a DataFrame
score = reg.score(y_test, y_pred)

# Print the mean absolute error, mean squared error,
# root mean squared error, R2, the expected variance,
# and the mean squared log error for each individual
# single-target regression subtask.
print(score)
""" ============================ Benchmark test error ============================ This example generates the incumbent state-of-the-art's test error on the benchmark task. """ # Author: Alex Wozniakowski <*****@*****.**> from physlearn import Regressor from physlearn.datasets import load_benchmark # To comput the benchmark error, we only need the test data. # We denote the initial prediction examples as X_test and # the multi-targets as y_test. Both have the same shape, # namely (41, 5). _, X_test, _, y_test = load_benchmark(return_split=True) # Here we make an instance of Regressor, so that we can # automatically compute the test error as a DataFrame. reg = Regressor() test_error = reg.score(y_test, X_test) print('The single-target test error:') print(test_error.round(decimals=2)) print('The benchmark error:') print(test_error.mean().round(decimals=2))