def test_return_incumbent(self):
    X_train, X_test, y_train, y_test = load_benchmark(return_split=True)
    linear_basis_fn = 'ridge'
    n_regressors = 1
    boosting_loss = 'ls'
    line_search_options = dict(init_guess=1, opt_method='minimize',
                               method='Nelder-Mead', tol=1e-7,
                               options={"maxiter": 10000},
                               niter=None, T=None, loss='lad',
                               regularization=0.1)
    base_boosting_options = dict(n_regressors=n_regressors,
                                 boosting_loss=boosting_loss,
                                 line_search_options=line_search_options)
    index = 3
    reg = Regressor(regressor_choice=linear_basis_fn,
                    params=dict(alpha=0.1),
                    target_index=index,
                    base_boosting_options=base_boosting_options)
    reg.baseboostcv(X_train.iloc[:10, :], y_train.iloc[:10, :])
    self.assertHasAttr(reg, 'return_incumbent_')
def test_baseboostcv_score(self):
    X_train, X_test, y_train, y_test = load_benchmark(return_split=True)
    stack = dict(regressors=['ridge', 'lgbmregressor'],
                 final_regressor='ridge')
    line_search_options = dict(init_guess=1, opt_method='minimize',
                               method='Nelder-Mead', tol=1e-7,
                               options={"maxiter": 10000},
                               niter=None, T=None, loss='lad',
                               regularization=0.1)
    base_boosting_options = dict(n_regressors=3,
                                 boosting_loss='ls',
                                 line_search_options=line_search_options)
    reg = Regressor(regressor_choice='stackingregressor',
                    target_index=0,
                    stacking_options=dict(layers=stack),
                    base_boosting_options=base_boosting_options)
    y_pred = reg.baseboostcv(X_train, y_train).predict(X_test)
    score = reg.score(y_test, y_pred)
    self.assertNotHasAttr(reg, 'return_incumbent_')
    self.assertGreaterEqual(score['mae'].values, 0.0)
    self.assertGreaterEqual(score['mse'].values, 0.0)
    self.assertLess(score['mae'].values, 2.0)
    self.assertLess(score['mse'].values, 6.2)
def test_shap_explainer(self):
    X_train, _, y_train, _ = load_benchmark(return_split=True)
    index = 3
    interpret = ShapInterpret(regressor_choice='ridgecv', target_index=index)
    interpret.fit(X=X_train, y=y_train, index=index)
    explainer, shap_values = interpret.explainer(X=X_train)
    self.assertEqual(X_train.shape, shap_values.shape)
def test_benchmark(self):
    _, X_test, _, y_test = load_benchmark(return_split=True)
    reg = Regressor()
    score = reg.score(y_test, X_test)
    self.assertEqual(score['mae'].mean().round(decimals=2), 1.34)
    self.assertEqual(score['mse'].mean().round(decimals=2), 4.19)
    self.assertEqual(score['rmse'].mean().round(decimals=2), 1.88)
    self.assertEqual(score['r2'].mean().round(decimals=2), 0.99)
    self.assertEqual(score['ev'].mean().round(decimals=2), 0.99)
def test_shap_explainer(self):
    X_train, _, y_train, _ = load_benchmark(return_split=True)
    index = 3
    params = dict(iterations=10, loss_function='RMSE')
    interpret = ShapInterpret(regressor_choice='catboostregressor',
                              target_index=index, params=params)
    interpret.fit(X=X_train, y=y_train, index=index)
    explainer, shap_values = interpret.explainer(X=X_train)
    self.assertEqual(X_train.shape, shap_values.shape)
def test_shap_explainer(self):
    X_train, _, y_train, _ = load_benchmark(return_split=True)
    index = 3
    params = dict(n_estimators=3, objective='mean_squared_error')
    interpret = ShapInterpret(regressor_choice='lgbmregressor',
                              target_index=index, params=params)
    interpret.fit(X=X_train, y=y_train, index=index)
    explainer, shap_values = interpret.explainer(X=X_train)
    self.assertEqual(X_train.shape, shap_values.shape)
def test_without_cv_shap_explainer(self):
    X_train, _, y_train, _ = load_benchmark(return_split=True)
    index = 3
    stack = dict(regressors=['kneighborsregressor', 'bayesianridge'],
                 final_regressor='lasso')
    interpret = ShapInterpret(regressor_choice='mlxtendstackingregressor',
                              target_index=index,
                              stacking_options=dict(layers=stack))
    interpret.fit(X=X_train, y=y_train, index=index)
    explainer, shap_values = interpret.explainer(X=X_train)
    self.assertEqual(X_train.shape, shap_values.shape)
def test_pipeline_score_uniform_average(self):
    X_train, X_test, y_train, y_test = load_benchmark(return_split=True)
    line_search_options = dict(init_guess=1, opt_method='minimize',
                               method='Nelder-Mead', tol=1e-7,
                               options={"maxiter": 10000},
                               niter=None, T=None, loss='lad',
                               regularization=0.1)
    base_boosting_options = dict(n_regressors=3,
                                 boosting_loss='lad',
                                 line_search_options=line_search_options)
    pipe = ModifiedPipeline(steps=[('scaler', StandardScaler()),
                                   ('reg', Ridge())],
                            base_boosting_options=base_boosting_options)
    pipe.fit(X_train, y_train)
    score = pipe.score(X_test, y_test, multioutput='uniform_average')
    self.assertEqual(score['mae'].mean().round(decimals=2), 1.19)
    self.assertEqual(score['mse'].mean().round(decimals=2), 3.49)
    self.assertEqual(score['rmse'].mean().round(decimals=2), 1.87)
    self.assertEqual(score['r2'].mean().round(decimals=2), 0.99)
    self.assertEqual(score['ev'].mean().round(decimals=2), 0.99)
""" # Author: Alex Wozniakowski <*****@*****.**> from physlearn import Regressor from physlearn.datasets import load_benchmark, supplementary_params from physlearn.supervised import ShapInterpret # Here we load the the training data, as well as the test data. # Each example corresponds to 9 raw control voltage features, # and the multi-targets are the sorted eigenenergies, as in the # benchmark task. n_features = 9 n_targets = 5 data = load_benchmark() X_train, X_test = data['X_train'].iloc[:, :n_features], data['X_test'].iloc[:, :n_features] y_train, y_test = data['y_train'].iloc[:, :n_targets], data['y_test'].iloc[:, :n_targets] # We focus on the first single-target regression subtask, # as this is the most difficult subtask for the base regressor. # Note that the index corresponds to the Python convention. index = 0 # We choose the Sklearn MLPRegressor. model = 'mlpregressor' # We make an instance of ShapInterpret with our choice # of neural network for the single-target regression subtask: 1. # We set the show parameter as True, which enables SHAP to display # the plot.
import pandas as pd

from physlearn import Regressor
from physlearn.datasets import load_benchmark, additional_paper_params

# Here we load the training data, as well as the test data.
# The shapes of X_train and y_train are (95, 5), and the shapes
# of X_test and y_test are (41, 5).
X_train, X_test, y_train, y_test = load_benchmark(return_split=True)

# We choose the LightGBM LGBMRegressor.
model = 'lgbmregressor'

print('Building scoring DataFrame for each single-target subtask.')
test_error = []
for index in range(5):
    # We make an instance of Regressor with our choice of
    # gradient-based one-side sampling for each single-target
    # regression subtask.
    reg = Regressor(regressor_choice=model,
                    params=additional_paper_params(index),
                    target_index=index)

    # We invoke the fit and predict methods, then we
    # compute the single-target test error.
    y_pred = reg.fit(X_train, y_train).predict(X_test)
    score = reg.score(y_test, y_pred)
    test_error.append(score)
test_error = pd.concat(test_error)
print('Finished building the scoring DataFrame.')
""" ============================ Benchmark test error ============================ This example generates the incumbent state-of-the-art's test error on the benchmark task. """ # Author: Alex Wozniakowski <*****@*****.**> from physlearn import Regressor from physlearn.datasets import load_benchmark # To comput the benchmark error, we only need the test data. # We denote the initial prediction examples as X_test and # the multi-targets as y_test. Both have the same shape, # namely (41, 5). _, X_test, _, y_test = load_benchmark(return_split=True) # Here we make an instance of Regressor, so that we can # automatically compute the test error as a DataFrame. reg = Regressor() test_error = reg.score(y_test, X_test) print('The single-target test error:') print(test_error.round(decimals=2)) print('The benchmark error:') print(test_error.mean().round(decimals=2))
"""
============================
Augmented learning curve
============================

This example generates an augmented learning curve for a
quantum device calibration application.
"""

# Author: Alex Wozniakowski <*****@*****.**>

import numpy as np

from physlearn import Regressor
from physlearn.datasets import load_benchmark, paper_params
from physlearn.supervised import plot_learning_curve

# Here we load the training data. The shapes of X_train
# and y_train are (95, 5).
X_train, _, y_train, _ = load_benchmark(return_split=True)

# We choose the Sklearn StackingRegressor as the basis function b
# in Eq. 11 of the main body. The first stacking layer consists of
# the Sklearn MLPRegressor and the LightGBM LGBMRegressor. The
# second stacking layer consists of the Sklearn MLPRegressor.
basis_fn = 'stackingregressor'
stack = dict(regressors=['mlpregressor', 'lgbmregressor'],
             final_regressor='mlpregressor')

# The number of regressors corresponds to K in Eq. 11.
n_regressors = 1

# The boosting loss is the squared error loss function, which is
# utilized in the computation of the pseudo-residuals, e.g., the
# negative gradient.
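# A minimal sketch of the remaining setup: the option dictionaries
# reuse the values that appear in this repository's tests, and the
# final plot_learning_curve call is an assumption, as the helper's
# exact signature is not shown in this section.
boosting_loss = 'ls'

line_search_options = dict(init_guess=1, opt_method='minimize',
                           method='Nelder-Mead', tol=1e-7,
                           options={"maxiter": 10000},
                           niter=None, T=None, loss='lad',
                           regularization=0.1)

base_boosting_options = dict(n_regressors=n_regressors,
                             boosting_loss=boosting_loss,
                             line_search_options=line_search_options)

# Hypothetical call: the parameters beyond the regressor choice and
# the training data are assumptions.
plot_learning_curve(regressor_choice=basis_fn, X=X_train, y=y_train,
                    stacking_options=dict(layers=stack),
                    base_boosting_options=base_boosting_options)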