Example #1
    def test_return_incumbent(self):
        X_train, X_test, y_train, y_test = load_benchmark(return_split=True)
        linear_basis_fn = 'ridge'
        n_regressors = 1
        boosting_loss = 'ls'
        line_search_options = dict(init_guess=1,
                                   opt_method='minimize',
                                   method='Nelder-Mead',
                                   tol=1e-7,
                                   options={"maxiter": 10000},
                                   niter=None,
                                   T=None,
                                   loss='lad',
                                   regularization=0.1)

        base_boosting_options = dict(n_regressors=n_regressors,
                                     boosting_loss=boosting_loss,
                                     line_search_options=line_search_options)
        index = 3
        reg = Regressor(regressor_choice=linear_basis_fn,
                        params=dict(alpha=0.1),
                        target_index=index,
                        base_boosting_options=base_boosting_options)
        reg.baseboostcv(X_train.iloc[:10, :], y_train.iloc[:10, :])
        self.assertHasAttr(reg, 'return_incumbent_')
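    # Hedged note, inferred from the assertions here and in Example #2:
    # baseboostcv appears to cross-validate the boosted model against the
    # initial regressor and to set the return_incumbent_ attribute when
    # boosting does not beat the incumbent; the 10-example slice above
    # makes that outcome likely, whereas Example #2 expects the boosted
    # model to win, hence assertNotHasAttr there.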
Example #2
    def test_baseboostcv_score(self):
        X_train, X_test, y_train, y_test = load_benchmark(return_split=True)
        stack = dict(regressors=['ridge', 'lgbmregressor'],
                     final_regressor='ridge')
        line_search_options = dict(init_guess=1,
                                   opt_method='minimize',
                                   method='Nelder-Mead',
                                   tol=1e-7,
                                   options={"maxiter": 10000},
                                   niter=None,
                                   T=None,
                                   loss='lad',
                                   regularization=0.1)
        base_boosting_options = dict(n_regressors=3,
                                     boosting_loss='ls',
                                     line_search_options=line_search_options)
        reg = Regressor(regressor_choice='stackingregressor',
                        target_index=0,
                        stacking_options=dict(layers=stack),
                        base_boosting_options=base_boosting_options)
        y_pred = reg.baseboostcv(X_train, y_train).predict(X_test)
        score = reg.score(y_test, y_pred)
        self.assertNotHasAttr(reg, 'return_incumbent_')
        self.assertGreaterEqual(score['mae'].values, 0.0)
        self.assertGreaterEqual(score['mse'].values, 0.0)
        self.assertLess(score['mae'].values, 2.0)
        self.assertLess(score['mse'].values, 6.2)
Example #3
    def test_shap_explainer(self):
        X_train, _, y_train, _ = load_benchmark(return_split=True)
        index = 3
        interpret = ShapInterpret(regressor_choice='ridgecv',
                                  target_index=index)
        interpret.fit(X=X_train, y=y_train, index=index)
        explainer, shap_values = interpret.explainer(X=X_train)
        self.assertEqual(X_train.shape, shap_values.shape)
Example #4
    def test_benchmark(self):
        _, X_test, _, y_test = load_benchmark(return_split=True)
        reg = Regressor()
        score = reg.score(y_test, X_test)
        self.assertEqual(score['mae'].mean().round(decimals=2), 1.34)
        self.assertEqual(score['mse'].mean().round(decimals=2), 4.19)
        self.assertEqual(score['rmse'].mean().round(decimals=2), 1.88)
        self.assertEqual(score['r2'].mean().round(decimals=2), 0.99)
        self.assertEqual(score['ev'].mean().round(decimals=2), 0.99)
Example #5
    def test_shap_explainer(self):
        X_train, _, y_train, _ = load_benchmark(return_split=True)
        index = 3
        params = dict(iterations=10, loss_function='RMSE')
        interpret = ShapInterpret(regressor_choice='catboostregressor',
                                  target_index=index,
                                  params=params)
        interpret.fit(X=X_train, y=y_train, index=index)
        explainer, shap_values = interpret.explainer(X=X_train)
        self.assertEqual(X_train.shape, shap_values.shape)
Example #6
    def test_shap_explainer(self):
        X_train, _, y_train, _ = load_benchmark(return_split=True)
        index = 3
        params = dict(n_estimators=3, objective='mean_squared_error')
        interpret = ShapInterpret(regressor_choice='lgbmregressor',
                                  target_index=index,
                                  params=params)
        interpret.fit(X=X_train, y=y_train, index=index)
        explainer, shap_values = interpret.explainer(X=X_train)
        self.assertEqual(X_train.shape, shap_values.shape)
Example #7
    def test_without_cv_shap_explainer(self):
        X_train, _, y_train, _ = load_benchmark(return_split=True)
        index = 3
        stack = dict(regressors=['kneighborsregressor', 'bayesianridge'],
                     final_regressor='lasso')
        interpret = ShapInterpret(regressor_choice='mlxtendstackingregressor',
                                  target_index=index,
                                  stacking_options=dict(layers=stack))
        interpret.fit(X=X_train, y=y_train, index=index)
        explainer, shap_values = interpret.explainer(X=X_train)
        self.assertEqual(X_train.shape, shap_values.shape)
Example #8
    def test_pipeline_score_uniform_average(self):
        X_train, X_test, y_train, y_test = load_benchmark(return_split=True)
        line_search_options = dict(init_guess=1,
                                   opt_method='minimize',
                                   method='Nelder-Mead',
                                   tol=1e-7,
                                   options={"maxiter": 10000},
                                   niter=None,
                                   T=None,
                                   loss='lad',
                                   regularization=0.1)
        base_boosting_options = dict(n_regressors=3,
                                     boosting_loss='lad',
                                     line_search_options=line_search_options)
        pipe = ModifiedPipeline(steps=[('scaler', StandardScaler()),
                                       ('reg', Ridge())],
                                base_boosting_options=base_boosting_options)
        pipe.fit(X_train, y_train)
        score = pipe.score(X_test, y_test, multioutput='uniform_average')
        self.assertEqual(score['mae'].mean().round(decimals=2), 1.19)
        self.assertEqual(score['mse'].mean().round(decimals=2), 3.49)
        self.assertEqual(score['rmse'].mean().round(decimals=2), 1.87)
        self.assertEqual(score['r2'].mean().round(decimals=2), 0.99)
        self.assertEqual(score['ev'].mean().round(decimals=2), 0.99)
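    # For reference, a hedged sketch of the imports this test relies on;
    # the sklearn paths are standard, while the physlearn path for
    # ModifiedPipeline is an assumption not shown in the snippet:
    #
    # from sklearn.preprocessing import StandardScaler
    # from sklearn.linear_model import Ridge
    # from physlearn import ModifiedPipeline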
Example #9
"""

# Author: Alex Wozniakowski <*****@*****.**>

from physlearn import Regressor
from physlearn.datasets import load_benchmark, supplementary_params
from physlearn.supervised import ShapInterpret


# Here we load the training data, as well as the test data.
# Each example corresponds to 9 raw control voltage features,
# and the multi-targets are the sorted eigenenergies, as in the
# benchmark task.
n_features = 9
n_targets = 5
data = load_benchmark()
X_train, X_test = data['X_train'].iloc[:, :n_features], data['X_test'].iloc[:, :n_features]
y_train, y_test = data['y_train'].iloc[:, :n_targets], data['y_test'].iloc[:, :n_targets]

# We focus on the first single-target regression subtask,
# as this is the most difficult subtask for the base regressor.
# Note that the index follows the Python convention, i.e., it is zero-based.
index = 0

# We choose the Sklearn MLPRegressor.
model = 'mlpregressor'

# We make an instance of ShapInterpret with our choice of neural
# network for this single-target regression subtask. We set the
# show parameter to True, which enables SHAP to display the plot.
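# A hedged completion of this truncated example: show=True comes from
# the comment above, and the remaining calls mirror the ShapInterpret
# usage in the test examples (fit, then explainer).
interpret = ShapInterpret(regressor_choice=model,
                          target_index=index,
                          show=True)
interpret.fit(X=X_train, y=y_train, index=index)
explainer, shap_values = interpret.explainer(X=X_train)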
Example #10
import pandas as pd

from physlearn import Regressor
from physlearn.datasets import load_benchmark, additional_paper_params

# Here we load the training data, as well as the test data.
# The shapes of X_train and y_train are (95, 5), and the shapes
# of X_test and y_test are (41, 5).
X_train, X_test, y_train, y_test = load_benchmark(return_split=True)

# We choose LightGBM LGBMRegressor.
model = 'lgbmregressor'

print('Building scoring DataFrame for each single-target subtask.')
test_error = []
for index in range(5):
    # We make an instance of Regressor with our choice of
    # gradient-based one-side sampling for each single-target
    # regression subtask.
    reg = Regressor(regressor_choice=model,
                    params=additional_paper_params(index),
                    target_index=index)

    # We invoke the fit and predict methods, then we
    # compute the single-target test error.
    y_pred = reg.fit(X_train, y_train).predict(X_test)
    score = reg.score(y_test, y_pred)
    test_error.append(score)

test_error = pd.concat(test_error)
print('Finished building the scoring DataFrame.')
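# To summarize the run, the scores can be printed in the same fashion
# as the benchmark example below; the mean over the five single-target
# subtasks gives the aggregate test error.
print('The single-target test error:')
print(test_error.round(decimals=2))
print('The aggregate test error:')
print(test_error.mean().round(decimals=2))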
"""
============================
Benchmark test error
============================

This example generates the incumbent state-of-the-art's
test error on the benchmark task. 
"""

# Author: Alex Wozniakowski <*****@*****.**>

from physlearn import Regressor
from physlearn.datasets import load_benchmark

# To compute the benchmark error, we only need the test data.
# We denote the initial prediction examples as X_test and
# the multi-targets as y_test. Both have the same shape,
# namely (41, 5).
_, X_test, _, y_test = load_benchmark(return_split=True)

# Here we make an instance of Regressor, so that we can
# automatically compute the test error as a DataFrame.
reg = Regressor()
test_error = reg.score(y_test, X_test)

print('The single-target test error:')
print(test_error.round(decimals=2))
print('The benchmark error:')
print(test_error.mean().round(decimals=2))
Example #12
"""
This example generates an augmented learning curve for a
quantum device calibration application.
"""

# Author: Alex Wozniakowski <*****@*****.**>

import numpy as np

from physlearn import Regressor
from physlearn.datasets import load_benchmark, paper_params
from physlearn.supervised import plot_learning_curve

# Here we load the training data. The shapes of X_train
# and y_train are (95, 5).
X_train, _, y_train, _ = load_benchmark(return_split=True)

# We choose the Sklearn StackingRegressor as the basis function b
# in Eq. 11 of the main body. The first stacking layer consists of
# the Sklearn MLPRegressor and the LightGBM LGBMRegressor. The
# second stacking layer consists of the Sklearn MLPRegressor.
basis_fn = 'stackingregressor'
stack = dict(regressors=['mlpregressor', 'lgbmregressor'],
             final_regressor='mlpregressor')

# The number of regressors corresponds to K in Eq. 11.
n_regressors = 1

# The boosting loss is the squared error loss function, which is
# utilized in the computation of the pseudo-residuals, i.e., the
# negative gradient.
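# A hedged sketch of how this truncated example likely continues,
# reusing the base boosting options from the test examples above;
# the specific line search settings are an assumption.
boosting_loss = 'ls'
line_search_options = dict(init_guess=1,
                           opt_method='minimize',
                           method='Nelder-Mead',
                           tol=1e-7,
                           options={"maxiter": 10000},
                           niter=None,
                           T=None,
                           loss='lad',
                           regularization=0.1)
base_boosting_options = dict(n_regressors=n_regressors,
                             boosting_loss=boosting_loss,
                             line_search_options=line_search_options)
reg = Regressor(regressor_choice=basis_fn,
                stacking_options=dict(layers=stack),
                base_boosting_options=base_boosting_options)
# The imported plot_learning_curve would then be called with this
# regressor and the training data; its exact signature is not shown
# in the snippet, so the call is omitted from this sketch.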