Ejemplo n.º 1
0
    def test_stacking_regressor_with_cv_gridsearchcv(self):
        """Grid search over an mlxtend stacking-CV regressor pipeline."""
        X, y = load_boston(return_X_y=True)
        X, y = pd.DataFrame(X), pd.Series(y)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        layers = {'regressors': ['kneighborsregressor', 'bayesianridge'],
                  'final_regressor': 'lasso'}

        model = Regressor(regressor_choice='mlxtendstackingcvregressor',
                          pipeline_transform='standardscaler',
                          stacking_options={'layers': layers})
        grid = {'reg__kneighborsregressor__n_neighbors': [2, 4, 5],
                'reg__bayesianridge__alpha_1': [1e-7, 1e-6],
                'reg__meta_regressor__alpha': [1.0],
                'tr__with_std': [True, False]}
        model.search(X_train, y_train, search_params=grid)
        best = model.best_params_
        self.assertLess(model.best_score_.values, 2.8)
        self.assertIn(best['reg__kneighborsregressor__n_neighbors'],
                      [2, 4, 5])
        self.assertIn(best['reg__bayesianridge__alpha_1'], [1e-7, 1e-6])
        self.assertIn(best['reg__meta_regressor__alpha'], [1.0])
Ejemplo n.º 2
0
    def test_return_incumbent(self):
        """Base boosting on a tiny slice should fall back to the incumbent."""
        X_train, X_test, y_train, y_test = load_benchmark(return_split=True)
        # Scipy-style line-search configuration for the boosting stage.
        line_search_options = {'init_guess': 1,
                               'opt_method': 'minimize',
                               'method': 'Nelder-Mead',
                               'tol': 1e-7,
                               'options': {"maxiter": 10000},
                               'niter': None,
                               'T': None,
                               'loss': 'lad',
                               'regularization': 0.1}
        base_boosting_options = {'n_regressors': 1,
                                 'boosting_loss': 'ls',
                                 'line_search_options': line_search_options}
        model = Regressor(regressor_choice='ridge',
                          params={'alpha': 0.1},
                          target_index=3,
                          base_boosting_options=base_boosting_options)
        # Only ten samples: cross-validation should prefer the incumbent.
        model.baseboostcv(X_train.iloc[:10, :], y_train.iloc[:10, :])
        self.assertHasAttr(model, 'return_incumbent_')
Ejemplo n.º 3
0
 def test_baseboostcv_score(self):
     """Score a base-boosted stacking regressor on the benchmark split."""
     X_train, X_test, y_train, y_test = load_benchmark(return_split=True)
     layers = {'regressors': ['ridge', 'lgbmregressor'],
               'final_regressor': 'ridge'}
     line_search_options = {'init_guess': 1,
                            'opt_method': 'minimize',
                            'method': 'Nelder-Mead',
                            'tol': 1e-7,
                            'options': {"maxiter": 10000},
                            'niter': None,
                            'T': None,
                            'loss': 'lad',
                            'regularization': 0.1}
     boosting = {'n_regressors': 3,
                 'boosting_loss': 'ls',
                 'line_search_options': line_search_options}
     model = Regressor(regressor_choice='stackingregressor',
                       target_index=0,
                       stacking_options={'layers': layers},
                       base_boosting_options=boosting)
     predictions = model.baseboostcv(X_train, y_train).predict(X_test)
     metrics = model.score(y_test, predictions)
     # With the full training set the boosted model should win out.
     self.assertNotHasAttr(model, 'return_incumbent_')
     self.assertGreaterEqual(metrics['mae'].values, 0.0)
     self.assertGreaterEqual(metrics['mse'].values, 0.0)
     self.assertLess(metrics['mae'].values, 2.0)
     self.assertLess(metrics['mse'].values, 6.2)
Ejemplo n.º 4
0
    def test_multioutput_regressorchain_randomizedsearchcv(self):
        """Randomized search over a ridge regressor chain."""
        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
        X, y = bunch['data'], bunch['target']
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        model = Regressor(regressor_choice='ridge',
                          pipeline_transform='standardscaler',
                          randomizedcv_n_iter=6,
                          chain_order=[2, 0, 1])
        distributions = {'reg__alpha': uniform(loc=0.01, scale=1.5),
                         'reg__fit_intercept': [True, False],
                         'tr__with_std': [True, False]}
        model.search(X_train, y_train,
                     search_params=distributions,
                     search_method='randomizedsearchcv')
        best = model.best_params_
        self.assertLess(model.best_score_.values, 12.0)
        # Sampled alpha must stay within the uniform(0.01, 1.5) support.
        self.assertLessEqual(best['reg__base_estimator__alpha'], 1.51)
        self.assertGreaterEqual(best['reg__base_estimator__alpha'], 0.01)
        self.assertIn(best['reg__base_estimator__fit_intercept'],
                      [True, False])
Ejemplo n.º 5
0
    def test_multioutput_regressorchain_with_cv_randomizedsearchcv(self):
        """Randomized search over a chained mlxtend stacking-CV regressor."""
        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
        X, y = bunch['data'], bunch['target']
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        layers = {'regressors': ['kneighborsregressor', 'bayesianridge'],
                  'final_regressor': 'lasso'}

        # NOTE(review): other tests pass stacking_options=dict(layers=...);
        # this one uses stacking_layer= — confirm both keywords are supported.
        model = Regressor(regressor_choice='mlxtendstackingcvregressor',
                          stacking_layer=layers,
                          pipeline_transform='standardscaler',
                          chain_order=[2, 0, 1])
        distributions = {
            'kneighborsregressor__n_neighbors': randint(low=2, high=5),
            'bayesianridge__alpha_1': [1e-7, 1e-6],
            'meta_regressor__alpha': [1.0],
        }
        model.search(X_train, y_train, search_params=distributions,
                     search_method='randomizedsearchcv')
        best = model.best_params_
        self.assertLess(model.best_score_.values, 10.0)
        self.assertLessEqual(
            best['reg__base_estimator__kneighborsregressor__n_neighbors'], 5)
        self.assertGreaterEqual(
            best['reg__base_estimator__kneighborsregressor__n_neighbors'], 2)
        self.assertIn(best['reg__base_estimator__bayesianridge__alpha_1'],
                      [1e-7, 1e-6])
        self.assertIn(best['reg__base_estimator__meta_regressor__alpha'],
                      [1.0])
Ejemplo n.º 6
0
    def test_multioutput_regressor_without_cv_gridsearchcv(self):
        """Grid search over a multioutput mlxtend stacking regressor."""
        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
        X, y = bunch['data'], bunch['target']
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        layers = {'regressors': ['kneighborsregressor', 'bayesianridge'],
                  'final_regressor': 'lasso'}

        model = Regressor(regressor_choice='mlxtendstackingregressor',
                          pipeline_transform='standardscaler',
                          stacking_options={'layers': layers})
        grid = {'reg__kneighborsregressor__n_neighbors': [2, 4, 5],
                'reg__bayesianridge__alpha_1': [1e-7, 1e-6],
                'reg__meta_regressor__alpha': [1.0],
                'tr__with_std': [True, False]}
        model.search(X_train, y_train, search_params=grid)
        best = model.best_params_
        self.assertLess(model.best_score_.values, 10.0)
        self.assertIn(
            best['reg__estimator__kneighborsregressor__n_neighbors'],
            [2, 4, 5])
        self.assertIn(best['reg__estimator__bayesianridge__alpha_1'],
                      [1e-7, 1e-6])
        self.assertIn(best['reg__estimator__meta_regressor__alpha'], [1.0])
Ejemplo n.º 7
0
    def test_stacking_regressor_randomizedsearchcv(self):
        """Randomized search over a scikit-learn stacking regressor."""
        X, y = load_boston(return_X_y=True)
        X, y = pd.DataFrame(X), pd.Series(y)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        layers = {'regressors': ['kneighborsregressor', 'bayesianridge'],
                  'final_regressor': 'lasso'}

        # NOTE(review): other tests pass stacking_options=dict(layers=...);
        # this one uses stacking_layer= — confirm both keywords are supported.
        model = Regressor(regressor_choice='stackingregressor',
                          stacking_layer=layers,
                          pipeline_transform='standardscaler')
        # Base learners are addressed positionally ('0', '1') in this stack.
        distributions = {'0__n_neighbors': randint(low=2, high=5),
                         '1__alpha_1': [1e-7, 1e-6],
                         'final_estimator__alpha': [1.0]}
        model.search(X_train, y_train,
                     search_params=distributions,
                     search_method='randomizedsearchcv')
        best = model.best_params_
        self.assertLess(model.best_score_.values, 2.8)
        self.assertLessEqual(best['reg__0__n_neighbors'], 5)
        self.assertGreaterEqual(best['reg__0__n_neighbors'], 2)
        self.assertIn(best['reg__1__alpha_1'], [1e-7, 1e-6])
        self.assertIn(best['reg__final_estimator__alpha'], [1.0])
Ejemplo n.º 8
0
    def test_multioutput_regressorchain_randomizedsearchcv(self):
        """Randomized search over an XGBoost regressor chain."""
        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
        X, y = bunch['data'], bunch['target']
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        xgb_params = {'n_estimators': 10,
                      'objective': 'reg:squarederror',
                      'booster': 'gbtree'}
        model = Regressor(regressor_choice='xgbregressor',
                          pipeline_transform='standardscaler',
                          params=xgb_params,
                          randomizedcv_n_iter=6,
                          chain_order=[2, 0, 1])
        distributions = {'reg__n_estimators': randint(low=3, high=10),
                         'tr__with_std': [True, False]}
        model.search(X_train, y_train,
                     search_params=distributions,
                     search_method='randomizedsearchcv')
        best = model.best_params_
        self.assertLess(model.best_score_.values, 42.0)
        # Sampled estimator count must stay within randint(3, 10).
        self.assertLessEqual(best['reg__base_estimator__n_estimators'], 10)
        self.assertGreaterEqual(best['reg__base_estimator__n_estimators'], 3)
Ejemplo n.º 9
0
    def test_stacking_regressor_without_cv_randomizedsearchcv(self):
        """Randomized search over an mlxtend stacking regressor (no CV)."""
        X, y = load_boston(return_X_y=True)
        X, y = pd.DataFrame(X), pd.Series(y)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        layers = {'regressors': ['kneighborsregressor', 'bayesianridge'],
                  'final_regressor': 'lasso'}

        model = Regressor(regressor_choice='mlxtendstackingregressor',
                          pipeline_transform='standardscaler',
                          stacking_options={'layers': layers},
                          randomizedcv_n_iter=6)
        distributions = {
            'reg__kneighborsregressor__n_neighbors': randint(low=2, high=5),
            'reg__bayesianridge__alpha_1': [1e-7, 1e-6],
            'reg__meta_regressor__alpha': [1.0],
            'tr__with_std': [True, False],
        }
        model.search(X_train, y_train,
                     search_params=distributions,
                     search_method='randomizedsearchcv')
        best = model.best_params_
        self.assertLess(model.best_score_.values, 3.0)
        self.assertLessEqual(best['reg__kneighborsregressor__n_neighbors'], 5)
        self.assertGreaterEqual(
            best['reg__kneighborsregressor__n_neighbors'], 2)
        self.assertIn(best['reg__bayesianridge__alpha_1'], [1e-7, 1e-6])
        self.assertIn(best['reg__meta_regressor__alpha'], [1.0])
Ejemplo n.º 10
0
 def test_benchmark(self):
     """Default Regressor scoring must reproduce the benchmark numbers."""
     _, X_test, _, y_test = load_benchmark(return_split=True)
     metrics = Regressor().score(y_test, X_test)
     expected = {'mae': 1.34, 'mse': 4.19, 'rmse': 1.88,
                 'r2': 0.99, 'ev': 0.99}
     for key, value in expected.items():
         self.assertEqual(metrics[key].mean().round(decimals=2), value)
Ejemplo n.º 11
0
    def test_regressor_gridsearchcv(self):
        """Grid search n_estimators for a single-output XGBoost regressor."""
        X, y = load_boston(return_X_y=True)
        X, y = pd.DataFrame(X), pd.Series(y)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        model = Regressor(regressor_choice='xgbregressor',
                          pipeline_transform='standardscaler')
        grid = {'n_estimators': [3, 5, 10]}
        model.search(X_train, y_train, search_params=grid)
        self.assertLess(model.best_score_.values, 3.8)
        self.assertIn(model.best_params_['reg__n_estimators'], [3, 5, 10])
Ejemplo n.º 12
0
    def test_multioutput_regressor_gridsearchcv(self):
        """Grid search n_estimators for a multioutput XGBoost regressor."""
        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
        X, y = bunch['data'], bunch['target']
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        model = Regressor(regressor_choice='xgbregressor',
                          pipeline_transform='standardscaler')
        grid = {'n_estimators': [3, 5, 10]}
        model.search(X_train, y_train, search_params=grid)
        self.assertLess(model.best_score_.values, 10.5)
        self.assertIn(model.best_params_['reg__estimator__n_estimators'],
                      [3, 5, 10])
Ejemplo n.º 13
0
    def test_regressor_randomizedsearchcv(self):
        """Randomized search n_estimators for an XGBoost regressor."""
        X, y = load_boston(return_X_y=True)
        X, y = pd.DataFrame(X), pd.Series(y)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        model = Regressor(regressor_choice='xgbregressor',
                          pipeline_transform='standardscaler')
        distributions = {'n_estimators': randint(low=3, high=10)}
        model.search(X_train, y_train,
                     search_params=distributions,
                     search_method='randomizedsearchcv')
        best = model.best_params_
        self.assertLess(model.best_score_.values, 4.0)
        self.assertLessEqual(best['reg__n_estimators'], 10)
        self.assertGreaterEqual(best['reg__n_estimators'], 3)
Ejemplo n.º 14
0
    def test_multioutput_regressor_randomizedsearchcv(self):
        """Randomized search n_estimators for a multioutput XGBoost model."""
        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
        X, y = bunch['data'], bunch['target']
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        model = Regressor(regressor_choice='xgbregressor',
                          pipeline_transform='standardscaler')
        distributions = {'n_estimators': randint(low=3, high=10)}
        model.search(X_train, y_train,
                     search_params=distributions,
                     search_method='randomizedsearchcv')
        best = model.best_params_
        self.assertLess(model.best_score_.values, 12.0)
        self.assertLessEqual(best['reg__estimator__n_estimators'], 10)
        self.assertGreaterEqual(best['reg__estimator__n_estimators'], 3)
Ejemplo n.º 15
0
    def test_regressor_gridsearchcv(self):
        """Grid search alpha and fit_intercept for a ridge regressor."""
        X, y = load_boston(return_X_y=True)
        X, y = pd.DataFrame(X), pd.Series(y)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        model = Regressor(regressor_choice='ridge',
                          pipeline_transform='standardscaler')
        grid = {'alpha': [0.1, 0.2, 0.5],
                'fit_intercept': [True, False]}
        model.search(X_train, y_train, search_params=grid)
        best = model.best_params_
        self.assertLess(model.best_score_.values, 3.6)
        self.assertIn(best['reg__alpha'], [0.1, 0.2, 0.5])
        self.assertIn(best['reg__fit_intercept'], [True, False])
Ejemplo n.º 16
0
    def test_regressor_gridsearchcv(self):
        """Grid search iterations for a CatBoost regressor pipeline."""
        X, y = load_boston(return_X_y=True)
        X, y = pd.DataFrame(X), pd.Series(y)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        catboost_params = {'iterations': 10, 'loss_function': 'RMSE'}
        model = Regressor(regressor_choice='catboostregressor',
                          pipeline_transform='standardscaler',
                          params=catboost_params)
        grid = {'reg__iterations': [3, 5, 10],
                'tr__with_std': [True, False]}
        model.search(X_train, y_train, search_params=grid)
        self.assertLess(model.best_score_.values, 3.6)
        self.assertIn(model.best_params_['reg__iterations'], [3, 5, 10])
Ejemplo n.º 17
0
    def test_multioutput_regressorchain_fit_score(self):
        """Fit and score an XGBoost regressor chain on Linnerud."""
        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
        X, y = bunch['data'], bunch['target']
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        xgb_params = {'n_estimators': 10,
                      'objective': 'reg:squarederror',
                      'booster': 'gbtree'}
        model = Regressor(regressor_choice='xgbregressor',
                          pipeline_transform='standardscaler',
                          params=xgb_params,
                          chain_order=[0, 2, 1])
        predictions = model.fit(X_train, y_train).predict(X_test)
        metrics = model.score(y_test, predictions).mean()
        self.assertCountEqual(predictions.index, y_test.index)
        self.assertGreaterEqual(metrics['mae'], 0.0)
        self.assertGreaterEqual(metrics['mse'], 0.0)
        self.assertLess(metrics['mae'], 14.0)
        self.assertLess(metrics['mse'], 430.0)
Ejemplo n.º 18
0
    def test_multioutput_regressor_fit_score(self):
        """Fit and score a multioutput ridge regressor on Linnerud."""
        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
        X, y = bunch['data'], bunch['target']
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        model = Regressor(regressor_choice='ridge',
                          pipeline_transform='standardscaler')
        predictions = model.fit(X_train, y_train).predict(X_test)
        metrics = model.score(y_test, predictions).mean()
        self.assertCountEqual(predictions.index, y_test.index)
        self.assertGreaterEqual(metrics['mae'], 0.0)
        self.assertGreaterEqual(metrics['mse'], 0.0)
        self.assertLess(metrics['mae'], 11.0)
        self.assertLess(metrics['mse'], 232.0)
Ejemplo n.º 19
0
    def test_multioutput_regressor_gridsearchcv(self):
        """Grid search iterations for a multioutput CatBoost regressor."""
        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
        X, y = bunch['data'], bunch['target']
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        catboost_params = {'iterations': 10, 'loss_function': 'RMSE'}
        model = Regressor(regressor_choice='catboostregressor',
                          pipeline_transform='standardscaler',
                          params=catboost_params)
        grid = {'reg__iterations': [3, 5, 10],
                'tr__with_std': [True, False]}
        model.search(X_train, y_train, search_params=grid)
        self.assertLess(model.best_score_.values, 10.0)
        self.assertIn(model.best_params_['reg__estimator__iterations'],
                      [3, 5, 10])
Ejemplo n.º 20
0
    def test_multioutput_regressorchain_with_cv_fit_score(self):
        """Fit and score a chained mlxtend stacking-CV regressor."""
        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
        X, y = bunch['data'], bunch['target']
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        layers = {'regressors': ['kneighborsregressor', 'bayesianridge'],
                  'final_regressor': 'lasso'}
        # NOTE(review): other tests pass stacking_options=dict(layers=...);
        # this one uses stacking_layer= — confirm both keywords are supported.
        model = Regressor(regressor_choice='mlxtendstackingcvregressor',
                          stacking_layer=layers,
                          pipeline_transform='standardscaler',
                          chain_order=[2, 0, 1])
        predictions = model.fit(X_train, y_train).predict(X_test)
        metrics = model.score(y_test, predictions).mean()
        self.assertCountEqual(predictions.index, y_test.index)
        self.assertGreaterEqual(metrics['mae'], 0.0)
        self.assertGreaterEqual(metrics['mse'], 0.0)
        self.assertLess(metrics['mae'], 7.0)
        self.assertLess(metrics['mse'], 110.0)
Ejemplo n.º 21
0
    def test_multioutput_regressor_gridsearchcv(self):
        """Grid search alpha and fit_intercept for multioutput ridge."""
        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
        X, y = bunch['data'], bunch['target']
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        model = Regressor(regressor_choice='ridge',
                          pipeline_transform='standardscaler')
        grid = {'alpha': [0.1, 0.2, 0.5],
                'fit_intercept': [True, False]}
        model.search(X_train, y_train, search_params=grid)
        best = model.best_params_
        self.assertLess(model.best_score_.values, 10.0)
        self.assertIn(best['reg__estimator__alpha'], [0.1, 0.2, 0.5])
        self.assertIn(best['reg__estimator__fit_intercept'], [True, False])
Ejemplo n.º 22
0
 def test_with_cv_pipeline_clone_fit_score(self):
     """Cloning the CV stacking pipeline preserves its class and scores."""
     X, y = load_boston(return_X_y=True)
     X, y = pd.DataFrame(X), pd.Series(y)
     X_train, X_test, y_train, y_test = train_test_split(
         X, y, random_state=42)
     union = FeatureUnion(
         transformer_list=[('pca', PCA(n_components=1)),
                           ('svd', TruncatedSVD(n_components=2))],
         n_jobs=-1)
     layers = {'regressors': ['kneighborsregressor', 'bayesianridge'],
               'final_regressor': 'lasso'}
     model = Regressor(regressor_choice='mlxtendstackingcvregressor',
                       pipeline_transform=('tr', union),
                       stacking_options={'layers': layers})
     model.get_pipeline(y=y_train)
     original_class = model.pipe.__class__
     model.pipe = clone(model.pipe)  # clone must not change the pipe type
     predictions = model.fit(X_train, y_train).predict(X_test)
     metrics = model.score(y_test, predictions)
     self.assertEqual(original_class, model.pipe.__class__)
     self.assertCountEqual(predictions.index, y_test.index)
     self.assertGreaterEqual(metrics['mae'].values, 0.0)
     self.assertGreaterEqual(metrics['mse'].values, 0.0)
     self.assertLess(metrics['mae'].values, 11.0)
     self.assertLess(metrics['mse'].values, 232.0)
Ejemplo n.º 23
0
 def test_pipeline_clone_fit_score(self):
     """Cloning the XGBoost pipeline preserves its class and scores."""
     X, y = load_boston(return_X_y=True)
     X, y = pd.DataFrame(X), pd.Series(y)
     X_train, X_test, y_train, y_test = train_test_split(
         X, y, random_state=42)
     union = FeatureUnion(
         transformer_list=[('pca', PCA(n_components=1)),
                           ('svd', TruncatedSVD(n_components=2))],
         n_jobs=-1)
     xgb_params = {'n_estimators': 10,
                   'objective': 'reg:squarederror',
                   'booster': 'gbtree'}
     model = Regressor(regressor_choice='xgbregressor',
                       pipeline_transform=('tr', union),
                       params=xgb_params)
     model.get_pipeline(y=y_train)
     original_class = model.pipe.__class__
     model.pipe = clone(model.pipe)  # clone must not change the pipe type
     predictions = model.fit(X_train, y_train).predict(X_test)
     metrics = model.score(y_test, predictions)
     self.assertEqual(original_class, model.pipe.__class__)
     self.assertCountEqual(predictions.index, y_test.index)
     self.assertGreaterEqual(metrics['mae'].values, 0.0)
     self.assertGreaterEqual(metrics['mse'].values, 0.0)
     self.assertLess(metrics['mae'].values, 11.0)
     self.assertLess(metrics['mse'].values, 232.0)
Ejemplo n.º 24
0
    def test_multioutput_regressorchain_gridsearchcv(self):
        """Grid search n_estimators for a LightGBM regressor chain."""
        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
        X, y = bunch['data'], bunch['target']
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        lgbm_params = {'n_estimators': 3, 'objective': 'mean_squared_error'}
        model = Regressor(regressor_choice='lgbmregressor',
                          pipeline_transform='standardscaler',
                          params=lgbm_params,
                          chain_order=[2, 0, 1])
        grid = {'reg__n_estimators': [3, 5, 10],
                'tr__with_std': [True, False]}
        model.search(X_train, y_train, search_params=grid)
        self.assertLess(model.best_score_.values, 10.0)
        self.assertIn(model.best_params_['reg__base_estimator__n_estimators'],
                      [3, 5, 10])
Ejemplo n.º 25
0
    def test_multioutput_regressor_fit_score(self):
        """Fit and score a multioutput LightGBM regressor on Linnerud."""
        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
        X, y = bunch['data'], bunch['target']
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        lgbm_params = {'n_estimators': 3, 'objective': 'mean_squared_error'}
        model = Regressor(regressor_choice='lgbmregressor',
                          pipeline_transform='standardscaler',
                          params=lgbm_params)
        predictions = model.fit(X_train, y_train).predict(X_test)
        metrics = model.score(y_test, predictions).mean()
        self.assertCountEqual(predictions.index, y_test.index)
        self.assertGreaterEqual(metrics['mae'], 0.0)
        self.assertGreaterEqual(metrics['mse'], 0.0)
        self.assertLess(metrics['mae'], 8.1)
        self.assertLess(metrics['mse'], 122.5)
Ejemplo n.º 26
0
    def test_regressor_gridsearchcv(self):
        """Grid search a preconfigured XGBoost regressor pipeline."""
        X, y = load_boston(return_X_y=True)
        X, y = pd.DataFrame(X), pd.Series(y)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        xgb_params = {'n_estimators': 10,
                      'objective': 'reg:squarederror',
                      'booster': 'gbtree'}
        model = Regressor(regressor_choice='xgbregressor',
                          pipeline_transform='standardscaler',
                          params=xgb_params)
        grid = {'reg__n_estimators': [3, 5, 10],
                'tr__with_std': [True, False]}
        model.search(X_train, y_train, search_params=grid)
        self.assertLess(model.best_score_.values, 9.0)
        self.assertIn(model.best_params_['reg__n_estimators'], [3, 5, 10])
Ejemplo n.º 27
0
    def test_regressor_randomizedsearchcv(self):
        """Randomized search alpha and fit_intercept for ridge."""
        X, y = load_boston(return_X_y=True)
        X, y = pd.DataFrame(X), pd.Series(y)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        model = Regressor(regressor_choice='ridge',
                          pipeline_transform='standardscaler')
        distributions = {'alpha': uniform(loc=0.01, scale=1.5),
                         'fit_intercept': [True, False]}
        model.search(X_train, y_train,
                     search_params=distributions,
                     search_method='randomizedsearchcv')
        best = model.best_params_
        self.assertLess(model.best_score_.values, 3.6)
        # Sampled alpha must stay within the uniform(0.01, 1.5) support.
        self.assertLessEqual(best['reg__alpha'], 1.51)
        self.assertGreaterEqual(best['reg__alpha'], 0.01)
        self.assertIn(best['reg__fit_intercept'], [True, False])
Ejemplo n.º 28
0
    def test_multioutput_regressorchain_fit_score(self):
        """Fit and score a CatBoost regressor chain on Linnerud."""
        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
        X, y = bunch['data'], bunch['target']
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        catboost_params = {'iterations': 10, 'loss_function': 'RMSE'}
        model = Regressor(regressor_choice='catboostregressor',
                          pipeline_transform='standardscaler',
                          params=catboost_params,
                          chain_order=[0, 2, 1])
        predictions = model.fit(X_train, y_train).predict(X_test)
        metrics = model.score(y_test, predictions).mean()
        self.assertCountEqual(predictions.index, y_test.index)
        self.assertGreaterEqual(metrics['mae'], 0.0)
        self.assertGreaterEqual(metrics['mse'], 0.0)
        self.assertLess(metrics['mae'], 11.0)
        self.assertLess(metrics['mse'], 240.0)
Ejemplo n.º 29
0
    def test_regressor_bayesoptcv(self):
        """Bayesian-optimization search over SVR gamma and epsilon bounds."""
        X, y = load_boston(return_X_y=True)
        X, y = pd.DataFrame(X), pd.Series(y)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        model = Regressor(regressor_choice='svr',
                          pipeline_transform='standardscaler')
        # Bayesian optimization takes (low, high) bounds per parameter.
        pbounds = {'gamma': (0.1, 2.0), 'epsilon': (0.1, 0.4)}
        model.search(X_train, y_train,
                     search_params=pbounds,
                     search_method='bayesoptcv')
        best = model.best_params_
        self.assertLess(model.best_score_.values, 3.7)
        self.assertLessEqual(best['reg__gamma'], 2.0)
        self.assertGreaterEqual(best['reg__gamma'], 0.1)
        self.assertLessEqual(best['reg__epsilon'], 0.4)
        self.assertGreaterEqual(best['reg__epsilon'], 0.1)
Ejemplo n.º 30
0
    def test_multioutput_regressor_bayesoptcv(self):
        """Bayesian-optimization search over a multioutput SVR."""
        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
        X, y = bunch['data'], bunch['target']
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)

        model = Regressor(regressor_choice='svr',
                          pipeline_transform='standardscaler')
        # Bayesian optimization takes (low, high) bounds per parameter.
        pbounds = {'gamma': (0.1, 2.0), 'epsilon': (0.1, 0.4)}
        model.search(X_train, y_train,
                     search_params=pbounds,
                     search_method='bayesoptcv')
        best = model.best_params_
        self.assertLess(model.best_score_.values, 10.0)
        self.assertLessEqual(best['reg__estimator__gamma'], 2.0)
        self.assertGreaterEqual(best['reg__estimator__gamma'], 0.1)
        self.assertLessEqual(best['reg__estimator__epsilon'], 0.4)
        self.assertGreaterEqual(best['reg__estimator__epsilon'], 0.1)