import copy
import resource
import traceback
import unittest
from unittest import mock

import numpy as np
import sklearn.metrics
from numpy.testing import assert_array_almost_equal

# The module paths below are assumptions based on the ParamSklearn project
# layout; adjust them to match the actual package.
from HPOlibConfigSpace.configuration_space import ConfigurationSpace
from HPOlibConfigSpace.hyperparameters import CategoricalHyperparameter
from ParamSklearn.classification import ParamSklearnClassifier
from ParamSklearn.regression import ParamSklearnRegressor
from ParamSklearn.util import get_dataset


class ParamSklearnRegressorTest(unittest.TestCase):  # class name is illustrative

    def test_get_hyperparameter_search_space_preprocessor_contradicts_default_regressor(
            self):
        cs = ParamSklearnRegressor.get_hyperparameter_search_space(
            include={'preprocessor': ['densifier']},
            dataset_properties={'sparse': True})
        self.assertEqual(cs.get_hyperparameter('regressor:__choice__').default,
                         'gradient_boosting')

        cs = ParamSklearnRegressor.get_hyperparameter_search_space(
            include={'preprocessor': ['nystroem_sampler']})
        self.assertEqual(cs.get_hyperparameter('regressor:__choice__').default,
                         'sgd')
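
    # Structural sanity check: total number of hyperparameters and how many
    # of them are conditional.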
    def test_get_hyperparameter_search_space(self):
        cs = ParamSklearnRegressor.get_hyperparameter_search_space()
        self.assertIsInstance(cs, ConfigurationSpace)
        conditions = cs.get_conditions()
        hyperparameters = cs.get_hyperparameters()
        self.assertEqual(114, len(hyperparameters))
        self.assertEqual(len(hyperparameters) - 5, len(conditions))
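
    # Smoke test: sample ten configurations, fit each on the Boston housing
    # data, and skip failure modes that are known to be benign.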
    def test_configurations(self):
        # Cap the address space at ~4 GiB (4000 MiB) so that runaway
        # configurations fail with MemoryError instead of exhausting the
        # machine.
        limit = 4000 * 1024 * 1024
        resource.setrlimit(resource.RLIMIT_AS, (limit, limit))

        cs = ParamSklearnRegressor.get_hyperparameter_search_space()

        print(cs)
        cs.seed(1)

        for i in range(10):
            config = cs.sample_configuration()
            config._populate_values()
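            # Keep sampled SGD configurations cheap by capping the number
            # of iterations.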
            if config['regressor:sgd:n_iter'] is not None:
                config._values['regressor:sgd:n_iter'] = 5

            X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston')
            cls = ParamSklearnRegressor(config, random_state=1)
            print(config)
            try:
                cls.fit(X_train, Y_train)
                X_test_ = X_test.copy()
                predictions = cls.predict(X_test)
                self.assertIsInstance(predictions, np.ndarray)
                # Predict a second time on a copy to make sure repeated
                # prediction works and also yields an array.
                predictions_copy = cls.predict(X_test_)
                self.assertIsInstance(predictions_copy, np.ndarray)
            except ValueError as e:
                if ("Floating-point under-/overflow occurred at epoch" in e.args[0]
                        or "removed all features" in e.args[0]
                        or "all features are discarded" in e.args[0]):
                    continue
                else:
                    print(config)
                    print(traceback.format_exc())
                    raise
            except RuntimeWarning as e:
                # Warnings only arrive here as exceptions when the test
                # setup promotes warnings to errors.
                if "invalid value encountered in sqrt" in e.args[0]:
                    continue
                elif "divide by zero encountered in" in e.args[0]:
                    continue
                elif "invalid value encountered in divide" in e.args[0]:
                    continue
                elif "invalid value encountered in true_divide" in e.args[0]:
                    continue
                else:
                    print(config)
                    print(traceback.format_exc())
                    raise
            except UserWarning as e:
                if "FastICA did not converge" in e.args[0]:
                    continue
                else:
                    print(config)
                    print(traceback.format_exc())
                    raise
            except MemoryError:
                # The address-space limit set above turns memory-hungry
                # configurations into MemoryErrors; skip those.
                continue
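
    # include/exclude arguments must be reflected in the resulting
    # configuration space.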
    def test_get_hyperparameter_search_space_include_exclude_models(self):
        cs = ParamSklearnRegressor.get_hyperparameter_search_space(
            include={'regressor': ['random_forest']})
        self.assertEqual(cs.get_hyperparameter('regressor:__choice__'),
            CategoricalHyperparameter('regressor:__choice__', ['random_forest']))

        # TODO add this test when more than one regressor is present
        cs = ParamSklearnRegressor.get_hyperparameter_search_space(
            exclude={'regressor': ['random_forest']})
        self.assertNotIn('random_forest', str(cs))

        cs = ParamSklearnRegressor.get_hyperparameter_search_space(
            include={'preprocessor': ['pca']})
        self.assertEqual(cs.get_hyperparameter('preprocessor:__choice__'),
            CategoricalHyperparameter('preprocessor:__choice__', ['pca']))

        cs = ParamSklearnRegressor.get_hyperparameter_search_space(
            exclude={'preprocessor': ['no_preprocessing']})
        self.assertNotIn('no_preprocessing', str(cs))
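
    # Batched prediction must match an unbatched predict call and split the
    # work into batch-sized calls on the underlying pipeline.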
    def test_predict_batched(self):
        cs = ParamSklearnRegressor.get_hyperparameter_search_space()
        default = cs.get_default_configuration()
        cls = ParamSklearnRegressor(default)

        X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston')
        cls.fit(X_train, Y_train)
        X_test_ = X_test.copy()
        prediction_ = cls.predict(X_test_)
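        # Wrap the fitted pipeline in a mock so the number of underlying
        # predict calls can be counted.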
        cls_predict = mock.Mock(wraps=cls.pipeline_)
        cls.pipeline_ = cls_predict
        prediction = cls.predict(X_test, batch_size=20)
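        # 356 test samples with batch_size=20 -> ceil(356 / 20) = 18 calls.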
        self.assertEqual((356,), prediction.shape)
        self.assertEqual(18, cls_predict.predict.call_count)
        assert_array_almost_equal(prediction_, prediction)
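
    # The default configuration must reach a fixed R^2 score on the
    # diabetes dataset.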
    def test_default_configuration(self):
        # Run twice to check that fitting the default configuration is
        # deterministic.
        for i in range(2):
            cs = ParamSklearnRegressor.get_hyperparameter_search_space()
            default = cs.get_default_configuration()
            X_train, Y_train, X_test, Y_test = get_dataset(dataset='diabetes')
            auto = ParamSklearnRegressor(default)
            auto = auto.fit(X_train, Y_train)
            predictions = auto.predict(copy.deepcopy(X_test))
            # R^2: higher is better, lower is worse.
            r2_score = sklearn.metrics.r2_score(Y_test, predictions)
            self.assertAlmostEqual(0.41626416529791199, r2_score)
            model_score = auto.score(copy.deepcopy(X_test), Y_test)
            self.assertEqual(model_score, r2_score)
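
    # repr() of a pipeline must round-trip through eval().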
    def test_repr(self):
        cs = ParamSklearnRegressor.get_hyperparameter_search_space()
        default = cs.get_default_configuration()
        representation = repr(ParamSklearnRegressor(default))
        cls = eval(representation)
        self.assertIsInstance(cls, ParamSklearnRegressor)
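

# Module-level helper (defined outside the test class): build the pipeline
# type that matches a given configuration.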
def get_model(configuration, seed):
    if 'classifier' in configuration:
        return ParamSklearnClassifier(configuration, seed)
    elif 'regressor' in configuration:
        return ParamSklearnRegressor(configuration, seed)
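

if __name__ == '__main__':
    unittest.main()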