def test_get_hyperparameter_search_space_include_exclude_models(self):
    """Check that include/exclude constraints shape the search space."""
    # Including a single regressor must pin the choice hyperparameter to it.
    pipeline = SimpleRegressionPipeline(
        include={'regressor': ['random_forest']})
    space = pipeline.get_hyperparameter_search_space()
    expected_choice = CategoricalHyperparameter(
        'regressor:__choice__', ['random_forest'])
    self.assertEqual(space.get_hyperparameter('regressor:__choice__'),
                     expected_choice)

    # TODO add this test when more than one regressor is present
    pipeline = SimpleRegressionPipeline(
        exclude={'regressor': ['random_forest']})
    space = pipeline.get_hyperparameter_search_space()
    self.assertNotIn('random_forest', str(space))

    # Same include/exclude behavior for the feature preprocessor step.
    pipeline = SimpleRegressionPipeline(
        include={'feature_preprocessor': ['pca']})
    space = pipeline.get_hyperparameter_search_space()
    self.assertEqual(
        space.get_hyperparameter('feature_preprocessor:__choice__'),
        CategoricalHyperparameter('feature_preprocessor:__choice__',
                                  ['pca']))

    pipeline = SimpleRegressionPipeline(
        exclude={'feature_preprocessor': ['no_preprocessing']})
    space = pipeline.get_hyperparameter_search_space()
    self.assertNotIn('no_preprocessing', str(space))
def test_get_hyperparameter_search_space_preprocessor_contradicts_default_classifier(
        self):
    """When the included preprocessor is incompatible with the default
    regressor, the default of 'regressor:__choice__' must shift to a
    compatible model.
    """
    # densifier on sparse data rules out the usual default regressor.
    space = SimpleRegressionPipeline.get_hyperparameter_search_space(
        include={'preprocessor': ['densifier']},
        dataset_properties={'sparse': True})
    self.assertEqual(
        space.get_hyperparameter('regressor:__choice__').default,
        'gradient_boosting')

    # nystroem_sampler forces a different compatible default.
    space = SimpleRegressionPipeline.get_hyperparameter_search_space(
        include={'preprocessor': ['nystroem_sampler']})
    self.assertEqual(
        space.get_hyperparameter('regressor:__choice__').default,
        'sgd')
def test_get_hyperparameter_search_space_preprocessor_contradicts_default_classifier(
        self):
    # NOTE(review): this method duplicates an identically named one in
    # this file; Python keeps only the last definition, so one of the
    # two copies is dead. Consider removing one.
    for included, props, expected_default in (
            (['densifier'], {'sparse': True}, 'gradient_boosting'),
            (['nystroem_sampler'], None, 'sgd'),
    ):
        kwargs = {'include': {'preprocessor': included}}
        if props is not None:
            kwargs['dataset_properties'] = props
        cs = SimpleRegressionPipeline.get_hyperparameter_search_space(
            **kwargs)
        self.assertEqual(
            cs.get_hyperparameter('regressor:__choice__').default,
            expected_default)
def test_get_hyperparameter_search_space(self):
    """Smoke-test the size and structure of the default search space."""
    space = SimpleRegressionPipeline.get_hyperparameter_search_space()
    self.assertIsInstance(space, ConfigurationSpace)

    conditions = space.get_conditions()
    hyperparameters = space.get_hyperparameters()
    # 143 hyperparameters in total; all but the 5 top-level ones are
    # conditional on some parent choice.
    self.assertEqual(143, len(hyperparameters))
    self.assertEqual(len(hyperparameters) - 5, len(conditions))
def test_get_hyperparameter_search_space(self):
    # NOTE(review): byte-for-byte duplicate of an identically named test
    # in this file — only the last definition survives at class-creation
    # time; one copy should be deleted.
    configuration_space = (
        SimpleRegressionPipeline.get_hyperparameter_search_space())
    self.assertIsInstance(configuration_space, ConfigurationSpace)
    n_hyperparameters = len(configuration_space.get_hyperparameters())
    n_conditions = len(configuration_space.get_conditions())
    self.assertEqual(143, n_hyperparameters)
    # Every hyperparameter except the 5 unconditional ones has a condition.
    self.assertEqual(n_hyperparameters - 5, n_conditions)
def test_configurations(self):
    """Sample random configurations and check that fit/predict succeed.

    Known benign failure modes (floating-point overflow, all features
    removed/discarded, numpy numeric warnings, FastICA non-convergence,
    memory exhaustion under the 4 GiB cap) are skipped; any other error
    is re-raised after printing the offending configuration.
    """
    # Use a limit of ~4GiB so runaway configs fail fast with MemoryError.
    limit = 4000 * 1024 * 1024
    resource.setrlimit(resource.RLIMIT_AS, (limit, limit))

    cs = SimpleRegressionPipeline.get_hyperparameter_search_space()
    print(cs)
    cs.seed(1)

    for i in range(10):
        config = cs.sample_configuration()
        config._populate_values()
        # Cap SGD iterations so each sampled configuration fits quickly.
        if config['regressor:sgd:n_iter'] is not None:
            config._values['regressor:sgd:n_iter'] = 5

        X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston')
        cls = SimpleRegressionPipeline(config, random_state=1)
        print(config)
        try:
            cls.fit(X_train, Y_train)
            X_test_ = X_test.copy()
            predictions = cls.predict(X_test)
            self.assertIsInstance(predictions, np.ndarray)
            # Fixed local-variable typo: was 'predicted_probabiliets'.
            predicted_probabilities = cls.predict(X_test_)
            self.assertIsInstance(predicted_probabilities, np.ndarray)
        except ValueError as e:
            if "Floating-point under-/overflow occurred at epoch" in \
                    e.args[0] or \
                    "removed all features" in e.args[0] or \
                    "all features are discarded" in e.args[0]:
                continue
            else:
                print(config)
                print(traceback.format_exc())
                # Bare raise preserves the original traceback
                # (raise e would restart it here).
                raise
        except RuntimeWarning as e:
            if "invalid value encountered in sqrt" in e.args[0]:
                continue
            elif "divide by zero encountered in" in e.args[0]:
                continue
            elif "invalid value encountered in divide" in e.args[0]:
                continue
            elif "invalid value encountered in true_divide" in e.args[0]:
                continue
            else:
                print(config)
                print(traceback.format_exc())
                raise
        except UserWarning as e:
            if "FastICA did not converge" in e.args[0]:
                continue
            else:
                print(config)
                print(traceback.format_exc())
                raise
        except MemoryError:
            # Exceeding the address-space cap is an accepted outcome.
            continue
def test_get_hyperparameter_search_space_include_exclude_models(self):
    """Classmethod-API variant of the include/exclude search-space test."""
    # NOTE(review): another method with this exact name exists in this
    # file; Python keeps only the last definition, so one copy is dead.
    space = SimpleRegressionPipeline.get_hyperparameter_search_space(
        include={'regressor': ['random_forest']})
    self.assertEqual(
        space.get_hyperparameter('regressor:__choice__'),
        CategoricalHyperparameter('regressor:__choice__',
                                  ['random_forest']))

    # TODO add this test when more than one regressor is present
    space = SimpleRegressionPipeline.get_hyperparameter_search_space(
        exclude={'regressor': ['random_forest']})
    self.assertNotIn('random_forest', str(space))

    space = SimpleRegressionPipeline.get_hyperparameter_search_space(
        include={'preprocessor': ['pca']})
    self.assertEqual(
        space.get_hyperparameter('preprocessor:__choice__'),
        CategoricalHyperparameter('preprocessor:__choice__', ['pca']))

    space = SimpleRegressionPipeline.get_hyperparameter_search_space(
        exclude={'preprocessor': ['no_preprocessing']})
    self.assertNotIn('no_preprocessing', str(space))
def test_configurations_sparse(self):
    """Sample configurations on sparse data and check that fit succeeds.

    Benign numeric failures are skipped; unexpected errors are
    re-raised after printing the offending configuration.
    """
    # Use a limit of ~4GiB
    limit = 4000 * 1024 * 1024
    resource.setrlimit(resource.RLIMIT_AS, (limit, limit))

    cs = SimpleRegressionPipeline.get_hyperparameter_search_space(
        dataset_properties={'sparse': True})
    print(cs)
    for i in range(10):
        config = cs.sample_configuration()
        config._populate_values()
        # NOTE(review): these keys use the 'classifier:' prefix although
        # this is a regression pipeline — they look copy-pasted from the
        # classification test and are probably never present; confirm
        # and either fix the prefix or drop the checks.
        if 'classifier:passive_aggressive:n_iter' in config and \
                config['classifier:passive_aggressive:n_iter'] is not None:
            config._values['classifier:passive_aggressive:n_iter'] = 5
        if 'classifier:sgd:n_iter' in config and \
                config['classifier:sgd:n_iter'] is not None:
            config._values['classifier:sgd:n_iter'] = 5

        print(config)
        X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston',
                                                       make_sparse=True)
        cls = SimpleRegressionPipeline(config, random_state=1)
        try:
            cls.fit(X_train, Y_train)
            predictions = cls.predict(X_test)
        except ValueError as e:
            if "Floating-point under-/overflow occurred at epoch" in \
                    e.args[0] or \
                    "removed all features" in e.args[0] or \
                    "all features are discarded" in e.args[0]:
                continue
            else:
                print(config)
                traceback.print_tb(sys.exc_info()[2])
                # Bare raise preserves the original traceback.
                raise
        except RuntimeWarning as e:
            if "invalid value encountered in sqrt" in e.args[0]:
                continue
            elif "divide by zero encountered in" in e.args[0]:
                continue
            elif "invalid value encountered in divide" in e.args[0]:
                continue
            elif "invalid value encountered in true_divide" in e.args[0]:
                continue
            else:
                print(config)
                raise
        except UserWarning as e:
            if "FastICA did not converge" in e.args[0]:
                continue
            else:
                print(config)
                raise
def test_configurations_sparse(self):
    """Sample sparse-data configurations and make sure they can be fit.

    NOTE(review): this duplicates an identically named method in this
    file; only the last definition is kept by Python.
    """
    limit = 4000 * 1024 * 1024  # ~4GiB address-space cap
    resource.setrlimit(resource.RLIMIT_AS, (limit, limit))

    cs = SimpleRegressionPipeline.get_hyperparameter_search_space(
        dataset_properties={'sparse': True})
    print(cs)
    for _ in range(10):
        config = cs.sample_configuration()
        config._populate_values()
        # Cap iteration counts so sampled configurations fit quickly.
        for key in ('classifier:passive_aggressive:n_iter',
                    'classifier:sgd:n_iter'):
            if key in config and config[key] is not None:
                config._values[key] = 5

        print(config)
        X_train, Y_train, X_test, Y_test = get_dataset(
            dataset='boston', make_sparse=True)
        cls = SimpleRegressionPipeline(config, random_state=1)
        try:
            cls.fit(X_train, Y_train)
            predictions = cls.predict(X_test)
        except ValueError as e:
            benign = (
                "Floating-point under-/overflow occurred at epoch",
                "removed all features",
                "all features are discarded",
            )
            if any(msg in e.args[0] for msg in benign):
                continue
            print(config)
            traceback.print_tb(sys.exc_info()[2])
            raise e
        except RuntimeWarning as e:
            benign = (
                "invalid value encountered in sqrt",
                "divide by zero encountered in",
                "invalid value encountered in divide",
                "invalid value encountered in true_divide",
            )
            if any(msg in e.args[0] for msg in benign):
                continue
            print(config)
            raise e
        except UserWarning as e:
            if "FastICA did not converge" in e.args[0]:
                continue
            print(config)
            raise e
def test_default_configuration(self):
    """Fit the default configuration twice and expect a stable r2 score."""
    # Two passes guard against hidden state leaking between fits.
    for _ in range(2):
        space = SimpleRegressionPipeline.get_hyperparameter_search_space()
        default = space.get_default_configuration()
        X_train, Y_train, X_test, Y_test = get_dataset(dataset='diabetes')

        pipeline = SimpleRegressionPipeline(default)
        pipeline = pipeline.fit(X_train, Y_train)
        predictions = pipeline.predict(copy.deepcopy(X_test))
        # The lower the worse
        r2_score = sklearn.metrics.r2_score(Y_test, predictions)
        self.assertAlmostEqual(0.41732302035060087, r2_score)
        # score() must agree exactly with the externally computed r2.
        model_score = pipeline.score(copy.deepcopy(X_test), Y_test)
        self.assertEqual(model_score, r2_score)
def test_default_configuration(self):
    # NOTE(review): byte-for-byte duplicate of an identically named test
    # in this file; one copy is shadowed and should be removed.
    for repetition in range(2):
        search_space = (
            SimpleRegressionPipeline.get_hyperparameter_search_space())
        default_config = search_space.get_default_configuration()
        X_train, Y_train, X_test, Y_test = get_dataset(dataset='diabetes')

        auto = SimpleRegressionPipeline(default_config).fit(X_train, Y_train)
        predictions = auto.predict(copy.deepcopy(X_test))
        # The lower the worse
        r2_score = sklearn.metrics.r2_score(Y_test, predictions)
        self.assertAlmostEqual(0.41732302035060087, r2_score)
        self.assertEqual(auto.score(copy.deepcopy(X_test), Y_test),
                         r2_score)
def test_predict_batched(self):
    """Batched prediction must match single-shot prediction exactly."""
    space = SimpleRegressionPipeline.get_hyperparameter_search_space()
    cls = SimpleRegressionPipeline(space.get_default_configuration())

    X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston')
    cls.fit(X_train, Y_train)
    reference = cls.predict(X_test.copy())

    # Wrap the fitted pipeline so underlying predict() calls are counted.
    spy = mock.Mock(wraps=cls.pipeline_)
    cls.pipeline_ = spy
    batched = cls.predict(X_test, batch_size=20)
    self.assertEqual((356,), batched.shape)
    # ceil(356 / 20) == 18 underlying predict calls.
    self.assertEqual(18, spy.predict.call_count)
    assert_array_almost_equal(reference, batched)
def test_predict_batched(self):
    # NOTE(review): duplicate of an identically named test in this file;
    # Python keeps only the last definition.
    default = (SimpleRegressionPipeline
               .get_hyperparameter_search_space()
               .get_default_configuration())
    cls = SimpleRegressionPipeline(default)
    X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston')
    cls.fit(X_train, Y_train)

    # Reference prediction on an untouched copy of the test data.
    X_test_ = X_test.copy()
    prediction_ = cls.predict(X_test_)

    # Spy on the fitted pipeline to count batch-wise predict calls.
    cls_predict = mock.Mock(wraps=cls.pipeline_)
    cls.pipeline_ = cls_predict
    prediction = cls.predict(X_test, batch_size=20)
    self.assertEqual((356,), prediction.shape)
    self.assertEqual(18, cls_predict.predict.call_count)
    assert_array_almost_equal(prediction_, prediction)
def test_predict_batched(self):
    """Batched prediction with a decision tree matches unbatched output."""
    space = SimpleRegressionPipeline.get_hyperparameter_search_space(
        include={'regressor': ['decision_tree']})
    cls = SimpleRegressionPipeline(space.get_default_configuration())

    X_train, Y_train, X_test, Y_test = get_dataset(dataset='diabetes')
    cls.fit(X_train, Y_train)
    reference = cls.predict(X_test.copy())

    # Count how many times the underlying pipeline's predict is invoked.
    spy = unittest.mock.Mock(wraps=cls.pipeline_)
    cls.pipeline_ = spy
    batched = cls.predict(X_test, batch_size=20)
    self.assertEqual((292, ), batched.shape)
    # ceil(292 / 20) == 15 underlying predict calls.
    self.assertEqual(15, spy.predict.call_count)
    assert_array_almost_equal(reference, batched)
def test_set_hyperparameters_honors_configuration(self):
    """Makes sure that a given configuration is honored in practice.

    This method tests that the set hyperparameters actually create
    objects that comply with the given configuration. It iterates
    through the pipeline to make sure we did not miss a step, but
    also checks at the end that every configuration from Config was
    checked
    """
    # Exercise every combination of the four boolean dataset properties.
    all_combinations = list(itertools.product([True, False], repeat=4))
    for sparse, multilabel, signed, multiclass, in all_combinations:
        dataset_properties = {
            'sparse': sparse,
            'multilabel': multilabel,
            'multiclass': multiclass,
            'signed': signed,
        }
        auto = SimpleRegressionPipeline(
            random_state=1,
            dataset_properties=dataset_properties,
        )
        cs = auto.get_hyperparameter_search_space()
        config = cs.sample_configuration()

        # Set hyperparameters takes a given config and translate
        # a config to an actual implementation
        auto.set_hyperparameters(config)

        config_dict = config.get_dictionary()

        # keys_checked is our mechanism to ensure that every
        # config key was visited by one of the helper checks below
        keys_checked = []

        for name, step in auto.named_steps.items():
            if name == 'data_preprocessing':
                # We have to check both the numerical and categorical
                # sub-pipelines of the data preprocessing step.
                to_check = {
                    'numerical_transformer': step.numer_ppl.named_steps,
                    'categorical_transformer': step.categ_ppl.named_steps,
                }
                for data_type, pipeline in to_check.items():
                    for sub_name, sub_step in pipeline.items():
                        # If it is a Choice, make sure it is the correct one!
                        if isinstance(sub_step, AutoSklearnChoice):
                            key = "data_preprocessing:{}:{}:__choice__".format(
                                data_type, sub_name)
                            keys_checked.extend(
                                self._test_set_hyperparameter_choice(
                                    key, sub_step, config_dict))
                        # If it is a component, make sure it has the correct hyperparams
                        elif isinstance(sub_step, AutoSklearnComponent):
                            keys_checked.extend(
                                self._test_set_hyperparameter_component(
                                    "data_preprocessing:{}:{}".format(
                                        data_type, sub_name),
                                    sub_step, config_dict))
                        else:
                            raise ValueError(
                                "New type of pipeline component!")
            elif name == 'balancing':
                keys_checked.extend(
                    self._test_set_hyperparameter_component(
                        'balancing', step, config_dict))
            elif name == 'feature_preprocessor':
                keys_checked.extend(
                    self._test_set_hyperparameter_choice(
                        'feature_preprocessor:__choice__', step,
                        config_dict))
            elif name == 'regressor':
                keys_checked.extend(
                    self._test_set_hyperparameter_choice(
                        'regressor:__choice__', step, config_dict))
            else:
                # An unknown step name means this test is out of date.
                raise ValueError(
                    "Found another type of step! Need to update this check"
                    " {}. ".format(name))

        # Make sure we checked the whole configuration
        self.assertSetEqual(set(config_dict.keys()), set(keys_checked))
def test_set_hyperparameters_honors_configuration(self):
    """Makes sure that a given configuration is honored in practice.

    This method tests that the set hyperparameters actually create
    objects that comply with the given configuration. It iterates
    through the pipeline to make sure we did not miss a step, but
    also checks at the end that every configuration from Config was
    checked

    Also considers random_state and ensures pipeline steps correctly
    receive the right random_state
    """
    # Exercise every combination of the four boolean dataset properties.
    all_combinations = list(itertools.product([True, False], repeat=4))
    for sparse, multilabel, signed, multiclass, in all_combinations:
        dataset_properties = {
            'sparse': sparse,
            'multilabel': multilabel,
            'multiclass': multiclass,
            'signed': signed,
        }
        random_state = 1
        auto = SimpleRegressionPipeline(
            random_state=random_state,
            dataset_properties=dataset_properties,
        )
        cs = auto.get_hyperparameter_search_space()
        config = cs.sample_configuration()

        # Set hyperparameters takes a given config and translate
        # a config to an actual implementation
        auto.set_hyperparameters(config)
        config_dict = config.get_dictionary()

        # keys_checked is our mechanism to ensure that every
        # config key was visited by one of the helper checks below
        keys_checked = []

        for name, step in auto.named_steps.items():
            if name == 'data_preprocessor':
                keys_checked.extend(
                    self._test_set_hyperparameter_choice(
                        'data_preprocessor:__choice__', step,
                        config_dict))
                # The step must have received the pipeline's random_state.
                self.assertEqual(step.random_state, random_state)
            elif name == 'feature_preprocessor':
                keys_checked.extend(
                    self._test_set_hyperparameter_choice(
                        'feature_preprocessor:__choice__', step,
                        config_dict))
                self.assertEqual(step.random_state, random_state)
            elif name == 'regressor':
                keys_checked.extend(
                    self._test_set_hyperparameter_choice(
                        'regressor:__choice__', step, config_dict))
                self.assertEqual(step.random_state, random_state)
            else:
                # An unknown step name means this test is out of date.
                raise ValueError(
                    "Found another type of step! Need to update this check"
                    " {}. ".format(name))

        # Make sure we checked the whole configuration
        self.assertSetEqual(set(config_dict.keys()), set(keys_checked))
def test_repr(self):
    """repr() of a pipeline must round-trip through eval()."""
    space = SimpleRegressionPipeline.get_hyperparameter_search_space()
    default = space.get_default_configuration()
    representation = repr(SimpleRegressionPipeline(default))
    # eval on repr is acceptable here only because the string comes from
    # our own code, not from untrusted input.
    rebuilt = eval(representation)
    self.assertIsInstance(rebuilt, SimpleRegressionPipeline)
def test_repr(self):
    # NOTE(review): byte-for-byte duplicate of an identically named test
    # in this file; one copy is shadowed and should be removed.
    default = (SimpleRegressionPipeline
               .get_hyperparameter_search_space()
               .get_default_configuration())
    # Evaluating the repr must reconstruct an equivalent pipeline object.
    cls = eval(repr(SimpleRegressionPipeline(default)))
    self.assertIsInstance(cls, SimpleRegressionPipeline)