Ejemplo n.º 1
0
    def get_pipeline_config_space(self, algorithm_candidates):
        """Assemble one search space covering every candidate estimator.

        A categorical ``estimator`` hyperparameter selects among
        ``algorithm_candidates`` (the first entry is the default).  The search
        space of each candidate is then nested into the result, using the
        candidate id as prefix and ``estimator == <candidate id>`` as the
        parent hyperparameter/value pair.  When the task type is ``IMG_CLS``
        the augmentation hyperparameters and conditions are added as well.

        :param algorithm_candidates: indexable sequence of estimator ids.
        :return: the combined ``ConfigurationSpace``.
        """
        pipeline_cs = ConfigurationSpace()

        selector = CategoricalHyperparameter(
            "estimator",
            algorithm_candidates,
            default_value=algorithm_candidates[0])
        pipeline_cs.add_hyperparameter(selector)

        if self.task_type == IMG_CLS:
            augmentation_cs = get_aug_hyperparameter_space()
            pipeline_cs.add_hyperparameters(
                augmentation_cs.get_hyperparameters())
            pipeline_cs.add_conditions(augmentation_cs.get_conditions())

        for candidate in algorithm_candidates:
            # The selector and augmentation parameters already live at the top
            # level, so they are excluded from the per-model sub-space.
            candidate_cs = self.get_model_config_space(
                candidate, include_estimator=False, include_aug=False)
            pipeline_cs.add_configuration_space(
                candidate,
                candidate_cs,
                parent_hyperparameter={'parent': selector,
                                       'value': candidate})

        return pipeline_cs
Ejemplo n.º 2
0
    def test_add_conditions(self):
        """Adding conditions as a list must match adding them one by one."""
        batch_cs = ConfigurationSpace()
        single_cs = ConfigurationSpace()

        # Register each hyperparameter in the first space and re-use the
        # returned object for the second space, keeping the same order.
        registered = []
        for hyperparameter in (
                CategoricalHyperparameter("input1", [0, 1]),
                CategoricalHyperparameter("input2", [0, 1]),
                UniformIntegerHyperparameter("child1", 0, 10),
                UniformIntegerHyperparameter("child2", 0, 10)):
            added = batch_cs.add_hyperparameter(hyperparameter)
            single_cs.add_hyperparameter(added)
            registered.append(added)
        input1, input2, child1, child2 = registered

        simple_condition = EqualsCondition(input2, child1, 0)
        conjunction = AndConjunction(EqualsCondition(input1, child1, 5),
                                     EqualsCondition(input1, child2, 1))

        batch_cs.add_conditions([simple_condition, conjunction])
        for condition in (simple_condition, conjunction):
            single_cs.add_condition(condition)

        self.assertEqual(str(batch_cs), str(single_cs))
Ejemplo n.º 3
0
    def set_optimizer_space(cs: ConfigurationSpace):
        """Add optimizer hyperparameters and their conditions to ``cs``.

        Adds a categorical ``optimizer`` choice ('SGD' or 'Adam', default
        'Adam') plus the per-optimizer float hyperparameters.  Each
        ``sgd_*`` parameter is conditioned on ``optimizer`` being 'SGD' and
        each ``adam_*`` parameter on it being 'Adam'.  ``cs`` is modified in
        place; nothing is returned.
        """
        optimizer = CategoricalHyperparameter('optimizer', ['SGD', 'Adam'],
                                              default_value='Adam')

        # Learning rates and decays are searched on a log scale.
        sgd_params = [
            UniformFloatHyperparameter('sgd_lr', 0.00001, 0.1,
                                       default_value=0.005, log=True),
            UniformFloatHyperparameter('sgd_decay', 0.0001, 0.1,
                                       default_value=0.05, log=True),
            UniformFloatHyperparameter('sgd_momentum', 0.3, 0.99,
                                       default_value=0.9),
        ]
        adam_params = [
            UniformFloatHyperparameter('adam_lr', 0.00001, 0.1,
                                       default_value=0.005, log=True),
            UniformFloatHyperparameter('adam_decay', 0.0001, 0.1,
                                       default_value=0.05, log=True),
        ]

        sgd_conditions = [
            InCondition(child=param, parent=optimizer, values=['SGD'])
            for param in sgd_params
        ]
        adam_conditions = [
            InCondition(child=param, parent=optimizer, values=['Adam'])
            for param in adam_params
        ]

        cs.add_hyperparameters([optimizer] + sgd_params + adam_params)
        cs.add_conditions(sgd_conditions + adam_conditions)
    def test_add_configuration_space_conjunctions(self):
        """Nesting a space with a conjunction only adds the prefix to names."""
        source_cs = ConfigurationSpace()
        nested_cs = ConfigurationSpace()

        registered = []
        for hyperparameter in (
                CategoricalHyperparameter("input1", [0, 1]),
                CategoricalHyperparameter("input2", [0, 1]),
                UniformIntegerHyperparameter("child1", 0, 10),
                UniformIntegerHyperparameter("child2", 0, 10)):
            registered.append(source_cs.add_hyperparameter(hyperparameter))
        input1, input2, child1, child2 = registered

        simple_condition = EqualsCondition(input2, child1, 0)
        conjunction = AndConjunction(EqualsCondition(input1, child1, 5),
                                     EqualsCondition(input1, child2, 1))
        source_cs.add_conditions([simple_condition, conjunction])

        nested_cs.add_configuration_space(prefix='test',
                                          configuration_space=source_cs)

        nested_text = str(nested_cs)
        self.assertEqual(nested_text.count('test:'), 10)
        # Apart from the "test:" prefix both renderings must be identical.
        self.assertEqual(str(source_cs), nested_text.replace('test:', ''))
Ejemplo n.º 5
0
    def test_add_configuration_space_conjunctions(self):
        """A prefixed copy of a space with an AndConjunction keeps its layout."""
        inner = ConfigurationSpace()
        outer = ConfigurationSpace()

        input1 = inner.add_hyperparameter(
            CategoricalHyperparameter("input1", [0, 1]))
        input2 = inner.add_hyperparameter(
            CategoricalHyperparameter("input2", [0, 1]))
        child1 = inner.add_hyperparameter(
            UniformIntegerHyperparameter("child1", 0, 10))
        child2 = inner.add_hyperparameter(
            UniformIntegerHyperparameter("child2", 0, 10))

        inner.add_conditions([
            EqualsCondition(input2, child1, 0),
            AndConjunction(EqualsCondition(input1, child1, 5),
                           EqualsCondition(input1, child2, 1)),
        ])
        outer.add_configuration_space(prefix='test', configuration_space=inner)

        prefixed = str(outer)
        # The prefix is expected to occur exactly ten times in the rendering.
        self.assertEqual(prefixed.count('test:'), 10)
        # Stripping the prefix must give back the rendering of the inner space.
        self.assertEqual(str(inner), prefixed.replace('test:', ''))
Ejemplo n.º 6
0
    def get_model_config_space(self,
                               estimator_id,
                               include_estimator=True,
                               include_aug=True):
        """Return the search space of one estimator with overrides applied.

        The estimator class is looked up in ``self._estimators`` first and in
        ``self._addons.components`` second.  Its default search space is
        optionally extended with a fixed ``estimator`` hyperparameter and,
        for ``IMG_CLS`` tasks, with the augmentation space.  Every
        hyperparameter is then taken from ``self.update_cs[estimator_id]`` if
        present there, else from ``self.update_cs['all']``, else from the
        default space.  Conditions are copied from the default space.

        :param estimator_id: id of the estimator to look up.
        :param include_estimator: add the ``estimator`` hyperparameter.
        :param include_aug: add the augmentation space (``IMG_CLS`` only);
            only the exact value ``True`` enables it.
        :return: a new ``ConfigurationSpace``.
        :raises ValueError: if ``estimator_id`` is found in neither registry.
        """
        if estimator_id in self._estimators:
            estimator_cls = self._estimators[estimator_id]
        elif estimator_id in self._addons.components:
            estimator_cls = self._addons.components[estimator_id]
        else:
            raise ValueError("Algorithm %s not supported!" % estimator_id)

        base_cs = estimator_cls.get_hyperparameter_search_space()
        estimator_hp = UnParametrizedHyperparameter("estimator", estimator_id)
        if include_estimator:
            base_cs.add_hyperparameter(estimator_hp)
        if self.task_type == IMG_CLS and include_aug is True:
            augmentation_cs = get_aug_hyperparameter_space()
            base_cs.add_hyperparameters(augmentation_cs.get_hyperparameters())
            base_cs.add_conditions(augmentation_cs.get_conditions())

        # Overrides: estimator-specific entries win over the 'all' entries.
        global_cs = self.update_cs.get('all', ConfigurationSpace())
        global_names = global_cs.get_hyperparameter_names()
        specific_cs = self.update_cs.get(estimator_id, ConfigurationSpace())
        specific_names = specific_cs.get_hyperparameter_names()

        merged_cs = ConfigurationSpace()
        for name in base_cs.get_hyperparameter_names():
            if name in specific_names:
                provider = specific_cs
            elif name in global_names:
                provider = global_cs
            else:
                provider = base_cs
            merged_cs.add_hyperparameter(provider.get_hyperparameter(name))

        merged_cs.add_conditions(base_cs.get_conditions())
        return merged_cs
Ejemplo n.º 7
0
    def test_add_conditions(self):
        """``add_conditions`` and repeated ``add_condition`` must agree."""
        bulk = ConfigurationSpace()
        stepwise = ConfigurationSpace()

        def register(hyperparameter):
            # Add to the first space and mirror the returned object into the
            # second one, so both spaces hold the same hyperparameters.
            added = bulk.add_hyperparameter(hyperparameter)
            stepwise.add_hyperparameter(added)
            return added

        input1 = register(CategoricalHyperparameter("input1", [0, 1]))
        input2 = register(CategoricalHyperparameter("input2", [0, 1]))
        child1 = register(UniformIntegerHyperparameter("child1", 0, 10))
        child2 = register(UniformIntegerHyperparameter("child2", 0, 10))

        equals_cond = EqualsCondition(input2, child1, 0)
        and_cond = AndConjunction(EqualsCondition(input1, child1, 5),
                                  EqualsCondition(input1, child2, 1))

        bulk.add_conditions([equals_cond, and_cond])
        stepwise.add_condition(equals_cond)
        stepwise.add_condition(and_cond)

        self.assertEqual(str(bulk), str(stepwise))
Ejemplo n.º 8
0
    def __init__(self,
                 node_list,
                 node_index,
                 task_type,
                 timestamp,
                 fe_config_space: ConfigurationSpace,
                 cash_config_space: ConfigurationSpace,
                 data: DataNode,
                 fixed_config=None,
                 time_limit=None,
                 trial_num=0,
                 metric='acc',
                 optimizer='smac',
                 ensemble_method='ensemble_selection',
                 ensemble_size=50,
                 per_run_time_limit=300,
                 output_dir="logs",
                 dataset_name='default_dataset',
                 eval_type='holdout',
                 resampling_params=None,
                 n_jobs=1,
                 seed=1):
        """Set up a block that optimizes both config spaces jointly.

        All arguments are forwarded unchanged to the parent constructor.
        Afterwards the hyperparameters, conditions and forbidden clauses of
        ``fe_config_space`` and ``cash_config_space`` (each skipped when
        ``None``) are merged into ``self.joint_cs``, an evaluator matching the
        task type is created, and ``self.optimizer`` is replaced by the
        object returned from ``build_hpo_optimizer``.
        """
        super(JointBlock, self).__init__(
            node_list, node_index, task_type, timestamp,
            fe_config_space, cash_config_space, data,
            fixed_config=fixed_config,
            time_limit=time_limit,
            trial_num=trial_num,
            metric=metric,
            optimizer=optimizer,
            ensemble_method=ensemble_method,
            ensemble_size=ensemble_size,
            per_run_time_limit=per_run_time_limit,
            output_dir=output_dir,
            dataset_name=dataset_name,
            eval_type=eval_type,
            resampling_params=resampling_params,
            n_jobs=n_jobs,
            seed=seed)

        self.fixed_config = fixed_config

        # Merge the two spaces, feature-engineering part first.
        joint_space = ConfigurationSpace()
        for part in (fe_config_space, cash_config_space):
            if part is None:
                continue
            joint_space.add_hyperparameters(part.get_hyperparameters())
            joint_space.add_conditions(part.get_conditions())
            joint_space.add_forbidden_clauses(part.get_forbiddens())
        self.joint_cs = joint_space

        # Keyword arguments shared by both evaluator types.
        evaluator_kwargs = dict(
            fixed_config=fixed_config,
            scorer=self.metric,
            data_node=self.original_data,
            timestamp=self.timestamp,
            output_dir=self.output_dir,
            seed=self.seed,
            resampling_strategy=self.eval_type,
            resampling_params=self.resampling_params)

        # The evaluator modules are imported lazily, only for the branch used.
        if self.task_type in CLS_TASKS:
            from mindware.components.evaluators.cls_evaluator import ClassificationEvaluator
            self.evaluator = ClassificationEvaluator(
                if_imbal=self.if_imbal, **evaluator_kwargs)
        else:
            from mindware.components.evaluators.rgs_evaluator import RegressionEvaluator
            self.evaluator = RegressionEvaluator(**evaluator_kwargs)

        # self.optimizer holds the optimizer name up to this point and the
        # built optimizer object afterwards.
        self.optimizer = build_hpo_optimizer(
            self.eval_type,
            self.evaluator,
            self.joint_cs,
            optimizer=self.optimizer,
            output_dir=self.output_dir,
            per_run_time_limit=self.per_run_time_limit,
            inner_iter_num_per_iter=1,
            timestamp=self.timestamp,
            seed=self.seed,
            n_jobs=self.n_jobs)
Ejemplo n.º 9
0
    def test_generate_grid(self):
        """Test grid generation.

        Covers four scenarios: a mixed space of all hyperparameter types,
        degenerate spaces (numerical only, categorical only, constant only,
        empty), a quantized conditional space with two levels of conditions,
        and the error raised when a float hyperparameter has neither a step
        count nor a quantization factor.
        """

        # Sub-test 1
        cs = ConfigurationSpace(seed=1234)

        cat1 = CategoricalHyperparameter(name='cat1', choices=['T', 'F'])
        const1 = Constant(name='const1', value=4)
        float1 = UniformFloatHyperparameter(name='float1',
                                            lower=-1,
                                            upper=1,
                                            log=False)
        int1 = UniformIntegerHyperparameter(name='int1',
                                            lower=10,
                                            upper=100,
                                            log=True)
        ord1 = OrdinalHyperparameter(name='ord1', sequence=['1', '2', '3'])

        cs.add_hyperparameters([float1, int1, cat1, ord1, const1])

        num_steps_dict = {'float1': 11, 'int1': 6}
        generated_grid = generate_grid(cs, num_steps_dict)

        # Check randomly pre-selected values in the generated_grid
        # 2 * 1 * 11 * 6 * 3 total diff. possible configurations
        self.assertEqual(len(generated_grid), 396)
        # Check 1st and last generated configurations completely:
        first_expected_dict = {
            'cat1': 'T',
            'const1': 4,
            'float1': -1.0,
            'int1': 10,
            'ord1': '1'
        }
        last_expected_dict = {
            'cat1': 'F',
            'const1': 4,
            'float1': 1.0,
            'int1': 100,
            'ord1': '3'
        }
        self.assertEqual(generated_grid[0].get_dictionary(),
                         first_expected_dict)
        self.assertEqual(generated_grid[-1].get_dictionary(),
                         last_expected_dict)
        self.assertEqual(generated_grid[198].get_dictionary()['cat1'], 'F')
        self.assertEqual(generated_grid[45].get_dictionary()['const1'], 4)
        # The 2 most frequently changing HPs (int1 and ord1) have 3 * 6 = 18 different values for
        # each value of float1, so the 4th value of float1 of -0.4 is reached after
        # 3 * 18 = 54 values in the generated_grid (and remains the same for the next 18 values):
        for i in range(18):
            self.assertAlmostEqual(
                generated_grid[54 + i].get_dictionary()['float1'],
                -0.4,
                places=2)
        # 5th diff. value for int1 after 4 * 3 = 12 values. Reasoning as above.
        self.assertEqual(generated_grid[12].get_dictionary()['int1'], 63)
        self.assertEqual(generated_grid[3].get_dictionary()['ord1'], '1')
        self.assertEqual(generated_grid[4].get_dictionary()['ord1'], '2')
        self.assertEqual(generated_grid[5].get_dictionary()['ord1'], '3')

        # Sub-test 2
        # Test for extreme cases: only numerical
        cs = ConfigurationSpace(seed=1234)
        cs.add_hyperparameters([float1, int1])

        num_steps_dict = {'float1': 11, 'int1': 6}
        generated_grid = generate_grid(cs, num_steps_dict)

        self.assertEqual(len(generated_grid), 66)
        # Check 1st and last generated configurations completely:
        first_expected_dict = {'float1': -1.0, 'int1': 10}
        last_expected_dict = {'float1': 1.0, 'int1': 100}
        self.assertEqual(generated_grid[0].get_dictionary(),
                         first_expected_dict)
        self.assertEqual(generated_grid[-1].get_dictionary(),
                         last_expected_dict)

        # Test: only categorical
        cs = ConfigurationSpace(seed=1234)
        cs.add_hyperparameters([cat1])

        generated_grid = generate_grid(cs)

        self.assertEqual(len(generated_grid), 2)
        # Check 1st and last generated configurations completely:
        self.assertEqual(generated_grid[0].get_dictionary()['cat1'], 'T')
        self.assertEqual(generated_grid[-1].get_dictionary()['cat1'], 'F')

        # Test: only constant
        cs = ConfigurationSpace(seed=1234)
        cs.add_hyperparameters([const1])

        generated_grid = generate_grid(cs)

        self.assertEqual(len(generated_grid), 1)
        # Check 1st and only generated configuration completely:
        self.assertEqual(generated_grid[0].get_dictionary()['const1'], 4)

        # Test: no hyperparameters yet
        cs = ConfigurationSpace(seed=1234)

        generated_grid = generate_grid(cs, num_steps_dict)

        # A space without any hyperparameters is expected to produce an empty
        # grid, i.e. no Configuration objects at all.
        self.assertEqual(len(generated_grid), 0)

        # Sub-test 3
        # Tests for quantization and conditional spaces. num_steps_dict supports specifying steps
        # for only some of the int and float HPs. The rest are taken from the 'q' member variables
        # of these HPs. The conditional space tested has 2 levels of conditions.
        cs2 = ConfigurationSpace(seed=123)
        float1 = UniformFloatHyperparameter(name='float1',
                                            lower=-1,
                                            upper=1,
                                            log=False)
        int1 = UniformIntegerHyperparameter(name='int1',
                                            lower=0,
                                            upper=1000,
                                            log=False,
                                            q=500)
        cs2.add_hyperparameters([float1, int1])

        int2_cond = UniformIntegerHyperparameter(name='int2_cond',
                                                 lower=10,
                                                 upper=100,
                                                 log=True)
        cs2.add_hyperparameters([int2_cond])
        cond_1 = AndConjunction(LessThanCondition(int2_cond, float1, -0.5),
                                GreaterThanCondition(int2_cond, int1, 600))
        cs2.add_conditions([cond_1])
        cat1_cond = CategoricalHyperparameter(name='cat1_cond',
                                              choices=['apple', 'orange'])
        cs2.add_hyperparameters([cat1_cond])
        cond_2 = AndConjunction(GreaterThanCondition(cat1_cond, int1, 300),
                                LessThanCondition(cat1_cond, int1, 700),
                                GreaterThanCondition(cat1_cond, float1, -0.5),
                                LessThanCondition(cat1_cond, float1, 0.5))
        cs2.add_conditions([cond_2])
        float2_cond = UniformFloatHyperparameter(name='float2_cond',
                                                 lower=10.,
                                                 upper=100.,
                                                 log=True)
        # 2nd level dependency in ConfigurationSpace tree being tested
        cs2.add_hyperparameters([float2_cond])
        cond_3 = GreaterThanCondition(float2_cond, int2_cond, 50)
        cs2.add_conditions([cond_3])
        num_steps_dict1 = {'float1': 4, 'int2_cond': 3, 'float2_cond': 3}
        generated_grid = generate_grid(cs2, num_steps_dict1)
        self.assertEqual(len(generated_grid), 18)

        # RR: I manually generated the grid and verified the values were correct.
        # Check 1st and last generated configurations completely:
        first_expected_dict = {'float1': -1.0, 'int1': 0}
        last_expected_dict = {
            'float1': -1.0,
            'int1': 1000,
            'int2_cond': 100,
            'float2_cond': 100.0
        }
        self.assertEqual(generated_grid[0].get_dictionary(),
                         first_expected_dict)
        self.assertEqual(generated_grid[-1].get_dictionary(),
                         last_expected_dict)
        # Here, we test that a few randomly chosen values in the generated grid
        # correspond to the ones I checked.
        self.assertEqual(generated_grid[3].get_dictionary()['int1'], 1000)
        self.assertEqual(generated_grid[12].get_dictionary()['cat1_cond'],
                         'orange')
        self.assertAlmostEqual(
            generated_grid[-2].get_dictionary()['float2_cond'],
            31.622776601683803,
            places=3)

        # Sub-test 4
        # Test: only a single hyperparameter and num_steps_dict is None
        cs = ConfigurationSpace(seed=1234)
        cs.add_hyperparameters([float1])

        num_steps_dict = {'float1': 11}
        # float1 has no quantization factor, so calling generate_grid without
        # num_steps_dict must raise.  assertRaises makes the test fail when no
        # exception is raised, which a plain try/except would silently accept.
        with self.assertRaises(ValueError) as raised:
            generate_grid(cs)
        self.assertEqual(
            str(raised.exception),
            "num_steps_dict is None or doesn't contain "
            "the number of points to divide float1 into. And its quantization "
            "factor is None. Please provide/set one of these values.")

        generated_grid = generate_grid(cs, num_steps_dict)

        self.assertEqual(len(generated_grid), 11)
        # Check 1st and last generated configurations completely:
        self.assertEqual(generated_grid[0].get_dictionary()['float1'], -1.0)
        self.assertEqual(generated_grid[-1].get_dictionary()['float1'], 1.0)