def get_pipeline_config_space(self, algorithm_candidates):
    """Build the joint configuration space over the candidate algorithms.

    A top-level categorical "estimator" hyperparameter selects one of the
    candidates; each candidate's own search space is attached as a child
    space that is only active when that candidate is selected.  For image
    classification tasks the shared augmentation space is added once at
    the top level.
    """
    space = ConfigurationSpace()
    algo_hp = CategoricalHyperparameter(
        "estimator", algorithm_candidates,
        default_value=algorithm_candidates[0])
    space.add_hyperparameter(algo_hp)

    # Augmentation hyperparameters are shared across estimators, so they
    # are added once here rather than inside each per-estimator sub-space.
    if self.task_type == IMG_CLS:
        aug = get_aug_hyperparameter_space()
        space.add_hyperparameters(aug.get_hyperparameters())
        space.add_conditions(aug.get_conditions())

    for algo_id in algorithm_candidates:
        child_space = self.get_model_config_space(
            algo_id, include_estimator=False, include_aug=False)
        space.add_configuration_space(
            algo_id, child_space,
            parent_hyperparameter={'parent': algo_hp, 'value': algo_id})
    return space
def test_add_conditions(self):
    """add_conditions([...]) must yield the same space as repeated add_condition."""
    space_a = ConfigurationSpace()
    space_b = ConfigurationSpace()

    # The same hyperparameter objects are registered in both spaces.
    in1 = space_a.add_hyperparameter(CategoricalHyperparameter("input1", [0, 1]))
    in2 = space_a.add_hyperparameter(CategoricalHyperparameter("input2", [0, 1]))
    ch1 = space_a.add_hyperparameter(UniformIntegerHyperparameter("child1", 0, 10))
    ch2 = space_a.add_hyperparameter(UniformIntegerHyperparameter("child2", 0, 10))
    for hp in (in1, in2, ch1, ch2):
        space_b.add_hyperparameter(hp)

    simple_cond = EqualsCondition(in2, ch1, 0)
    conjunction = AndConjunction(EqualsCondition(in1, ch1, 5),
                                 EqualsCondition(in1, ch2, 1))

    # Batch insertion on one side, one-at-a-time insertion on the other.
    space_a.add_conditions([simple_cond, conjunction])
    space_b.add_condition(simple_cond)
    space_b.add_condition(conjunction)

    self.assertEqual(str(space_a), str(space_b))
def set_optimizer_space(cs: ConfigurationSpace):
    """Add optimizer-selection hyperparameters (SGD vs. Adam) to *cs*.

    Each optimizer's learning-rate/decay parameters (and momentum for SGD)
    are conditional on the top-level "optimizer" categorical choice.
    """
    optimizer = CategoricalHyperparameter('optimizer', ['SGD', 'Adam'],
                                          default_value='Adam')

    # Learning rates and weight decays span orders of magnitude -> log scale.
    sgd_lr = UniformFloatHyperparameter('sgd_lr', 0.00001, 0.1,
                                        default_value=0.005, log=True)
    sgd_decay = UniformFloatHyperparameter('sgd_decay', 0.0001, 0.1,
                                           default_value=0.05, log=True)
    sgd_momentum = UniformFloatHyperparameter('sgd_momentum', 0.3, 0.99,
                                              default_value=0.9)
    adam_lr = UniformFloatHyperparameter('adam_lr', 0.00001, 0.1,
                                         default_value=0.005, log=True)
    adam_decay = UniformFloatHyperparameter('adam_decay', 0.0001, 0.1,
                                            default_value=0.05, log=True)

    cs.add_hyperparameters(
        [optimizer, sgd_lr, sgd_decay, sgd_momentum, adam_lr, adam_decay])

    # Each conditional parameter is active only when its optimizer is chosen.
    cs.add_conditions([
        InCondition(child=child, parent=optimizer, values=[choice])
        for child, choice in ((sgd_lr, 'SGD'),
                              (sgd_decay, 'SGD'),
                              (sgd_momentum, 'SGD'),
                              (adam_lr, 'Adam'),
                              (adam_decay, 'Adam'))
    ])
def test_add_configuration_space_conjunctions(self):
    """Prefixed add_configuration_space must keep AND-conjunctions intact."""
    src = ConfigurationSpace()
    dst = ConfigurationSpace()

    in1 = src.add_hyperparameter(CategoricalHyperparameter("input1", [0, 1]))
    in2 = src.add_hyperparameter(CategoricalHyperparameter("input2", [0, 1]))
    ch1 = src.add_hyperparameter(UniformIntegerHyperparameter("child1", 0, 10))
    ch2 = src.add_hyperparameter(UniformIntegerHyperparameter("child2", 0, 10))

    src.add_conditions([
        EqualsCondition(in2, ch1, 0),
        AndConjunction(EqualsCondition(in1, ch1, 5),
                       EqualsCondition(in1, ch2, 1)),
    ])

    dst.add_configuration_space(prefix='test', configuration_space=src)

    # The prefix must show up on every hyperparameter and every condition
    # reference in the rendered space.
    self.assertEqual(str(dst).count('test:'), 10)
    # Check that they're equal except for the "test:" prefix
    self.assertEqual(str(src), str(dst).replace('test:', ''))
def test_add_configuration_space_conjunctions(self):
    """Copying a space under a prefix preserves conjunction conditions."""
    inner = ConfigurationSpace()
    outer = ConfigurationSpace()

    hp_in1 = inner.add_hyperparameter(CategoricalHyperparameter("input1", [0, 1]))
    hp_in2 = inner.add_hyperparameter(CategoricalHyperparameter("input2", [0, 1]))
    hp_ch1 = inner.add_hyperparameter(UniformIntegerHyperparameter("child1", 0, 10))
    hp_ch2 = inner.add_hyperparameter(UniformIntegerHyperparameter("child2", 0, 10))

    plain_cond = EqualsCondition(hp_in2, hp_ch1, 0)
    and_cond = AndConjunction(EqualsCondition(hp_in1, hp_ch1, 5),
                              EqualsCondition(hp_in1, hp_ch2, 1))
    inner.add_conditions([plain_cond, and_cond])

    outer.add_configuration_space(prefix='test', configuration_space=inner)

    self.assertEqual(str(outer).count('test:'), 10)
    # Check that they're equal except for the "test:" prefix
    self.assertEqual(str(inner), str(outer).replace('test:', ''))
def get_model_config_space(self, estimator_id, include_estimator=True, include_aug=True):
    """Return the hyperparameter search space for a single estimator.

    The estimator's default space is built first, then each of its
    hyperparameters may be replaced by the per-estimator or global ('all')
    entries of ``self.update_cs`` (loaded from the config file), with the
    per-estimator entry taking precedence.
    """
    if estimator_id in self._estimators:
        algo_class = self._estimators[estimator_id]
    elif estimator_id in self._addons.components:
        algo_class = self._addons.components[estimator_id]
    else:
        raise ValueError("Algorithm %s not supported!" % estimator_id)

    base_cs = algo_class.get_hyperparameter_search_space()
    if include_estimator:
        base_cs.add_hyperparameter(
            UnParametrizedHyperparameter("estimator", estimator_id))
    if self.task_type == IMG_CLS and include_aug is True:
        aug = get_aug_hyperparameter_space()
        base_cs.add_hyperparameters(aug.get_hyperparameters())
        base_cs.add_conditions(aug.get_conditions())

    # Update configuration space according to config file.
    # Override priority: estimator-specific > global 'all' > defaults.
    global_cs = self.update_cs.get('all', ConfigurationSpace())
    specific_cs = self.update_cs.get(estimator_id, ConfigurationSpace())
    global_names = global_cs.get_hyperparameter_names()
    specific_names = specific_cs.get_hyperparameter_names()

    merged = ConfigurationSpace()
    for name in base_cs.get_hyperparameter_names():
        if name in specific_names:
            merged.add_hyperparameter(specific_cs.get_hyperparameter(name))
        elif name in global_names:
            merged.add_hyperparameter(global_cs.get_hyperparameter(name))
        else:
            merged.add_hyperparameter(base_cs.get_hyperparameter(name))
    # NOTE(review): conditions are taken from the default space even when the
    # hyperparameters they reference were overridden above — presumably
    # ConfigSpace resolves them against the merged space; confirm.
    merged.add_conditions(base_cs.get_conditions())
    return merged
def test_add_conditions(self):
    """Batch add_conditions must match sequential add_condition calls."""
    bulk = ConfigurationSpace()
    single = ConfigurationSpace()

    hps = [
        CategoricalHyperparameter("input1", [0, 1]),
        CategoricalHyperparameter("input2", [0, 1]),
        UniformIntegerHyperparameter("child1", 0, 10),
        UniformIntegerHyperparameter("child2", 0, 10),
    ]
    # Register the identical hyperparameter objects in both spaces.
    for hp in hps:
        bulk.add_hyperparameter(hp)
        single.add_hyperparameter(hp)
    input1, input2, child1, child2 = hps

    eq_simple = EqualsCondition(input2, child1, 0)
    eq_and = AndConjunction(EqualsCondition(input1, child1, 5),
                            EqualsCondition(input1, child2, 1))

    bulk.add_conditions([eq_simple, eq_and])
    single.add_condition(eq_simple)
    single.add_condition(eq_and)

    self.assertEqual(str(bulk), str(single))
def __init__(self, node_list, node_index, task_type, timestamp,
             fe_config_space: ConfigurationSpace,
             cash_config_space: ConfigurationSpace,
             data: DataNode,
             fixed_config=None,
             time_limit=None,
             trial_num=0,
             metric='acc',
             optimizer='smac',
             ensemble_method='ensemble_selection',
             ensemble_size=50,
             per_run_time_limit=300,
             output_dir="logs",
             dataset_name='default_dataset',
             eval_type='holdout',
             resampling_params=None,
             n_jobs=1,
             seed=1):
    """Set up a JointBlock that optimizes FE and CASH in one search space.

    Most arguments are forwarded unchanged to the parent block; this
    constructor additionally merges the feature-engineering and CASH
    configuration spaces into ``self.joint_cs`` and builds the evaluator
    and HPO optimizer for it.

    Args:
        fe_config_space: feature-engineering search space (may be None).
        cash_config_space: algorithm-selection/HPO search space (may be None).
        data: input DataNode for evaluation.
        fixed_config: configuration values held fixed during the search.
        (remaining keyword arguments are passed through to the base class
        and to the evaluator/optimizer builders — see the base block.)
    """
    super(JointBlock, self).__init__(node_list, node_index, task_type, timestamp,
                                     fe_config_space, cash_config_space, data,
                                     fixed_config=fixed_config,
                                     time_limit=time_limit,
                                     trial_num=trial_num,
                                     metric=metric,
                                     optimizer=optimizer,
                                     ensemble_method=ensemble_method,
                                     ensemble_size=ensemble_size,
                                     per_run_time_limit=per_run_time_limit,
                                     output_dir=output_dir,
                                     dataset_name=dataset_name,
                                     eval_type=eval_type,
                                     resampling_params=resampling_params,
                                     n_jobs=n_jobs,
                                     seed=seed)
    self.fixed_config = fixed_config

    # Combine configuration space: hyperparameters, conditions and forbidden
    # clauses from both sub-spaces are merged into one joint space.
    cs = ConfigurationSpace()
    if fe_config_space is not None:
        cs.add_hyperparameters(fe_config_space.get_hyperparameters())
        cs.add_conditions(fe_config_space.get_conditions())
        cs.add_forbidden_clauses(fe_config_space.get_forbiddens())
    if cash_config_space is not None:
        cs.add_hyperparameters(cash_config_space.get_hyperparameters())
        cs.add_conditions(cash_config_space.get_conditions())
        cs.add_forbidden_clauses(cash_config_space.get_forbiddens())
    self.joint_cs = cs

    # Define evaluator and optimizer.  The evaluator is chosen by task type;
    # imports are local to avoid loading both evaluator modules up front.
    if self.task_type in CLS_TASKS:
        from mindware.components.evaluators.cls_evaluator import ClassificationEvaluator
        self.evaluator = ClassificationEvaluator(
            fixed_config=fixed_config,
            scorer=self.metric,
            data_node=self.original_data,
            if_imbal=self.if_imbal,
            timestamp=self.timestamp,
            output_dir=self.output_dir,
            seed=self.seed,
            resampling_strategy=self.eval_type,
            resampling_params=self.resampling_params)
    else:
        from mindware.components.evaluators.rgs_evaluator import RegressionEvaluator
        # Same wiring as the classification branch, minus the imbalance flag
        # (RegressionEvaluator takes no if_imbal argument).
        self.evaluator = RegressionEvaluator(
            fixed_config=fixed_config,
            scorer=self.metric,
            data_node=self.original_data,
            timestamp=self.timestamp,
            output_dir=self.output_dir,
            seed=self.seed,
            resampling_strategy=self.eval_type,
            resampling_params=self.resampling_params)
    # inner_iter_num_per_iter=1: one inner optimizer iteration per outer step.
    self.optimizer = build_hpo_optimizer(
        self.eval_type, self.evaluator, self.joint_cs,
        optimizer=self.optimizer,
        output_dir=self.output_dir,
        per_run_time_limit=self.per_run_time_limit,
        inner_iter_num_per_iter=1,
        timestamp=self.timestamp,
        seed=self.seed,
        n_jobs=self.n_jobs)
def test_generate_grid(self):
    '''Test grid generation over mixed, conditional and edge-case spaces.'''
    # Sub-test 1: mixed hyperparameter types in one flat space.
    cs = ConfigurationSpace(seed=1234)
    cat1 = CategoricalHyperparameter(name='cat1', choices=['T', 'F'])
    const1 = Constant(name='const1', value=4)
    float1 = UniformFloatHyperparameter(name='float1', lower=-1, upper=1, log=False)
    int1 = UniformIntegerHyperparameter(name='int1', lower=10, upper=100, log=True)
    ord1 = OrdinalHyperparameter(name='ord1', sequence=['1', '2', '3'])
    cs.add_hyperparameters([float1, int1, cat1, ord1, const1])
    num_steps_dict = {'float1': 11, 'int1': 6}
    generated_grid = generate_grid(cs, num_steps_dict)
    # Check randomly pre-selected values in the generated_grid
    # 2 * 1 * 11 * 6 * 3 total diff. possible configurations
    self.assertEqual(len(generated_grid), 396)
    # Check 1st and last generated configurations completely:
    first_expected_dict = {
        'cat1': 'T',
        'const1': 4,
        'float1': -1.0,
        'int1': 10,
        'ord1': '1'
    }
    last_expected_dict = {
        'cat1': 'F',
        'const1': 4,
        'float1': 1.0,
        'int1': 100,
        'ord1': '3'
    }
    self.assertEqual(generated_grid[0].get_dictionary(), first_expected_dict)
    self.assertEqual(generated_grid[-1].get_dictionary(), last_expected_dict)
    self.assertEqual(generated_grid[198].get_dictionary()['cat1'], 'F')
    self.assertEqual(generated_grid[45].get_dictionary()['const1'], 4)
    # The 2 most frequently changing HPs (int1 and ord1) have 3 * 6 = 18 different values for
    # each value of float1, so the 4th value of float1 of -0.4 is reached after
    # 3 * 18 = 54 values in the generated_grid (and remains the same for the next 18 values):
    for i in range(18):
        self.assertAlmostEqual(
            generated_grid[54 + i].get_dictionary()['float1'], -0.4, places=2)
    # 5th diff. value for int1 after 4 * 3 = 12 values. Reasoning as above.
    self.assertEqual(generated_grid[12].get_dictionary()['int1'], 63)
    self.assertEqual(generated_grid[3].get_dictionary()['ord1'], '1')
    self.assertEqual(generated_grid[4].get_dictionary()['ord1'], '2')
    self.assertEqual(generated_grid[5].get_dictionary()['ord1'], '3')

    # Sub-test 2
    # Test for extreme cases: only numerical
    cs = ConfigurationSpace(seed=1234)
    cs.add_hyperparameters([float1, int1])
    num_steps_dict = {'float1': 11, 'int1': 6}
    generated_grid = generate_grid(cs, num_steps_dict)
    self.assertEqual(len(generated_grid), 66)
    # Check 1st and last generated configurations completely:
    first_expected_dict = {'float1': -1.0, 'int1': 10}
    last_expected_dict = {'float1': 1.0, 'int1': 100}
    self.assertEqual(generated_grid[0].get_dictionary(), first_expected_dict)
    self.assertEqual(generated_grid[-1].get_dictionary(), last_expected_dict)

    # Test: only categorical
    cs = ConfigurationSpace(seed=1234)
    cs.add_hyperparameters([cat1])
    generated_grid = generate_grid(cs)
    self.assertEqual(len(generated_grid), 2)
    # Check 1st and last generated configurations completely:
    self.assertEqual(generated_grid[0].get_dictionary()['cat1'], 'T')
    self.assertEqual(generated_grid[-1].get_dictionary()['cat1'], 'F')

    # Test: only constant
    cs = ConfigurationSpace(seed=1234)
    cs.add_hyperparameters([const1])
    generated_grid = generate_grid(cs)
    self.assertEqual(len(generated_grid), 1)
    # Check 1st and only generated configuration completely:
    self.assertEqual(generated_grid[0].get_dictionary()['const1'], 4)

    # Test: no hyperparameters yet
    cs = ConfigurationSpace(seed=1234)
    generated_grid = generate_grid(cs, num_steps_dict)
    # For the case of no hyperparameters, in get_cartesian_product, itertools.product() returns
    # a single empty tuple element which leads to a single empty Configuration.
    self.assertEqual(len(generated_grid), 0)

    # Sub-test 3
    # Tests for quantization and conditional spaces. num_steps_dict supports specifying steps
    # for only some of the int and float HPs. The rest are taken from the 'q' member variables
    # of these HPs. The conditional space tested has 2 levels of conditions.
    cs2 = ConfigurationSpace(seed=123)
    float1 = UniformFloatHyperparameter(name='float1', lower=-1, upper=1, log=False)
    int1 = UniformIntegerHyperparameter(name='int1', lower=0, upper=1000, log=False, q=500)
    cs2.add_hyperparameters([float1, int1])
    int2_cond = UniformIntegerHyperparameter(name='int2_cond', lower=10, upper=100, log=True)
    cs2.add_hyperparameters([int2_cond])
    cond_1 = AndConjunction(LessThanCondition(int2_cond, float1, -0.5),
                            GreaterThanCondition(int2_cond, int1, 600))
    cs2.add_conditions([cond_1])
    cat1_cond = CategoricalHyperparameter(name='cat1_cond', choices=['apple', 'orange'])
    cs2.add_hyperparameters([cat1_cond])
    cond_2 = AndConjunction(GreaterThanCondition(cat1_cond, int1, 300),
                            LessThanCondition(cat1_cond, int1, 700),
                            GreaterThanCondition(cat1_cond, float1, -0.5),
                            LessThanCondition(cat1_cond, float1, 0.5))
    cs2.add_conditions([cond_2])
    float2_cond = UniformFloatHyperparameter(name='float2_cond', lower=10., upper=100., log=True)
    # 2nd level dependency in ConfigurationSpace tree being tested
    cs2.add_hyperparameters([float2_cond])
    cond_3 = GreaterThanCondition(float2_cond, int2_cond, 50)
    cs2.add_conditions([cond_3])
    num_steps_dict1 = {'float1': 4, 'int2_cond': 3, 'float2_cond': 3}
    generated_grid = generate_grid(cs2, num_steps_dict1)
    self.assertEqual(len(generated_grid), 18)
    # RR: I manually generated the grid and verified the values were correct.
    # Check 1st and last generated configurations completely:
    first_expected_dict = {'float1': -1.0, 'int1': 0}
    last_expected_dict = {
        'float1': -1.0,
        'int1': 1000,
        'int2_cond': 100,
        'float2_cond': 100.0
    }
    self.assertEqual(generated_grid[0].get_dictionary(), first_expected_dict)
    self.assertEqual(generated_grid[-1].get_dictionary(), last_expected_dict)
    # Here, we test that a few randomly chosen values in the generated grid
    # correspond to the ones I checked.
    self.assertEqual(generated_grid[3].get_dictionary()['int1'], 1000)
    self.assertEqual(generated_grid[12].get_dictionary()['cat1_cond'], 'orange')
    self.assertAlmostEqual(
        generated_grid[-2].get_dictionary()['float2_cond'], 31.622776601683803, places=3)

    # Sub-test 4
    # Test: only a single hyperparameter and num_steps_dict is None
    cs = ConfigurationSpace(seed=1234)
    cs.add_hyperparameters([float1])
    num_steps_dict = {'float1': 11}
    # Without a step count and without a quantization factor the call must
    # raise; the message is compared verbatim.
    try:
        generated_grid = generate_grid(cs)
    except ValueError as e:
        assert str(e) == "num_steps_dict is None or doesn't contain " \
            "the number of points to divide float1 into. And its quantization " \
            "factor is None. Please provide/set one of these values."
    generated_grid = generate_grid(cs, num_steps_dict)
    self.assertEqual(len(generated_grid), 11)
    # Check 1st and last generated configurations completely:
    self.assertEqual(generated_grid[0].get_dictionary()['float1'], -1.0)
    self.assertEqual(generated_grid[-1].get_dictionary()['float1'], 1.0)