def get_configspace(self, optimizer='smac'):
    """Return the LibLinear SVC search space for the requested optimizer.

    :param optimizer: 'smac' yields a ``ConfigurationSpace`` (including the
        forbidden penalty/loss/dual combinations); 'tpe' yields a dict of
        hyperopt expressions.
    :return: the search space object for that optimizer.
    :raises ValueError: if ``optimizer`` is neither 'smac' nor 'tpe'.
    """
    if optimizer == 'smac':
        hp_penalty = CategoricalHyperparameter(
            "penalty", ["l1", "l2"], default_value="l2")
        hp_loss = CategoricalHyperparameter(
            "loss", ["hinge", "squared_hinge"], default_value="squared_hinge")
        hp_dual = CategoricalHyperparameter(
            "dual", ['True', 'False'], default_value='True')
        # This is set ad-hoc
        hp_tol = UniformFloatHyperparameter(
            "tol", 1e-5, 1e-1, default_value=1e-4, log=True)
        hp_c = UniformFloatHyperparameter(
            "C", 0.03125, 32768, log=True, default_value=1.0)
        hp_multi_class = Constant("multi_class", "ovr")
        # These are set ad-hoc
        hp_fit_intercept = Constant("fit_intercept", "True")
        hp_intercept_scaling = Constant("intercept_scaling", 1)

        config_space = ConfigurationSpace()
        config_space.add_hyperparameters([
            hp_penalty, hp_loss, hp_dual, hp_tol, hp_c,
            hp_multi_class, hp_fit_intercept, hp_intercept_scaling,
        ])

        # Every clause is built before any is registered, in the same order
        # as they are added to the space.
        forbidden_combinations = (
            ForbiddenAndConjunction(
                ForbiddenEqualsClause(hp_penalty, "l1"),
                ForbiddenEqualsClause(hp_loss, "hinge"),
            ),
            ForbiddenAndConjunction(
                ForbiddenEqualsClause(hp_dual, "False"),
                ForbiddenEqualsClause(hp_penalty, "l2"),
                ForbiddenEqualsClause(hp_loss, "hinge"),
            ),
            ForbiddenAndConjunction(
                ForbiddenEqualsClause(hp_dual, "True"),
                ForbiddenEqualsClause(hp_penalty, "l1"),
            ),
        )
        for clause in forbidden_combinations:
            config_space.add_forbidden_clause(clause)
        return config_space

    if optimizer == 'tpe':
        from hyperopt import hp

        # Only the allowed (penalty, loss, dual) triples are enumerated; they
        # are stored under 'penalty' while 'loss' and 'dual' stay None.
        allowed_combinations = [
            {'penalty': "l1", 'loss': "squared_hinge", 'dual': "False"},
            {'penalty': "l2", 'loss': "hinge", 'dual': "True"},
            {'penalty': "l2", 'loss': "squared_hinge", 'dual': "True"},
            {'penalty': "l2", 'loss': "squared_hinge", 'dual': "False"},
        ]
        return {
            'penalty': hp.choice('liblinear_combination', allowed_combinations),
            'loss': None,
            'dual': None,
            'tol': hp.loguniform('liblinear_tol', np.log(1e-5), np.log(1e-1)),
            'C': hp.loguniform('liblinear_C', np.log(0.03125), np.log(32768)),
            'multi_class': hp.choice('liblinear_multi_class', ["ovr"]),
            'fit_intercept': hp.choice('liblinear_fit_intercept', ["True"]),
            'intercept_scaling': hp.choice('liblinear_intercept_scaling', [1]),
        }

    raise ValueError('Unknown optimizer %s when getting configspace' % optimizer)
def _get_configuration_space(self) -> ConfigurationSpace:
    """Build the hyperparameter space describing the random forest.

    The space is seeded with an integer drawn from ``self.rs`` and the
    bootstrapping choice is pinned to ``self.bootstrap``.

    Returns
    -------
    ConfigurationSpace
    """
    space = ConfigurationSpace()
    space.seed(int(self.rs.randint(0, 1000)))

    hyperparameters = [
        Constant("num_trees", value=N_TREES),
        # Single-choice categorical: the only option is the configured value.
        CategoricalHyperparameter(
            "do_bootstrapping",
            choices=(self.bootstrap, ),
            default_value=self.bootstrap,
        ),
        CategoricalHyperparameter(
            "max_features", choices=(3 / 6, 4 / 6, 5 / 6, 1), default_value=1),
        UniformIntegerHyperparameter(
            "min_samples_to_split", lower=1, upper=10, default_value=2),
        UniformIntegerHyperparameter(
            "min_samples_in_leaf", lower=1, upper=10, default_value=1),
    ]
    space.add_hyperparameters(hyperparameters)
    return space
def test_fix_types(self):
    # fix_types must turn an all-strings configuration back into typed values.
    def as_strings(config):
        return {name: str(val) for name, val in config.items()}

    # Test categorical and ordinal
    for hyperparameter_type in (CategoricalHyperparameter, OrdinalHyperparameter):
        cs = ConfigurationSpace()
        cs.add_hyperparameters([
            hyperparameter_type('bools', [True, False]),
            hyperparameter_type('ints', [1, 2, 3, 4, 5]),
            hyperparameter_type('floats', [1.5, 2.5, 3.5, 4.5, 5.5]),
            hyperparameter_type('str', ['string', 'ding', 'dong']),
            hyperparameter_type('mixed', [2, True, 1.5, 'string', False, 'False']),
        ])
        c = cs.get_default_configuration().get_dictionary()

        # Check bools first, then the legal mixed values; each assignment
        # stays in ``c`` for the following checks.
        recoverable = [
            ('bools', False), ('bools', True),
            ('mixed', 2), ('mixed', True), ('mixed', 1.5), ('mixed', 'string'),
        ]
        for name, val in recoverable:
            c[name] = val
            self.assertEqual(fix_types(as_strings(c), cs), c)

        # Check error on cornercase that cannot be caught
        for val in (False, 'False'):
            c['mixed'] = val
            self.assertRaises(ValueError, fix_types, as_strings(c), cs)

    # Test constant
    for val in (2, 1.5, 'string'):
        cs = ConfigurationSpace()
        cs.add_hyperparameter(Constant('constant', val))
        c = cs.get_default_configuration().get_dictionary()
        self.assertEqual(fix_types(as_strings(c), cs), c)
def test_random_neighbor_failing(self):
    # A hyperparameter with a single possible value has no neighbor, so the
    # neighbor search is expected to give up with a ValueError.
    expected_message = 'Probably caught in an infinite ' 'loop.'

    hp = Constant('a', 'b')
    with self.assertRaisesRegex(ValueError, expected_message):
        self._test_random_neigbor(hp)

    hp = CategoricalHyperparameter('a', ['a'])
    with self.assertRaisesRegex(ValueError, expected_message):
        self._test_random_neigbor(hp)
def test_estimate_size(self):
    # The size estimate is checked after each hyperparameter is added.
    cs = ConfigurationSpace()
    self.assertEqual(cs.estimate_size(), 0)

    finite_steps = (
        (Constant('constant', 0), 1),
        (UniformIntegerHyperparameter('integer', 0, 5), 6),
        (CategoricalHyperparameter('cat', [0, 1, 2]), 18),
    )
    for hyperparameter, expected_size in finite_steps:
        cs.add_hyperparameter(hyperparameter)
        self.assertEqual(cs.estimate_size(), expected_size)

    # A float hyperparameter makes the estimate infinite.
    cs.add_hyperparameter(UniformFloatHyperparameter('float', 0, 1))
    self.assertTrue(np.isinf(cs.estimate_size()))
def ordinal(label: str, sequence: List, default=None):
    """Build an ordinal hyperparameter from an ordered sequence of values.

    :param label: name of the hyperparameter.
    :param sequence: ordered values; each one is passed through ``_encode``.
        A single-element sequence yields a ``Constant`` instead.
    :param default: optional default value (encoded the same way). ``None``
        means "no explicit default".
    :return: a ``Constant`` or an ``OrdinalHyperparameter``.
    """
    if len(sequence) == 1:
        return Constant(label, _encode(sequence[0]))
    choices = [_encode(option) for option in sequence]
    kwargs = {}
    # Compare against None explicitly: a truthiness test would silently drop
    # legitimate falsy defaults such as 0, 0.0, False or "".
    if default is not None:
        kwargs['default_value'] = _encode(default)
    return OrdinalHyperparameter(label, choices, **kwargs)
def choice(label: str, options: List, default=None):
    """Build a categorical hyperparameter from a list of options.

    :param label: name of the hyperparameter.
    :param options: candidate values; each one is passed through ``_encode``.
        A single-element list yields a ``Constant`` instead.
    :param default: optional default value (encoded the same way). ``None``
        means "no explicit default".
    :return: a ``Constant`` or a ``CategoricalHyperparameter``.
    """
    if len(options) == 1:
        return Constant(label, _encode(options[0]))
    choices = [_encode(option) for option in options]
    kwargs = {}
    # Compare against None explicitly: a truthiness test would silently drop
    # legitimate falsy defaults such as 0, 0.0, False or "".
    if default is not None:
        kwargs['default_value'] = _encode(default)
    return CategoricalHyperparameter(label, choices, **kwargs)
def test_sample_configuration(self):
    # Part 1: a minimal parent/child space.
    simple_space = ConfigurationSpace()
    parent = CategoricalHyperparameter("parent", [0, 1])
    simple_space.add_hyperparameter(parent)
    child = UniformIntegerHyperparameter("child", 0, 10)
    simple_space.add_hyperparameter(child)
    simple_space.add_condition(EqualsCondition(child, parent, 0))
    # This automatically checks the configuration!
    Configuration(simple_space, dict(parent=0, child=5))

    # and now for something more complicated
    cs = ConfigurationSpace(seed=1)
    inputs = []
    for number in range(1, 6):
        binary_hp = CategoricalHyperparameter("input%d" % number, [0, 1])
        cs.add_hyperparameter(binary_hp)
        inputs.append(binary_hp)
    in1, in2, in3, in4, in5 = inputs
    and_hp = Constant("AND", "True")
    cs.add_hyperparameter(and_hp)

    and_on_in1 = EqualsCondition(and_hp, in1, 1)
    and_off_in2 = NotEqualsCondition(and_hp, in2, 1)
    and_in_in3 = InCondition(and_hp, in3, [1])
    in5_on_in3 = EqualsCondition(in5, in3, 1)
    in4_on_in5 = EqualsCondition(in4, in5, 1)
    and_on_in4 = EqualsCondition(and_hp, in4, 1)
    and_on_in5 = EqualsCondition(and_hp, in5, 1)
    inner_and = AndConjunction(and_on_in1, and_off_in2)
    inner_or = OrConjunction(inner_and, and_in_in3)
    outer_and = AndConjunction(inner_or, and_on_in4, and_on_in5)
    cs.add_condition(in5_on_in3)
    cs.add_condition(in4_on_in5)
    cs.add_condition(outer_and)

    # Re-seeding with the same seed must reproduce the same 100 samples.
    runs = []
    for _ in range(5):
        cs.seed(1)
        current = []
        runs.append(current)
        for _ in range(100):
            current.append(cs.sample_configuration())
        if len(runs) > 1:
            for latest, earlier in zip(current, runs[-2]):
                self.assertEqual(latest, earlier)
def test_get_hyperparameters_topological_sort(self):
    # and now for something more complicated
    cs = ConfigurationSpace()
    hp1, hp2, hp3, hp4, hp5 = [
        CategoricalHyperparameter("input%d" % number, [0, 1])
        for number in range(1, 6)
    ]
    hp6 = Constant("AND", "True")
    # More top-level hyperparameters
    hp7 = CategoricalHyperparameter("input7", [0, 1])
    # Somewhat shuffled
    hyperparameters = [hp1, hp2, hp3, hp4, hp5, hp6, hp7]
    for hp in hyperparameters:
        cs.add_hyperparameter(hp)

    cond1 = EqualsCondition(hp6, hp1, 1)
    cond2 = NotEqualsCondition(hp6, hp2, 1)
    cond3 = InCondition(hp6, hp3, [1])
    cond4 = EqualsCondition(hp5, hp3, 1)
    cond5 = EqualsCondition(hp4, hp5, 1)
    cond6 = EqualsCondition(hp6, hp4, 1)
    cond7 = EqualsCondition(hp6, hp5, 1)
    conj1 = AndConjunction(cond1, cond2)
    conj2 = OrConjunction(conj1, cond3)
    conj3 = AndConjunction(conj2, cond6, cond7)

    def assert_positions(expected_indices):
        # Each hyperparameter must sit at its expected index in the public
        # list and in both internal lookup tables.
        ordered = cs.get_hyperparameters()
        for hp, idx in zip(hyperparameters, expected_indices):
            self.assertEqual(ordered.index(hp), idx)
            self.assertEqual(cs._hyperparameter_idx[hp.name], idx)
            self.assertEqual(cs._idx_to_hyperparameter[idx], hp.name)

    cs.add_condition(cond4)
    # AND is moved to the front because of alphabetical sorting
    assert_positions([1, 2, 3, 4, 6, 0, 5])

    cs.add_condition(cond5)
    assert_positions([1, 2, 3, 6, 5, 0, 4])

    cs.add_condition(conj3)
    assert_positions([0, 1, 2, 5, 4, 6, 3])
def __parse_dict_to_config(self, key, value):
    """Convert one HDL entry into a ConfigSpace hyperparameter.

    :param key: hyperparameter name.
    :param value: either a plain value (wrapped in a ``Constant`` after
        ``hp_def._encode``) or a dict with ``_type`` (name of a factory in
        ``hp_def``), ``_value`` (the factory arguments) and an optional
        ``_default``.
    :return: the hyperparameter built by the selected ``hp_def`` factory.
    :raises AssertionError: if a dict entry has no ``_value``.
    :raises AttributeError: if ``_type`` names no attribute of ``hp_def``.
    """
    if not isinstance(value, dict):
        return Constant(key, hp_def._encode(value))

    _type = value.get("_type")
    _value = value.get("_value")
    _default = value.get("_default")
    assert _value is not None

    # Look the factory up by name rather than eval()-ing generated source:
    # eval would execute arbitrary code embedded in the HDL and breaks on
    # keys that contain quotes or backslashes.
    factory = getattr(hp_def, _type)
    if _type in ("choice", "ordinal"):
        # These factories take the whole option list as one argument.
        return factory(key, _value, _default)
    # Every other factory takes its bounds/arguments unpacked.
    return factory(key, *_value, default=_default)
def __parse_dict_to_config(self, key, value):
    """Convert one HDL entry into a ConfigSpace hyperparameter.

    :param key: hyperparameter name.
    :param value: either a plain value (wrapped in a ``Constant`` after
        ``smac_hdl._encode``) or a dict with ``_type`` (name of a factory in
        ``smac_hdl``), ``_value`` (the factory arguments) and an optional
        ``_default``.
    :return: the hyperparameter built by the selected ``smac_hdl`` factory.
    :raises AssertionError: if a dict entry has no ``_value``.
    :raises AttributeError: if ``_type`` names no attribute of ``smac_hdl``.
    """
    if not isinstance(value, dict):
        return Constant(key, smac_hdl._encode(value))

    _type = value.get("_type")
    _value = value.get("_value")
    _default = value.get("_default")
    assert _value is not None

    if _type == "choice":
        # choice() takes the whole option list as one argument.
        return smac_hdl.choice(key, _value, _default)

    # Look the factory up by name rather than eval()-ing generated source:
    # eval would execute arbitrary code embedded in the HDL and breaks on
    # keys that contain quotes or backslashes.
    factory = getattr(smac_hdl, _type)
    return factory(key, *_value, default=_default)
def test_meta_field(self):
    # The meta dict of each hyperparameter type must survive being nested
    # into a parent space under a prefix.
    cs = ConfigurationSpace()
    hyperparameters = [
        UniformIntegerHyperparameter(
            "uihp", lower=1, upper=10, meta=dict(uihp=True)),
        NormalIntegerHyperparameter(
            "nihp", mu=0, sigma=1, meta=dict(nihp=True)),
        UniformFloatHyperparameter(
            "ufhp", lower=1, upper=10, meta=dict(ufhp=True)),
        NormalFloatHyperparameter(
            "nfhp", mu=0, sigma=1, meta=dict(nfhp=True)),
        CategoricalHyperparameter(
            "chp", choices=['1', '2', '3'], meta=dict(chp=True)),
        OrdinalHyperparameter(
            "ohp", sequence=['1', '2', '3'], meta=dict(ohp=True)),
        Constant("const", value=1, meta=dict(const=True)),
    ]
    for hyperparameter in hyperparameters:
        cs.add_hyperparameter(hyperparameter)

    parent = ConfigurationSpace()
    parent.add_configuration_space("sub", cs, delimiter=':')

    for short_name in ("uihp", "nihp", "ufhp", "nfhp", "chp", "ohp", "const"):
        nested = parent.get_hyperparameter("sub:" + short_name)
        self.assertEqual(nested.meta, {short_name: True})
def choice(label: str, options: List, default=None):
    """Build a (possibly weighted) categorical hyperparameter.

    :param label: name of the hyperparameter.
    :param options: candidate values. An option may be a bare value or a
        2-element tuple/list pairing a value with its probability, where the
        probability is the int/float element lying in ``[0, 1]``. A
        single-element ``options`` yields a ``Constant`` instead.
    :param default: optional default value (passed through ``_encode``).
        ``None`` means "no explicit default".
    :return: a ``Constant`` or a ``CategoricalHyperparameter`` created with
        ``weights`` set to the resolved probabilities.
    :raises AssertionError: if a 2-element option does not contain exactly
        one value and one probability.
    """
    if len(options) == 1:
        return Constant(label, _encode(options[0]))
    # fixme: if declare probability in here?
    # fixme: copy from autoflow/hdl2configSpce/hdl2configSpce.py:354
    choice2proba = {}
    not_specific_proba_choices = []
    sum_proba = 0
    choices = []
    raw_choices = []
    for option in options:
        if isinstance(option, (tuple, list)) and len(option) == 2:
            raw_choice = None
            proba = None
            # Whichever element is a number in [0, 1] is taken as the
            # probability; the other element is the choice itself.
            for item in option:
                if isinstance(item, (float, int)) and 0 <= item <= 1:
                    proba = item
                else:
                    raw_choice = item
            assert raw_choice is not None and proba is not None
            choice2proba[raw_choice] = proba
            sum_proba += proba
        else:
            raw_choice = option
            not_specific_proba_choices.append(raw_choice)
        choices.append(_encode(raw_choice))
        raw_choices.append(raw_choice)

    if sum_proba <= 1:
        # Spread the remaining probability mass evenly over the options that
        # did not declare one.
        if len(not_specific_proba_choices) > 0:
            p_rest = (1 - sum_proba) / len(not_specific_proba_choices)
            for not_specific_proba_choice in not_specific_proba_choices:
                choice2proba[not_specific_proba_choice] = p_rest
    else:
        # Declared probabilities exceed 1: fall back to a uniform
        # distribution. The table is read back by *raw* choice below, so it
        # must be keyed by raw choices too (keying it by the encoded values
        # raised KeyError whenever _encode changed a value).
        choice2proba = {k: 1 / len(raw_choices) for k in raw_choices}
    proba_list = [choice2proba[k] for k in raw_choices]

    kwargs = {}
    # Compare against None explicitly: a truthiness test would silently drop
    # legitimate falsy defaults such as 0, 0.0, False or "".
    if default is not None:
        kwargs['default_value'] = _encode(default)
    hp = CategoricalHyperparameter(label, choices, weights=proba_list, **kwargs)
    hp.probabilities = proba_list  # fixme: don't make sense
    return hp
def test_add_second_condition_wo_conjunction(self):
    # Two separate conditions on the same child must be rejected; the caller
    # is expected to combine them in a conjunction instead.
    first_parent = CategoricalHyperparameter("input1", [0, 1])
    second_parent = CategoricalHyperparameter("input2", [0, 1])
    child = Constant("And", "True")

    first_condition = EqualsCondition(child, first_parent, 1)
    second_condition = EqualsCondition(child, second_parent, 1)

    cs = ConfigurationSpace()
    for hyperparameter in (first_parent, second_parent, child):
        cs.add_hyperparameter(hyperparameter)
    cs.add_condition(first_condition)

    expected_message = (
        r"Adding a second condition \(different\) for a "
        r"hyperparameter is ambigouos and "
        r"therefore forbidden. Add a conjunction "
        r"instead!"
    )
    with self.assertRaisesRegex(ValueError, expected_message):
        cs.add_condition(second_condition)
def test_repr_roundtrip(self):
    # The repr of a configuration, once given its configuration space, must
    # evaluate back to an equal configuration.
    cs = ConfigurationSpace()
    hyperparameters = (
        UniformIntegerHyperparameter("uihp", lower=1, upper=10),
        NormalIntegerHyperparameter("nihp", mu=0, sigma=1),
        UniformFloatHyperparameter("ufhp", lower=1, upper=10),
        NormalFloatHyperparameter("nfhp", mu=0, sigma=1),
        CategoricalHyperparameter("chp", choices=['1', '2', '3']),
        OrdinalHyperparameter("ohp", sequence=['1', '2', '3']),
        Constant("const", value=1),
    )
    for hyperparameter in hyperparameters:
        cs.add_hyperparameter(hyperparameter)

    default = cs.get_default_configuration()
    # The local space must stay named ``cs``: the evaluated text below refers
    # to it by that name.
    source_text = default.__repr__().replace(
        '})', '}, configuration_space=cs)')
    rebuilt = eval(source_text)
    self.assertEqual(default, rebuilt)
def test_add_conjunction(self):
    # A hyperparameter guarded by an AND conjunction must not be reported as
    # unconditional.
    parents = [
        CategoricalHyperparameter("input%d" % number, [0, 1])
        for number in (1, 2, 3)
    ]
    guarded = Constant("And", "True")

    conditions = [EqualsCondition(guarded, parent, 1) for parent in parents]
    conjunction = AndConjunction(*conditions)

    cs = ConfigurationSpace()
    for hyperparameter in parents + [guarded]:
        cs.add_hyperparameter(hyperparameter)
    cs.add_condition(conjunction)

    self.assertNotIn(guarded, cs.get_all_unconditional_hyperparameters())
def get_configspace(self, optimizer='smac'):
    """Return the LightGBM search space for the requested optimizer.

    :param optimizer: 'smac' yields a ``ConfigurationSpace``, 'tpe' a dict of
        hyperopt expressions, 'gpflowopt' a gpflowopt domain.
    :return: the search space object for that optimizer.
    :raises ValueError: for any other optimizer name.
    """
    if optimizer == 'smac':
        hyperparameters = [
            UniformIntegerHyperparameter(
                "n_estimators", 100, 1000, default_value=500, q=50),
            UniformIntegerHyperparameter(
                "num_leaves", 31, 2047, default_value=128),
            Constant('max_depth', 15),
            UniformFloatHyperparameter(
                "learning_rate", 1e-3, 0.3, default_value=0.1, log=True),
            UniformIntegerHyperparameter(
                "min_child_samples", 5, 30, default_value=20),
            UniformFloatHyperparameter(
                "subsample", 0.7, 1, default_value=1, q=0.1),
            UniformFloatHyperparameter(
                "colsample_bytree", 0.7, 1, default_value=1, q=0.1),
        ]
        config_space = ConfigurationSpace()
        config_space.add_hyperparameters(hyperparameters)
        return config_space

    if optimizer == 'tpe':
        from hyperopt import hp

        # Integer draws are shifted and scaled onto the intended grids.
        return {
            'n_estimators': (hp.randint('lgb_n_estimators', 19) + 2) * 50,
            'num_leaves': hp.randint('lgb_num_leaves', 2017) + 31,
            'max_depth': 15,
            'learning_rate': hp.loguniform(
                'lgb_learning_rate', np.log(1e-3), np.log(0.3)),
            'min_child_samples': hp.randint('lgb_min_child_samples', 26) + 5,
            'subsample': (hp.randint('lgb_subsample', 4) + 7) * 0.1,
            'colsample_bytree': (hp.randint('lgb_colsample_bytree', 4) + 7) * 0.1,
        }

    if optimizer == 'gpflowopt':
        from gpflowopt.domain import ContinuousParameter

        domain = (
            ContinuousParameter('n_estimators', 100, 1000) +
            ContinuousParameter('num_leaves', 31, 2047) +
            ContinuousParameter('max_depth', 15, 16) +
            ContinuousParameter("learning_rate", 1e-3, 0.3) +
            ContinuousParameter("min_child_samples", 5, 30) +
            ContinuousParameter("subsample", 0.7, 1) +
            ContinuousParameter("colsample_bytree", 0.7, 1)
        )
        return domain

    raise ValueError('Unknown optimizer %s when getting configspace' % optimizer)
def get_config_space_from_dict(space_dict: dict):
    """Build a ``ConfigurationSpace`` from a plain-dict description.

    ``space_dict['parameters']`` maps each parameter name to a spec dict whose
    ``'type'`` is one of:

    * ``'float'`` / ``'int'``: needs ``'bound'`` (lower, upper); optional
      ``'default'``, ``'log'`` (converted with ``parse_bool``) and ``'q'``.
    * ``'cat'``: needs ``'choice'``; optional ``'default'``.
    * ``'const'``: needs ``'value'``.

    :raises ValueError: if a spec has any other ``'type'``.
    """
    config_space = ConfigurationSpace()
    for name, spec in space_dict['parameters'].items():
        kind = spec['type']

        if kind in ['float', 'int']:
            bound = spec['bound']
            extras = dict()
            if 'default' in spec:
                extras['default_value'] = spec['default']
            if 'log' in spec:
                extras['log'] = parse_bool(spec['log'])
            if 'q' in spec:
                extras['q'] = spec['q']
            numeric_cls = (UniformFloatHyperparameter if kind == 'float'
                           else UniformIntegerHyperparameter)
            param = numeric_cls(name, bound[0], bound[1], **extras)
        elif kind == 'cat':
            options = spec['choice']
            extras = dict()
            if 'default' in spec:
                extras['default_value'] = spec['default']
            param = CategoricalHyperparameter(name, options, **extras)
        elif kind == 'const':
            param = Constant(name, spec['value'])
        else:
            raise ValueError("Parameter type %s not supported!" % kind)

        config_space.add_hyperparameter(param)
    return config_space
def __init__(self, node_list, node_index, task_type, timestamp,
             fe_config_space: ConfigurationSpace,
             cash_config_space: ConfigurationSpace,
             data: DataNode,
             fixed_config=None,
             time_limit=None,
             trial_num=0,
             metric='acc',
             ensemble_method='ensemble_selection',
             ensemble_size=50,
             per_run_time_limit=300,
             output_dir="logs",
             dataset_name='default_dataset',
             eval_type='holdout',
             resampling_params=None,
             n_jobs=1,
             seed=1):
    """Create one child block ("arm") per choice of the 'algorithm' hyperparameter.

    All arguments are forwarded unchanged to the parent initializer. For each
    arm, a dedicated configuration space is derived from ``cash_config_space``
    (a constant 'algorithm' plus every hyperparameter whose name starts with
    ``'<arm>:'``) and handed to a child block of the type returned by
    ``get_node_type(node_list, node_index + 1)``.

    :param cash_config_space: must contain a hyperparameter named 'algorithm'
        whose ``choices`` enumerate the arms.
    :raises ValueError: if no time limit is set and ``trial_num`` is smaller
        than ``len(arms) * alpha``.
    """
    super(ConditioningBlock, self).__init__(node_list, node_index, task_type, timestamp,
                                            fe_config_space, cash_config_space, data,
                                            fixed_config=fixed_config,
                                            time_limit=time_limit,
                                            trial_num=trial_num,
                                            metric=metric,
                                            ensemble_method=ensemble_method,
                                            ensemble_size=ensemble_size,
                                            per_run_time_limit=per_run_time_limit,
                                            output_dir=output_dir,
                                            dataset_name=dataset_name,
                                            eval_type=eval_type,
                                            resampling_params=resampling_params,
                                            n_jobs=n_jobs,
                                            seed=seed)

    # Best configuration.
    self.optimal_arm = None
    self.best_lower_bounds = None

    # Bandit settings.
    self.alpha = 4
    self.arms = list(cash_config_space.get_hyperparameter('algorithm').choices)
    self.rewards = dict()
    self.sub_bandits = dict()
    self.evaluation_cost = dict()
    self.arm_cost_stats = {arm: list() for arm in self.arms}

    # These lookups do not depend on the arm, so do them once.
    hps = cash_config_space.get_hyperparameters()
    conds = cash_config_space.get_conditions()
    forbids = cash_config_space.get_forbiddens()
    # Function-scope import kept from the original (presumably to avoid a
    # circular import -- confirm before moving it to module level).
    from solnml.blocks.block_utils import get_node_type

    for arm in self.arms:
        self.rewards[arm] = list()
        self.evaluation_cost[arm] = list()

        cs = ConfigurationSpace()
        cs.add_hyperparameter(Constant('algorithm', arm))
        for hp in hps:
            if hp.name.split(':')[0] == arm:
                cs.add_hyperparameter(hp)

        # Add active conditions. Conditions that cannot be attached to this
        # arm's reduced space are skipped on purpose (best effort), but only
        # ordinary errors are swallowed -- never KeyboardInterrupt/SystemExit.
        for cond in conds:
            try:
                cs.add_condition(cond)
            except Exception:
                pass

        # Add active forbidden clauses, with the same best-effort policy.
        for forbid in forbids:
            try:
                cs.add_forbidden_clause(forbid)
            except Exception:
                pass

        child_type = get_node_type(node_list, node_index + 1)
        # Each child gets its own copies so arms cannot affect one another.
        self.sub_bandits[arm] = child_type(
            node_list, node_index + 1, task_type, timestamp,
            deepcopy(fe_config_space), deepcopy(cs), data.copy_(),
            fixed_config=fixed_config,
            time_limit=time_limit,
            metric=metric,
            ensemble_method=ensemble_method,
            ensemble_size=ensemble_size,
            per_run_time_limit=per_run_time_limit,
            output_dir=output_dir,
            dataset_name=dataset_name,
            eval_type=eval_type,
            resampling_params=resampling_params,
            n_jobs=n_jobs,
            seed=seed)

    self.action_sequence = list()
    self.final_rewards = list()
    self.start_time = time.time()
    self.time_records = list()

    # Initialize the parameters.
    self.pull_cnt = 0
    self.pick_id = 0
    self.update_cnt = 0
    arm_num = len(self.arms)
    self.optimal_algo_id = None
    self.arm_candidate = self.arms.copy()
    self.best_lower_bounds = np.zeros(arm_num)

    if self.time_limit is None:
        # Trial-budget mode: every arm needs at least ``alpha`` trials.
        if arm_num * self.alpha > self.trial_num:
            raise ValueError('Trial number should be larger than %d.' % (arm_num * self.alpha))
    else:
        # Time-budget mode: the trial count is effectively unbounded.
        self.trial_num = MAX_INT
def recursion(self, hdl, is_choice=False):
    """Recursively translate an HDL dict into a ``ConfigurationSpace``.

    :param hdl: mapping of HDL keys to nested dicts or bottom-level
        hyperparameter descriptions (as judged by ``is_hdl_bottom``).
    :param is_choice: when true, a '__choice__' categorical listing the
        nested (non-bottom dict) keys is added and passed on to
        ``add_configuration_space``.
    :return: the configuration space built for ``hdl``.
    :raises NotImplementedError: for an entry that is neither a bottom-level
        description nor a dict.
    """
    space = ConfigurationSpace()

    # An empty HDL still yields a space with a single placeholder constant.
    if not list(hdl.keys()):
        space.add_hyperparameter(Constant("placeholder", "placeholder"))
        return space

    choice_hp = None
    suffix_pattern = re.compile(r"(.*)\((.*)\)")
    plain_hps = {}
    directives = {}

    # If the parent is a choice, expose the selectable sub-spaces.
    if is_choice:
        labels = [
            self.eliminate_suffix(self.eliminate_suffix(name))
            for name, sub in hdl.items()
            if not is_hdl_bottom(name, sub) and isinstance(sub, dict)
        ]
        choice_hp = CategoricalHyperparameter('__choice__', labels)
        space.add_hyperparameter(choice_hp)

    # Walk the entries; nested dicts are delegated to add_configuration_space.
    # fixme: 'option_hp' maybe reference without define ?
    for hdl_key, hdl_value in hdl.items():
        matched = suffix_pattern.match(hdl_key)
        if matched and isinstance(hdl_value, dict):
            # Key of the form "name(method)": a nested choice space.
            groups = matched.groups()
            assert len(groups) == 2, ValueError(
                f"Invalid hdl_key {hdl_key}")
            cs_name, method = groups
            assert method == "choice", ValueError(
                f"Invalid suffix {method}")
            self.add_configuration_space(space, cs_name, hdl_value,
                                         is_choice, choice_hp, True)
            continue
        if is_hdl_bottom(hdl_key, hdl_value):
            if hdl_key.startswith("__"):
                # Dunder keys are directives, handled after the loop.
                directives[hdl_key] = hdl_value
            else:
                hp = self.__parse_dict_to_config(hdl_key, hdl_value)
                space.add_hyperparameter(hp)
                plain_hps[hdl_key] = hp
            continue
        if isinstance(hdl_value, dict):
            # Plain nested space.
            self.add_configuration_space(space, hdl_key, hdl_value,
                                         is_choice, choice_hp)
            continue
        raise NotImplementedError

    # Process the conditional-hyperparameter directives.
    for indicator, payload in directives.items():
        if indicator == "__condition":
            assert isinstance(payload, list)
            for item in payload:
                space.add_condition(self.__condition(item, plain_hps))
        elif indicator == "__activate":
            self.__activate(payload, plain_hps, space)
        elif indicator == "__forbidden":
            self.__forbidden(payload, plain_hps, space)
    # fixme: remove 'rely_model'
    return space
def get_constant_param(name: str):
    """Return a ``Constant`` hyperparameter called ``name`` fixed at 0.0."""
    fixed_value = 0.
    return Constant(name, fixed_value)
exit(0) # print(shps) # Configuration space object: # Hyperparameters: # estimating:__choice__, Type: Categorical, Choices: {lightgbm}, Default: lightgbm # estimating:lightgbm:n_estimator, Type: Constant, Value: 100:int # preprocessing:0num->final:__choice__, Type: Categorical, Choices: {scale.standardize}, Default: scale.standardize # preprocessing:0num->final:scale.standardize:placeholder, Type: Constant, Value: placeholder # Conditions: # estimating:lightgbm:n_estimator | estimating:__choice__ == 'lightgbm' # preprocessing:0num->final:scale.standardize:placeholder | preprocessing:0num->final:__choice__ == 'scale.standardize' # scale.standardize standardize_cs = ConfigurationSpace() standardize_cs.add_hyperparameter(Constant("copy", "True:bool")) # scale.normalize normalize_cs = ConfigurationSpace() normalize_cs.add_hyperparameter(Constant("copy", "True:bool")) # scale scale_cs = ConfigurationSpace() scale_choice = CategoricalHyperparameter( '__choice__', ["scale.standardize", "scale.normalize"]) scale_cs.add_hyperparameter(scale_choice) scale_cs.add_configuration_space("scale.standardize", standardize_cs, parent_hyperparameter={ "parent": scale_choice, "value": "scale.standardize" }) scale_cs.add_configuration_space("scale.normalize",
def test_check_configuration(self):
    # TODO this is only a smoke test
    # TODO actually, this rather tests the evaluate methods in the
    # conditions module!
    simple_space = ConfigurationSpace()
    parent = CategoricalHyperparameter("parent", [0, 1])
    simple_space.add_hyperparameter(parent)
    child = UniformIntegerHyperparameter("child", 0, 10)
    simple_space.add_hyperparameter(child)
    simple_space.add_condition(EqualsCondition(child, parent, 0))
    # This automatically checks the configuration!
    Configuration(simple_space, dict(parent=0, child=5))

    # and now for something more complicated
    cs = ConfigurationSpace()
    inputs = []
    for number in range(1, 6):
        binary_hp = CategoricalHyperparameter("input%d" % number, [0, 1])
        cs.add_hyperparameter(binary_hp)
        inputs.append(binary_hp)
    in1, in2, in3, in4, in5 = inputs
    and_hp = Constant("AND", "True")
    cs.add_hyperparameter(and_hp)

    inner_and = AndConjunction(
        EqualsCondition(and_hp, in1, 1),
        NotEqualsCondition(and_hp, in2, 1),
    )
    inner_or = OrConjunction(inner_and, InCondition(and_hp, in3, [1]))
    conj3 = AndConjunction(
        inner_or,
        EqualsCondition(and_hp, in4, 1),
        EqualsCondition(and_hp, in5, 1),
    )
    cs.add_condition(conj3)

    expected_outcomes = [
        False, False, False, False, False, False, False, True,
        False, False, False, False, False, False, False, True,
        False, False, False, True, False, False, False, True,
        False, False, False, False, False, False, False, True,
    ]

    # The hyperparameters aren't sorted, but the test assumes them to
    # be sorted. Index 0 is skipped when pairing names with values.
    sorted_names = [
        hp.name
        for hp in sorted(cs.get_hyperparameters(), key=lambda t: t.name)
    ]
    inactive_message = (
        r"Inactive hyperparameter 'AND' must "
        r"not be specified, but has the vector value: "
        r"'0.0'."
    )

    for idx, values in enumerate(product([0, 1], repeat=5)):
        instantiations = dict(zip(sorted_names[1:], values))
        evaluation = conj3.evaluate(instantiations)
        self.assertEqual(expected_outcomes[idx], evaluation)

        config_values = {
            "input1": values[0],
            "input2": values[1],
            "input3": values[2],
            "input4": values[3],
            "input5": values[4],
            "AND": "True",
        }
        if evaluation:
            Configuration(cs, values=config_values)
        else:
            self.assertRaisesRegex(
                ValueError,
                inactive_message,
                Configuration,
                cs,
                values=config_values,
            )
def recursion(self, hdl: Dict, path=()) -> ConfigurationSpace:
    """Recursively translate an HDL dict into a ``ConfigurationSpace``.

    :param hdl: HDL mapping. If its first entry is a bottom-level
        hyperparameter description (per ``is_hdl_bottom``), the whole dict is
        treated as a flat set of hyperparameters plus ``__``-prefixed
        directives; otherwise every entry is a nested sub-space.
    :param path: sequence of keys leading to ``hdl``; it is recorded together
        with ``__rely_model`` directives and extended on each recursive call.
    :return: the configuration space built for ``hdl``.
    :raises NotImplementedError: for a "name(method)" key whose method is not
        "choice", or for a nested entry that is not a dict.

    Side effects visible here: ``__proba`` entries are popped from the nested
    dicts of ``hdl`` (the input is mutated), and ``__rely_model`` directives
    are appended to ``RelyModels.info``.
    """
    cs = ConfigurationSpace()
    # Check whether this dict directly describes hyperparameters
    key_list = list(hdl.keys())
    if len(key_list) == 0:
        # Empty HDL: return a space holding a single placeholder constant.
        cs.add_hyperparameter(Constant("placeholder", "placeholder"))
        return cs
    else:
        # Only the first entry is inspected to decide how to read the dict.
        sample_key = key_list[0]
        sample_value = hdl[sample_key]
        if is_hdl_bottom(sample_key, sample_value):
            store = {}
            conditions_dict = {}
            # Split plain hyperparameters from "__"-prefixed directives.
            for key, value in hdl.items():
                if purify_key(key).startswith("__"):
                    conditions_dict[key] = value
                else:
                    hp = self.__parse_dict_to_config(key, value)
                    cs.add_hyperparameter(hp)
                    store[key] = hp
            for key, value in conditions_dict.items():
                # A directive key may carry a leader-model prefix, separated
                # by SERIES_CONNECT_LEADER_TOKEN.
                if SERIES_CONNECT_LEADER_TOKEN in key:
                    leader_model, condition_indicator = key.split(
                        SERIES_CONNECT_LEADER_TOKEN)
                else:
                    leader_model, condition_indicator = None, key
                if condition_indicator == "__condition":
                    assert isinstance(value, list)
                    for item in value:
                        cond = self.__condition(item, store, leader_model)
                        cs.add_condition(cond)
                elif condition_indicator == "__activate":
                    self.__activate(value, store, cs, leader_model)
                elif condition_indicator == "__forbidden":
                    self.__forbidden(value, store, cs, leader_model)
                elif condition_indicator == "__rely_model":
                    # Recorded globally, with a copy of the current path.
                    RelyModels.info.append([value, deepcopy(path)])
            return cs
    # Matches keys of the form "prefix(method)".
    pattern = re.compile(r"(.*)\((.*)\)")
    for key, value in hdl.items():
        mat = pattern.match(key)
        if mat:
            groups = mat.groups()
            assert len(groups) == 2
            prefix_name, method = groups
            value_list = list(value.keys())
            assert len(value_list) >= 1
            if method == "choice":
                pass
            else:
                raise NotImplementedError()
            cur_cs = ConfigurationSpace()
            assert isinstance(value, dict)
            # Cannot use a Constant here; it raises an error
            choice2proba = {}
            not_specific_proba_choices = []
            sum_proba = 0
            for k in value_list:
                v = value[k]
                if isinstance(v, dict) and "__proba" in v:
                    # pop() removes "__proba" from the caller's dict so it is
                    # not seen by the recursive call below.
                    proba = v.pop("__proba")
                    choice2proba[k] = proba
                    sum_proba += proba
                else:
                    not_specific_proba_choices.append(k)
            if sum_proba <= 1:
                # Share the remaining probability mass evenly among the
                # choices that did not declare one.
                if len(not_specific_proba_choices) > 0:
                    p_rest = (1 - sum_proba) / len(not_specific_proba_choices)
                    for not_specific_proba_choice in not_specific_proba_choices:
                        choice2proba[not_specific_proba_choice] = p_rest
            else:
                # Declared probabilities exceed 1: fall back to uniform.
                choice2proba = {k: 1 / len(value_list) for k in value_list}
            proba_list = [choice2proba[k] for k in value_list]
            value_list = list(map(smac_hdl._encode, value_list))  # choices must be str
            option_param = CategoricalHyperparameter(
                '__choice__', value_list, weights=proba_list)  # todo : default
            cur_cs.add_hyperparameter(option_param)
            for sub_key, sub_value in value.items():
                assert isinstance(sub_value, dict)
                sub_cs = self.recursion(sub_value,
                                        path=list(path) + [prefix_name, sub_key])
                # The sub-space is attached under the '__choice__' parameter
                # with the raw sub_key as its parent value.
                parent_hyperparameter = {
                    'parent': option_param,
                    'value': sub_key
                }
                cur_cs.add_configuration_space(
                    sub_key, sub_cs, parent_hyperparameter=parent_hyperparameter)
            cs.add_configuration_space(prefix_name, cur_cs)
        elif isinstance(value, dict):
            # Plain nested space: recurse and attach it under its key.
            sub_cs = self.recursion(value, path=list(path) + [key])
            cs.add_configuration_space(key, sub_cs)
        else:
            raise NotImplementedError()
    return cs
def test_generate_grid(self):
    """Test grid generation.

    Covers four groups of cases:

    1. A mixed space (categorical, constant, float, integer, ordinal).
    2. Extreme cases: only numerical, only categorical, only constant and
       an empty space.
    3. Quantization and a conditional space with two levels of conditions.
    4. A single float hyperparameter, with and without ``num_steps_dict``.
    """
    # Sub-test 1
    cs = ConfigurationSpace(seed=1234)

    cat1 = CategoricalHyperparameter(name='cat1', choices=['T', 'F'])
    const1 = Constant(name='const1', value=4)
    float1 = UniformFloatHyperparameter(name='float1', lower=-1, upper=1, log=False)
    int1 = UniformIntegerHyperparameter(name='int1', lower=10, upper=100, log=True)
    ord1 = OrdinalHyperparameter(name='ord1', sequence=['1', '2', '3'])

    cs.add_hyperparameters([float1, int1, cat1, ord1, const1])

    num_steps_dict = {'float1': 11, 'int1': 6}
    generated_grid = generate_grid(cs, num_steps_dict)

    # Check randomly pre-selected values in the generated_grid
    # 2 * 1 * 11 * 6 * 3 total diff. possible configurations
    self.assertEqual(len(generated_grid), 396)
    # Check 1st and last generated configurations completely:
    first_expected_dict = {
        'cat1': 'T',
        'const1': 4,
        'float1': -1.0,
        'int1': 10,
        'ord1': '1',
    }
    last_expected_dict = {
        'cat1': 'F',
        'const1': 4,
        'float1': 1.0,
        'int1': 100,
        'ord1': '3',
    }
    self.assertEqual(generated_grid[0].get_dictionary(), first_expected_dict)
    self.assertEqual(generated_grid[-1].get_dictionary(), last_expected_dict)
    self.assertEqual(generated_grid[198].get_dictionary()['cat1'], 'F')
    self.assertEqual(generated_grid[45].get_dictionary()['const1'], 4)
    # The 2 most frequently changing HPs (int1 and ord1) have 3 * 6 = 18 different values for
    # each value of float1, so the 4th value of float1 of -0.4 is reached after
    # 3 * 18 = 54 values in the generated_grid (and remains the same for the next 18 values):
    for i in range(18):
        self.assertAlmostEqual(
            generated_grid[54 + i].get_dictionary()['float1'], -0.4, places=2)
    # 5th diff. value for int1 after 4 * 3 = 12 values. Reasoning as above.
    self.assertEqual(generated_grid[12].get_dictionary()['int1'], 63)
    self.assertEqual(generated_grid[3].get_dictionary()['ord1'], '1')
    self.assertEqual(generated_grid[4].get_dictionary()['ord1'], '2')
    self.assertEqual(generated_grid[5].get_dictionary()['ord1'], '3')

    # Sub-test 2
    # Test for extreme cases: only numerical
    cs = ConfigurationSpace(seed=1234)
    cs.add_hyperparameters([float1, int1])

    num_steps_dict = {'float1': 11, 'int1': 6}
    generated_grid = generate_grid(cs, num_steps_dict)

    self.assertEqual(len(generated_grid), 66)
    # Check 1st and last generated configurations completely:
    first_expected_dict = {'float1': -1.0, 'int1': 10}
    last_expected_dict = {'float1': 1.0, 'int1': 100}
    self.assertEqual(generated_grid[0].get_dictionary(), first_expected_dict)
    self.assertEqual(generated_grid[-1].get_dictionary(), last_expected_dict)

    # Test: only categorical
    cs = ConfigurationSpace(seed=1234)
    cs.add_hyperparameters([cat1])

    generated_grid = generate_grid(cs)

    self.assertEqual(len(generated_grid), 2)
    # Check 1st and last generated configurations completely:
    self.assertEqual(generated_grid[0].get_dictionary()['cat1'], 'T')
    self.assertEqual(generated_grid[-1].get_dictionary()['cat1'], 'F')

    # Test: only constant
    cs = ConfigurationSpace(seed=1234)
    cs.add_hyperparameters([const1])

    generated_grid = generate_grid(cs)

    self.assertEqual(len(generated_grid), 1)
    # Check 1st and only generated configuration completely:
    self.assertEqual(generated_grid[0].get_dictionary()['const1'], 4)

    # Test: no hyperparameters yet
    cs = ConfigurationSpace(seed=1234)

    generated_grid = generate_grid(cs, num_steps_dict)

    # With no hyperparameters the generated grid is expected to be empty.
    # NOTE(review): this comment used to claim that itertools.product() yields a single
    # empty tuple and therefore a single empty Configuration, which contradicts the
    # asserted length of 0 -- confirm against get_cartesian_product.
    self.assertEqual(len(generated_grid), 0)

    # Sub-test 3
    # Tests for quantization and conditional spaces. num_steps_dict supports specifying steps
    # for only some of the int and float HPs. The rest are taken from the 'q' member variables
    # of these HPs. The conditional space tested has 2 levels of conditions.
    cs2 = ConfigurationSpace(seed=123)
    float1 = UniformFloatHyperparameter(name='float1', lower=-1, upper=1, log=False)
    int1 = UniformIntegerHyperparameter(name='int1', lower=0, upper=1000, log=False, q=500)
    cs2.add_hyperparameters([float1, int1])

    int2_cond = UniformIntegerHyperparameter(name='int2_cond', lower=10, upper=100, log=True)
    cs2.add_hyperparameters([int2_cond])
    cond_1 = AndConjunction(LessThanCondition(int2_cond, float1, -0.5),
                            GreaterThanCondition(int2_cond, int1, 600))
    cs2.add_conditions([cond_1])
    cat1_cond = CategoricalHyperparameter(name='cat1_cond', choices=['apple', 'orange'])
    cs2.add_hyperparameters([cat1_cond])
    cond_2 = AndConjunction(GreaterThanCondition(cat1_cond, int1, 300),
                            LessThanCondition(cat1_cond, int1, 700),
                            GreaterThanCondition(cat1_cond, float1, -0.5),
                            LessThanCondition(cat1_cond, float1, 0.5))
    cs2.add_conditions([cond_2])
    float2_cond = UniformFloatHyperparameter(name='float2_cond',
                                             lower=10., upper=100., log=True)
    # 2nd level dependency in ConfigurationSpace tree being tested
    cs2.add_hyperparameters([float2_cond])
    cond_3 = GreaterThanCondition(float2_cond, int2_cond, 50)
    cs2.add_conditions([cond_3])
    num_steps_dict1 = {'float1': 4, 'int2_cond': 3, 'float2_cond': 3}
    generated_grid = generate_grid(cs2, num_steps_dict1)
    self.assertEqual(len(generated_grid), 18)

    # RR: I manually generated the grid and verified the values were correct.
    # Check 1st and last generated configurations completely:
    first_expected_dict = {'float1': -1.0, 'int1': 0}
    last_expected_dict = {
        'float1': -1.0,
        'int1': 1000,
        'int2_cond': 100,
        'float2_cond': 100.0,
    }
    self.assertEqual(generated_grid[0].get_dictionary(), first_expected_dict)
    self.assertEqual(generated_grid[-1].get_dictionary(), last_expected_dict)
    # Here, we test that a few randomly chosen values in the generated grid
    # correspond to the ones I checked.
    self.assertEqual(generated_grid[3].get_dictionary()['int1'], 1000)
    self.assertEqual(generated_grid[12].get_dictionary()['cat1_cond'], 'orange')
    self.assertAlmostEqual(
        generated_grid[-2].get_dictionary()['float2_cond'], 31.622776601683803, places=3)

    # Sub-test 4
    # Test: only a single hyperparameter and num_steps_dict is None
    cs = ConfigurationSpace(seed=1234)
    cs.add_hyperparameters([float1])

    num_steps_dict = {'float1': 11}
    # assertRaises makes the test fail when no ValueError is raised at all; a plain
    # try/except would silently skip the message check in that case.
    with self.assertRaises(ValueError) as raised:
        generate_grid(cs)
    self.assertEqual(
        str(raised.exception),
        "num_steps_dict is None or doesn't contain "
        "the number of points to divide float1 into. And its quantization "
        "factor is None. Please provide/set one of these values.")

    generated_grid = generate_grid(cs, num_steps_dict)

    self.assertEqual(len(generated_grid), 11)
    # Check 1st and last generated configurations completely:
    self.assertEqual(generated_grid[0].get_dictionary()['float1'], -1.0)
    self.assertEqual(generated_grid[-1].get_dictionary()['float1'], 1.0)
def test_generate_grid(self):
    """Work-in-progress grid generation test.

    Builds a mixed configuration space (its assertions are currently
    commented out) and a conditional, partly quantized space, then generates
    a grid for the latter through ``ConfigSpaceGrid``. No assertion is active
    in the visible part of this test.
    """
    # First space: float, integer (log scale), categorical, ordinal and constant HPs.
    cs = ConfigurationSpace(seed=1234)

    cat1 = CategoricalHyperparameter(name='cat1', choices=['T', 'F'])
    const1 = Constant(name='const1', value=4)
    float1 = UniformFloatHyperparameter(name='float1', lower=-1, upper=1, log=False)
    int1 = UniformIntegerHyperparameter(name='int1', lower=10, upper=100, log=True)
    ord1 = OrdinalHyperparameter(name='ord1', sequence=['1', '2', '3'])

    cs.add_hyperparameters([float1, int1, cat1, ord1, const1])

    num_steps_dict = {'float1': 11, 'int1': 6}
    # TODO: uncomment below and add asserts for later test cases
    # generated_grid = generate_grid(cs, num_steps_dict)
    #
    # # Check randomly pre-selected values in the generated_grid
    # # 2 * 1 * 11 * 6 * 3 total diff. possibilities for the Configuration
    # # (i.e., for each tuple of HPs)
    # self.assertEqual(len(generated_grid), 396)
    # self.assertEqual(generated_grid[0].get_dictionary()['cat1'], 'T')
    # self.assertEqual(generated_grid[198].get_dictionary()['cat1'], 'F')
    #
    # self.assertEqual(generated_grid[45].get_dictionary()['const1'], 4)
    #
    # # The 2 most frequently changing HPs (int1 and ord1) have 3*6 = 18 different values
    # # for each value of float1, so the 4th value of float1 of -0.4 is reached after
    # # 3*18 = 54 values.
    # self.assertAlmostEqual(generated_grid[55].get_dictionary()['float1'], -0.4, places=2)
    # # 5th diff. value for int1 after 4*3 = 12 values. Reasoning as above.
    # self.assertEqual(generated_grid[12].get_dictionary()['int1'], 63)
    # self.assertEqual(generated_grid[3].get_dictionary()['ord1'], '1')
    #
    # self.assertEqual(generated_grid[4].get_dictionary()['ord1'], '2')
    #
    # self.assertEqual(generated_grid[5].get_dictionary()['ord1'], '3')
    #
    # Tests for quantization and conditional spaces: 1 basic one for 1 condition;
    # 1 for tree of conditions; 1 for tree of conditions with OrConjunction and
    # AndConjunction;
    cs2 = CS.ConfigurationSpace(seed=123)
    float1 = CSH.UniformFloatHyperparameter(name='float1', lower=-1, upper=1, log=False)
    # int1 is the only HP here that is created with a quantization factor (q=500).
    int1 = CSH.UniformIntegerHyperparameter(name='int1', lower=0, upper=1000, log=False, q=500)
    cs2.add_hyperparameters([float1, int1])

    # int2_cond is conditioned on float1 < -0.5 AND int1 > 600.
    int2_cond = CSH.UniformIntegerHyperparameter(name='int2_cond', lower=10, upper=100, log=True)
    cs2.add_hyperparameters([int2_cond])
    cond_1 = CS.AndConjunction(
        CS.LessThanCondition(int2_cond, float1, -0.5),
        CS.GreaterThanCondition(int2_cond, int1, 600))
    cs2.add_conditions([cond_1])
    # cat1_cond is conditioned on 300 < int1 < 700 AND -0.5 < float1 < 0.5.
    cat1_cond = CSH.CategoricalHyperparameter(name='cat1_cond', choices=['apple', 'orange'])
    cs2.add_hyperparameters([cat1_cond])
    cond_2 = CS.AndConjunction(
        CS.GreaterThanCondition(cat1_cond, int1, 300),
        CS.LessThanCondition(cat1_cond, int1, 700),
        CS.GreaterThanCondition(cat1_cond, float1, -0.5),
        CS.LessThanCondition(cat1_cond, float1, 0.5))
    cs2.add_conditions([cond_2])
    # float2_cond depends on int2_cond, which is itself conditional (2nd level).
    float2_cond = CSH.UniformFloatHyperparameter(name='float2_cond', lower=10., upper=100., log=True)
    cs2.add_hyperparameters([float2_cond])
    cond_3 = CS.GreaterThanCondition(float2_cond, int2_cond, 50)
    cs2.add_conditions([cond_3])
    # NOTE(review): debugging output; the printed space is not checked by the test.
    print(cs2)
    # Step counts are given for float1, int2_cond and float2_cond only; int1 is omitted.
    num_steps_dict1 = {'float1': 4, 'int2_cond': 3, 'float2_cond': 3}
    configspace_grid = ConfigSpaceGrid(cs2)
    # NOTE(review): the generated grid is not asserted on in the visible code.
    generated_grid = configspace_grid.generate_grid(num_steps_dict1)