def test_get_hyperparameters_topological_sort(self):
    # and now for something more complicated
    cs = ConfigurationSpace()
    hp1 = CategoricalHyperparameter("input1", [0, 1])
    hp2 = CategoricalHyperparameter("input2", [0, 1])
    hp3 = CategoricalHyperparameter("input3", [0, 1])
    hp4 = CategoricalHyperparameter("input4", [0, 1])
    hp5 = CategoricalHyperparameter("input5", [0, 1])
    hp6 = Constant("AND", "True")
    # More top-level hyperparameters
    hp7 = CategoricalHyperparameter("input7", [0, 1])
    # Somewhat shuffled
    hyperparameters = [hp1, hp2, hp3, hp4, hp5, hp6, hp7]

    for hp in hyperparameters:
        cs.add_hyperparameter(hp)

    cond1 = EqualsCondition(hp6, hp1, 1)
    cond2 = NotEqualsCondition(hp6, hp2, 1)
    cond3 = InCondition(hp6, hp3, [1])
    cond4 = EqualsCondition(hp5, hp3, 1)
    cond5 = EqualsCondition(hp4, hp5, 1)
    cond6 = EqualsCondition(hp6, hp4, 1)
    cond7 = EqualsCondition(hp6, hp5, 1)

    conj1 = AndConjunction(cond1, cond2)
    conj2 = OrConjunction(conj1, cond3)
    conj3 = AndConjunction(conj2, cond6, cond7)

    cs.add_condition(cond4)
    hps = cs.get_hyperparameters()
    # AND is moved to the front because of alphabetical sorting
    for hp, idx in zip(hyperparameters, [1, 2, 3, 4, 6, 0, 5]):
        self.assertEqual(hps.index(hp), idx)
        self.assertEqual(cs._hyperparameter_idx[hp.name], idx)
        self.assertEqual(cs._idx_to_hyperparameter[idx], hp.name)

    cs.add_condition(cond5)
    hps = cs.get_hyperparameters()
    for hp, idx in zip(hyperparameters, [1, 2, 3, 6, 5, 0, 4]):
        self.assertEqual(hps.index(hp), idx)
        self.assertEqual(cs._hyperparameter_idx[hp.name], idx)
        self.assertEqual(cs._idx_to_hyperparameter[idx], hp.name)

    cs.add_condition(conj3)
    hps = cs.get_hyperparameters()
    for hp, idx in zip(hyperparameters, [0, 1, 2, 5, 4, 6, 3]):
        self.assertEqual(hps.index(hp), idx)
        self.assertEqual(cs._hyperparameter_idx[hp.name], idx)
        self.assertEqual(cs._idx_to_hyperparameter[idx], hp.name)

def test_get_hyperparameters(self):
    cs = ConfigurationSpace()
    self.assertEqual(0, len(cs.get_hyperparameters()))
    hp1 = CategoricalHyperparameter("parent", [0, 1])
    cs.add_hyperparameter(hp1)
    self.assertEqual([hp1], cs.get_hyperparameters())
    hp2 = UniformIntegerHyperparameter("child", 0, 10)
    cs.add_hyperparameter(hp2)
    cond1 = EqualsCondition(hp2, hp1, 1)
    cs.add_condition(cond1)
    self.assertEqual([hp1, hp2], cs.get_hyperparameters())
    # TODO: add more tests for the topological sort!
    self.assertEqual([hp1, hp2], cs.get_hyperparameters())

def impute_default_values(configuration_space: ConfigurationSpace,
                          configs_array: np.ndarray) -> np.ndarray:
    """Impute inactive hyperparameters in configuration array with their default.

    Necessary to apply an EPM to the data.

    Parameters
    ----------
    configuration_space : ConfigurationSpace

    configs_array : np.ndarray
        Array of configurations.

    Returns
    -------
    np.ndarray
        Array with configuration hyperparameters. Inactive values are imputed
        with their default value.
    """
    for hp in configuration_space.get_hyperparameters():
        default = hp.normalized_default_value
        idx = configuration_space.get_idx_by_hyperparameter_name(hp.name)
        nonfinite_mask = ~np.isfinite(configs_array[:, idx])
        configs_array[nonfinite_mask, idx] = default

    return configs_array

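A minimal usage sketch for the function above (assuming the standard ConfigSpace API and that impute_default_values is in scope; the space and names are illustrative). Inactive hyperparameters appear as NaN in a configuration's vector form and are overwritten with their normalized defaults:

import numpy as np
from ConfigSpace import ConfigurationSpace
from ConfigSpace.conditions import EqualsCondition
from ConfigSpace.hyperparameters import (
    CategoricalHyperparameter, UniformFloatHyperparameter)

cs = ConfigurationSpace()
parent = CategoricalHyperparameter("parent", [0, 1])
child = UniformFloatHyperparameter("child", 0.0, 1.0)
cs.add_hyperparameters([parent, child])
cs.add_condition(EqualsCondition(child, parent, 1))

# "child" is NaN in the vector whenever "parent" == 0 (i.e. it is inactive).
arr = np.array([c.get_array() for c in cs.sample_configuration(5)])
imputed = impute_default_values(cs, arr)
assert np.isfinite(imputed).all()
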
def get_random_initial_configs(shps: ConfigurationSpace, n_configs,
                               random_state=42) -> List[Configuration]:
    None_name = "None:NoneType"
    shps = deepcopy(shps)
    shps.seed(random_state)
    for hp in shps.get_hyperparameters():
        name: str = hp.name
        if name.startswith(PHASE1) and name.endswith("__choice__") and \
                (None_name in hp.choices):
            # FIXME: after the refactoring, did None_name change?
            hp.default_value = None_name

    model_choice = shps.get_hyperparameter(f"{PHASE2}:__choice__")
    result = []
    for choice in model_choice.choices:
        cur_phps = deepcopy(shps)
        cur_phps.get_hyperparameter(f"{PHASE2}:__choice__").default_value = choice
        default = cur_phps.get_default_configuration()
        result.append(default)
    if len(result) < n_configs:
        result.extend(shps.sample_configuration(n_configs - len(result)))
    elif len(result) > n_configs:
        result = random.sample(result, n_configs)
    return result

def _preprocess_configspace(
        self, config_space: CS.ConfigurationSpace) -> CS.ConfigurationSpace:
    """Casts each hyperparameter's sequence and default value to np.float32
    (assumes every hyperparameter exposes a ``sequence``, i.e. is ordinal)."""
    for hp in config_space.get_hyperparameters():
        hp.sequence = tuple(np.array(hp.sequence).astype(np.float32))
        hp.default_value = np.float32(hp.default_value)

    return config_space

def deactivate_inactive_hyperparameters(
    configuration: Dict,
    configuration_space: ConfigurationSpace,
    vector: Union[None, np.ndarray] = None,
):
    hyperparameters = configuration_space.get_hyperparameters()
    configuration = Configuration(configuration_space=configuration_space,
                                  values=configuration,
                                  vector=vector,
                                  allow_inactive_with_values=True)

    hps = deque()
    unconditional_hyperparameters = \
        configuration_space.get_all_unconditional_hyperparameters()
    hyperparameters_with_children = list()
    for uhp in unconditional_hyperparameters:
        children = configuration_space._children_of[uhp]
        if len(children) > 0:
            hyperparameters_with_children.append(uhp)
    hps.extendleft(hyperparameters_with_children)

    inactive = set()

    while len(hps) > 0:
        hp = hps.pop()
        children = configuration_space._children_of[hp]
        for child in children:
            conditions = configuration_space._parent_conditions_of[child.name]
            for condition in conditions:
                if not condition.evaluate_vector(configuration.get_array()):
                    dic = configuration.get_dictionary()
                    try:
                        del dic[child.name]
                    except KeyError:
                        continue
                    configuration = Configuration(
                        configuration_space=configuration_space,
                        values=dic,
                        allow_inactive_with_values=True)
                    inactive.add(child.name)
                hps.appendleft(child.name)

    for hp in hyperparameters:
        if hp.name in inactive:
            dic = configuration.get_dictionary()
            try:
                del dic[hp.name]
            except KeyError:
                continue
            configuration = Configuration(
                configuration_space=configuration_space,
                values=dic,
                allow_inactive_with_values=True)

    return Configuration(configuration_space,
                         values=configuration.get_dictionary())

def get_component_mapping(config_space: ConfigSpace.ConfigurationSpace):
    """
    Each hyperparameter has both a name and a meta-field containing a
    component prefix. This function returns a mapping from the concatenated
    component prefix and hyperparameter name to the hyperparameter name
    (by which it can be obtained from the config space).
    """
    result = dict()
    for param in config_space.get_hyperparameters():
        component_name = param.meta['component'] + '__' + param.name
        result[component_name] = param.name
    return result

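A hedged usage sketch (ConfigSpace hyperparameters accept a ``meta`` dict at construction time; the 'svc' component name here is illustrative):

from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter

# Hypothetical space whose hyperparameters carry a 'component' meta-field.
cs = ConfigurationSpace()
cs.add_hyperparameter(UniformFloatHyperparameter(
    "C", 0.01, 100.0, meta={'component': 'svc'}))

mapping = get_component_mapping(cs)
assert mapping == {'svc__C': 'C'}
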
def deactivate_inactive_hyperparameters(configuration: dict,
                                        configuration_space: ConfigurationSpace):
    hyperparameters = configuration_space.get_hyperparameters()
    configuration = Configuration(configuration_space=configuration_space,
                                  values=configuration,
                                  allow_inactive_with_values=True)

    hps = deque()
    unconditional_hyperparameters = \
        configuration_space.get_all_unconditional_hyperparameters()
    hyperparameters_with_children = list()
    for uhp in unconditional_hyperparameters:
        children = configuration_space._children_of[uhp]
        if len(children) > 0:
            hyperparameters_with_children.append(uhp)
    hps.extendleft(hyperparameters_with_children)

    inactive = set()

    while len(hps) > 0:
        hp = hps.pop()
        children = configuration_space._children_of[hp]
        for child in children:
            conditions = configuration_space._parent_conditions_of[child.name]
            for condition in conditions:
                if not condition.evaluate_vector(configuration.get_array()):
                    dic = configuration.get_dictionary()
                    try:
                        del dic[child.name]
                    except KeyError:
                        continue
                    configuration = Configuration(
                        configuration_space=configuration_space,
                        values=dic,
                        allow_inactive_with_values=True)
                    inactive.add(child.name)
                hps.appendleft(child.name)

    for hp in hyperparameters:
        if hp.name in inactive:
            dic = configuration.get_dictionary()
            try:
                del dic[hp.name]
            except KeyError:
                continue
            configuration = Configuration(
                configuration_space=configuration_space,
                values=dic,
                allow_inactive_with_values=True)

    return Configuration(configuration_space,
                         values=configuration.get_dictionary())

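A usage sketch that applies equally to this variant and the vector-accepting one above (names illustrative): values belonging to hyperparameters that the conditions render inactive are dropped from the returned configuration.

from ConfigSpace import ConfigurationSpace
from ConfigSpace.conditions import EqualsCondition
from ConfigSpace.hyperparameters import (
    CategoricalHyperparameter, UniformIntegerHyperparameter)

cs = ConfigurationSpace()
parent = CategoricalHyperparameter("parent", [0, 1])
child = UniformIntegerHyperparameter("child", 0, 10)
cs.add_hyperparameters([parent, child])
cs.add_condition(EqualsCondition(child, parent, 1))

# "child" is inactive because parent == 0, so its value is removed.
config = deactivate_inactive_hyperparameters({"parent": 0, "child": 5}, cs)
assert "child" not in config.get_dictionary()
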
def fit(self, config_space: ConfigurationSpace):
    mask = []
    n_choices_list = []
    n_constants = 0
    n_variables = 0
    n_top_levels = 0
    parents = []
    parent_values = []
    # TODO: separate `parents` from `groups`
    for hp in config_space.get_hyperparameters():
        if isinstance(hp, Constant) or \
                (isinstance(hp, CategoricalHyperparameter) and len(hp.choices) == 1):
            # ignore
            mask.append(False)
            n_constants += 1
        else:
            mask.append(True)
            n_variables += 1
            if isinstance(hp, CategoricalHyperparameter):
                n_choices_list.append(len(hp.choices))
            else:
                n_choices_list.append(0)
            cur_parents = config_space.get_parents_of(hp.name)
            if len(cur_parents) == 0:
                n_top_levels += 1
                parents.append(None)
                parent_values.append(None)
            else:
                parents.append(cur_parents[0])
                parent_conditions = config_space.get_parent_conditions_of(hp.name)
                parent_condition = parent_conditions[0]
                parent_values.append(parent_condition.value)
    groups_str = [f"{parent}-{parent_value}"
                  for parent, parent_value in zip(parents, parent_values)]
    group_encoder = LabelEncoder()
    groups = group_encoder.fit_transform(groups_str)
    self.config_space = config_space
    self.groups_str = groups_str
    self.group_encoder = group_encoder
    self.groups = groups
    self.n_groups = np.max(groups) + 1
    self.mask = np.array(mask, dtype="bool")
    self.n_choices_list = n_choices_list
    self.n_constants = n_constants
    self.n_variables = n_variables
    self.n_top_levels = n_top_levels
    return self

def CS2HyperoptSpace(cs: ConfigurationSpace):
    result = {}
    for hyperparameter in cs.get_hyperparameters():
        name = hyperparameter.name
        if isinstance(hyperparameter, CategoricalHyperparameter):
            # `hp` is hyperopt's search-space module (hyperopt.hp)
            result[name] = hp.choice(name, hyperparameter.choices)
        elif isinstance(hyperparameter, UniformFloatHyperparameter):
            lower = hyperparameter.lower
            upper = hyperparameter.upper
            result[name] = hp.uniform(name, lower, upper)
        else:
            # TODO: handle more hyperparameter types
            raise ValueError(
                f"Unsupported hyperparameter type: {type(hyperparameter)}")
    return result

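A hedged usage sketch (assumes hyperopt is installed and that ``hp`` is ``hyperopt.hp``, as the function body implies; the space is illustrative):

from hyperopt import hp  # the `hp` module the function body refers to
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import (
    CategoricalHyperparameter, UniformFloatHyperparameter)

cs = ConfigurationSpace()
cs.add_hyperparameters([
    CategoricalHyperparameter("kernel", ["rbf", "linear"]),
    UniformFloatHyperparameter("C", 0.01, 100.0),
])

space = CS2HyperoptSpace(cs)
# space == {"kernel": hp.choice("kernel", ...), "C": hp.uniform("C", 0.01, 100.0)}
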
def remove_hyperparameter(
        config_space: ConfigSpace.ConfigurationSpace,
        hyperparameter_name: str) -> ConfigSpace.ConfigurationSpace:
    config_space_prime = ConfigSpace.ConfigurationSpace(meta=config_space.meta)
    for hyperparameter in config_space.get_hyperparameters():
        if hyperparameter.name != hyperparameter_name:
            config_space_prime.add_hyperparameter(hyperparameter)
    for condition in config_space.get_conditions():
        if condition.parent.name != hyperparameter_name \
                and condition.child.name != hyperparameter_name:
            config_space_prime.add_condition(condition)
        elif condition.parent.name == hyperparameter_name:
            raise ValueError('Hyperparameter %s can not be removed '
                             'as it is a parent condition (child: %s)'
                             % (hyperparameter_name, condition.child.name))
    return config_space_prime

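A minimal usage sketch (unconditioned space, names illustrative):

from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter

cs = ConfigurationSpace()
cs.add_hyperparameters([
    UniformFloatHyperparameter("alpha", 0.0, 1.0),
    UniformFloatHyperparameter("beta", 0.0, 1.0),
])

pruned = remove_hyperparameter(cs, "alpha")
assert [h.name for h in pruned.get_hyperparameters()] == ["beta"]
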
def _config_space_to_parameter_distributions(
        configuration_space: ConfigSpace.ConfigurationSpace) \
        -> typing.Dict[str, typing.Union[typing.List,
                                         scipy.stats.rv_discrete,
                                         scipy.stats.rv_continuous]]:
    """
    Takes a ConfigSpace object and serializes it into a parameter grid, to be
    used by the scikit-learn interface.

    Parameters
    ----------
    configuration_space: ConfigSpace.ConfigurationSpace
        The configuration space describing all hyperparameters and ranges

    Returns
    -------
    result: Dict
        A dict mapping from hyperparameter name to a list of values or a
        distribution
    """
    result = dict()
    for hyperparameter in configuration_space.get_hyperparameters():
        if isinstance(hyperparameter,
                      ConfigSpace.hyperparameters.UniformFloatHyperparameter):
            loc = hyperparameter.lower
            scale = hyperparameter.upper - hyperparameter.lower
            distribution = scipy.stats.uniform(loc=loc, scale=scale)
        elif isinstance(hyperparameter,
                        ConfigSpace.hyperparameters.UniformIntegerHyperparameter):
            distribution = scipy.stats.randint(hyperparameter.lower,
                                               hyperparameter.upper + 1)
        elif isinstance(hyperparameter,
                        ConfigSpace.hyperparameters.CategoricalHyperparameter):
            distribution = list(hyperparameter.choices)
        elif isinstance(hyperparameter,
                        ConfigSpace.hyperparameters.UnParametrizedHyperparameter):
            distribution = [hyperparameter.value]
        elif isinstance(hyperparameter, ConfigSpace.hyperparameters.Constant):
            distribution = [hyperparameter.value]
        else:
            raise ValueError('Hyperparameter type not supported yet: %s'
                             % type(hyperparameter))
        result[hyperparameter.name] = distribution
    return result

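A hedged usage sketch: the returned dict has the shape scikit-learn's RandomizedSearchCV expects for ``param_distributions`` (the space here is illustrative).

from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import (
    CategoricalHyperparameter, UniformFloatHyperparameter)

cs = ConfigurationSpace()
cs.add_hyperparameters([
    UniformFloatHyperparameter("C", 0.01, 100.0),
    CategoricalHyperparameter("kernel", ["rbf", "linear"]),
])

dists = _config_space_to_parameter_distributions(cs)
# dists["C"] is a frozen scipy uniform distribution, dists["kernel"] a plain
# list -- both accepted by
# sklearn.model_selection.RandomizedSearchCV(param_distributions=dists, ...)
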
def fix_types(configuration: dict, configuration_space: ConfigurationSpace):
    '''
    Iterates over all hyperparameters in the ConfigSpace
    and fixes the types of the parameter values in the configuration.

    Arguments
    ---------
    configuration: dict
        param name -> param value
    configuration_space: ConfigurationSpace
        Configuration space which knows the types for all parameter values

    Returns
    -------
    configuration: dict
        with fixed types of parameter values
    '''
    for param in configuration_space.get_hyperparameters():
        param_name = param.name
        if configuration.get(param_name) is not None:
            if isinstance(param, (CategoricalHyperparameter)):
                # should be unnecessary, but to be on the safe side:
                configuration[param_name] = str(configuration[param_name])
            elif isinstance(param, (OrdinalHyperparameter)):
                # should be unnecessary, but to be on the safe side:
                configuration[param_name] = str(configuration[param_name])
            elif isinstance(param, Constant):
                # should be unnecessary, but to be on the safe side:
                configuration[param_name] = str(configuration[param_name])
            elif isinstance(param, UniformFloatHyperparameter):
                configuration[param_name] = float(configuration[param_name])
            elif isinstance(param, UniformIntegerHyperparameter):
                configuration[param_name] = int(configuration[param_name])
            else:
                raise TypeError("Unknown hyperparameter type %s" % type(param))
    return configuration

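A minimal usage sketch (illustrative space): string values, e.g. parsed from a command line, are cast back to the types declared in the space.

from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import (
    UniformFloatHyperparameter, UniformIntegerHyperparameter)

cs = ConfigurationSpace()
cs.add_hyperparameters([
    UniformFloatHyperparameter("lr", 1e-4, 1.0),
    UniformIntegerHyperparameter("depth", 1, 10),
])

fixed = fix_types({"lr": "0.05", "depth": "3"}, cs)
assert fixed == {"lr": 0.05, "depth": 3}
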
def _check_and_cast_fidelity(fidelity: Union[dict, ConfigSpace.Configuration, None],
                             fidelity_space: ConfigSpace.ConfigurationSpace,
                             **kwargs) -> ConfigSpace.Configuration:
    """ Helper function to evaluate the given fidelity object.
    Similar to the checking and casting above, we validate the fidelity
    object by casting it to a ConfigSpace.Configuration object.
    If the fidelity is not specified (None), we use the benchmark's default
    fidelity. If the benchmark is a multi-multi-fidelity benchmark and only a
    subset of the available fidelities is specified, we fill the missing ones
    with their default values.
    """
    # Check that no fidelities are part of the kwargs.
    f_in_kwargs = []
    for f in fidelity_space.get_hyperparameters():
        if f.name in kwargs:
            f_in_kwargs.append(f.name)
    if len(f_in_kwargs) != 0:
        raise ValueError(f'Fidelity parameters {", ".join(f_in_kwargs)} '
                         f'should not be part of kwargs\n'
                         f'Fidelity: {fidelity}\n Kwargs: {kwargs}')

    default_fidelities = fidelity_space.get_default_configuration()

    if fidelity is None:
        fidelity = default_fidelities
    if isinstance(fidelity, dict):
        default_fidelities_cfg = default_fidelities.get_dictionary()
        fidelity_copy = fidelity.copy()
        fidelity = {k: fidelity_copy.pop(k, v)
                    for k, v in default_fidelities_cfg.items()}
        assert len(fidelity_copy) == 0, \
            'Provided fidelity dict contained unknown fidelity ' \
            f'values: {fidelity_copy.keys()}'
        fidelity = ConfigSpace.Configuration(fidelity_space, fidelity)
    elif isinstance(fidelity, ConfigSpace.Configuration):
        fidelity = fidelity
    else:
        raise TypeError(f'Fidelity has to be an instance of type None, dict, or '
                        f'ConfigSpace.Configuration but was {type(fidelity)}')
    # Ensure that the extracted fidelity values play well with the defined
    # fidelity space.
    fidelity_space.check_configuration(fidelity)
    return fidelity

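A hedged usage sketch of the fill-in-defaults behavior (the fidelity space and its names are illustrative, not from any particular benchmark):

import ConfigSpace
from ConfigSpace.hyperparameters import UniformIntegerHyperparameter

fidelity_space = ConfigSpace.ConfigurationSpace()
fidelity_space.add_hyperparameters([
    UniformIntegerHyperparameter("budget", 1, 100, default_value=100),
    UniformIntegerHyperparameter("subsample", 1, 10, default_value=10),
])

# Only one of the two fidelities is given; the other falls back to its default.
fid = _check_and_cast_fidelity({"budget": 50}, fidelity_space)
assert fid["budget"] == 50 and fid["subsample"] == 10
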
def test_check_configuration(self):
    # TODO this is only a smoke test
    # TODO actually, this rather tests the evaluate methods in the
    # conditions module!
    cs = ConfigurationSpace()
    hp1 = CategoricalHyperparameter("parent", [0, 1])
    cs.add_hyperparameter(hp1)
    hp2 = UniformIntegerHyperparameter("child", 0, 10)
    cs.add_hyperparameter(hp2)
    cond1 = EqualsCondition(hp2, hp1, 0)
    cs.add_condition(cond1)
    # This automatically checks the configuration!
    Configuration(cs, dict(parent=0, child=5))

    # and now for something more complicated
    cs = ConfigurationSpace()
    hp1 = CategoricalHyperparameter("input1", [0, 1])
    cs.add_hyperparameter(hp1)
    hp2 = CategoricalHyperparameter("input2", [0, 1])
    cs.add_hyperparameter(hp2)
    hp3 = CategoricalHyperparameter("input3", [0, 1])
    cs.add_hyperparameter(hp3)
    hp4 = CategoricalHyperparameter("input4", [0, 1])
    cs.add_hyperparameter(hp4)
    hp5 = CategoricalHyperparameter("input5", [0, 1])
    cs.add_hyperparameter(hp5)
    hp6 = Constant("AND", "True")
    cs.add_hyperparameter(hp6)

    cond1 = EqualsCondition(hp6, hp1, 1)
    cond2 = NotEqualsCondition(hp6, hp2, 1)
    cond3 = InCondition(hp6, hp3, [1])
    cond4 = EqualsCondition(hp6, hp4, 1)
    cond5 = EqualsCondition(hp6, hp5, 1)

    conj1 = AndConjunction(cond1, cond2)
    conj2 = OrConjunction(conj1, cond3)
    conj3 = AndConjunction(conj2, cond4, cond5)
    cs.add_condition(conj3)

    expected_outcomes = [
        False, False, False, False, False, False, False, True,
        False, False, False, False, False, False, False, True,
        False, False, False, True, False, False, False, True,
        False, False, False, False, False, False, False, True,
    ]

    for idx, values in enumerate(product([0, 1], repeat=5)):
        # The hyperparameters aren't sorted, but the test assumes them to
        # be sorted.
        hyperparameters = sorted(cs.get_hyperparameters(),
                                 key=lambda t: t.name)
        instantiations = {
            hyperparameters[jdx + 1].name: values[jdx]
            for jdx in range(len(values))
        }

        evaluation = conj3.evaluate(instantiations)
        self.assertEqual(expected_outcomes[idx], evaluation)

        if not evaluation:
            self.assertRaisesRegex(
                ValueError,
                r"Inactive hyperparameter 'AND' must "
                r"not be specified, but has the vector value: "
                r"'0.0'.",
                Configuration,
                cs,
                values={
                    "input1": values[0],
                    "input2": values[1],
                    "input3": values[2],
                    "input4": values[3],
                    "input5": values[4],
                    "AND": "True",
                },
            )
        else:
            Configuration(
                cs,
                values={
                    "input1": values[0],
                    "input2": values[1],
                    "input3": values[2],
                    "input4": values[3],
                    "input5": values[4],
                    "AND": "True",
                },
            )

def run_on_tasks(config_frame_orig: pd.DataFrame,
                 surrogates: typing.Dict[int, sklearn.pipeline.Pipeline],
                 quality_frame: pd.DataFrame,
                 config_space: ConfigSpace.ConfigurationSpace,
                 search_hyperparameters: typing.List[str],
                 search_transform_fns: typing.List[str],
                 hold_out_task: typing.Optional[int],
                 resized_grid_size: int,
                 output_file: str):
    hold_out_surrogate = None
    if hold_out_task is not None:
        hold_out_surrogate = surrogates[hold_out_task]
        surrogates = dict(surrogates)
        del surrogates[hold_out_task]

    # performance untransformed
    baseline_configuration, baseline_results_per_task = \
        select_best_configuration_across_tasks(
            config_frame_orig, surrogates, config_frame_orig.columns.values,
            None, None, None, None)
    baseline_avg_performance = np.average(baseline_results_per_task)
    baseline_holdout = None
    baseline_random_search = None
    if hold_out_task is not None:
        baseline_holdout = openmldefaults.utils.single_prediction(
            config_frame_orig, hold_out_surrogate, baseline_configuration)
        baseline_random_search = [
            openmldefaults.utils.single_prediction(
                config_frame_orig, hold_out_surrogate,
                config_space.sample_configuration(1).get_dictionary())
            for i in range(50)
        ]
    logging.info('Baseline: %s [%s] %s. Holdout task: %s'
                 % (baseline_configuration, baseline_results_per_task,
                    baseline_avg_performance, baseline_holdout))

    transform_fns = openmldefaults.symbolic.all_transform_fns()
    search_transform_fns = search_transform_fns \
        if search_transform_fns is not None else transform_fns.keys()
    search_hyperparameters = search_hyperparameters \
        if search_hyperparameters is not None \
        else [hp.name for hp in config_space.get_hyperparameters()]

    symbolic_defaults = list()
    for idx_hp, hyperparameter_name in enumerate(search_hyperparameters):
        hyperparameter = config_space.get_hyperparameter(hyperparameter_name)
        if isinstance(hyperparameter, ConfigSpace.hyperparameters.Constant):
            logging.warning('Skipping Constant Hyperparameter: %s'
                            % hyperparameter.name)
            continue
        if isinstance(hyperparameter,
                      ConfigSpace.hyperparameters.UnParametrizedHyperparameter):
            logging.warning('Skipping Unparameterized Hyperparameter: %s'
                            % hyperparameter.name)
            continue
        if not isinstance(hyperparameter,
                          ConfigSpace.hyperparameters.NumericalHyperparameter):
            logging.warning('Skipping Non-Numerical Hyperparameter: %s'
                            % hyperparameter.name)
            continue
        logging.info('Started with hyperparameter %s (%d/%d)'
                     % (hyperparameter.name, idx_hp + 1,
                        len(search_hyperparameters)))
        config_space_prime = openmldefaults.utils.remove_hyperparameter(
            config_space, hyperparameter.name)
        configurations = openmldefaults.utils.generate_grid_configurations(
            config_space_prime, 0, resized_grid_size)
        config_frame_prime = pd.DataFrame(configurations)
        for idx_trnfm_fn, transform_name in enumerate(search_transform_fns):
            logging.info('- Transformer fn %s (%d/%d)'
                         % (transform_name, idx_trnfm_fn + 1, len(transform_fns)))
            geom_space = np.geomspace(0.01, 2, 10)
            geom_space = np.append(geom_space, [1])
            for idx_av, alpha_value in enumerate(geom_space):
                logging.info('--- Alpha value %f (%d/%d)'
                             % (alpha_value, idx_av + 1, len(geom_space)))
                for meta_feature in quality_frame.columns.values:
                    try:
                        transform_fn = \
                            openmldefaults.symbolic.all_transform_fns()[transform_name]
                        symbolic_config, symbolic_results_per_task = \
                            select_best_configuration_across_tasks(
                                config_frame_prime,
                                surrogates,
                                config_frame_orig.columns.values,  # note: take the original frame
                                hyperparameter.name,
                                transform_fn,
                                alpha_value,
                                quality_frame[meta_feature].to_dict(),
                            )
                        symbolic_average_performance = np.average(
                            symbolic_results_per_task)
                        if symbolic_average_performance > baseline_avg_performance:
                            symbolic_holdout_score = None
                            if hold_out_surrogate is not None:
                                symbolic_value = transform_fn(
                                    alpha_value,
                                    quality_frame[meta_feature][hold_out_task])
                                symbolic_config[hyperparameter.name] = symbolic_value
                                symbolic_holdout_score = \
                                    openmldefaults.utils.single_prediction(
                                        config_frame_orig, hold_out_surrogate,
                                        symbolic_config)
                            current_result = {
                                'configuration': symbolic_config,
                                'results_per_task': symbolic_results_per_task,
                                'avg_performance': symbolic_average_performance,
                                'holdout_score': symbolic_holdout_score,
                                'transform_hyperparameter': hyperparameter.name,
                                'transform_fn': transform_name,
                                'transform_alpha_value': alpha_value,
                                'transform_meta_feature': meta_feature,
                            }
                            symbolic_defaults.append(current_result)
                            logging.info('Found improvement over base-line: %s'
                                         % current_result)
                    except ZeroDivisionError:
                        logging.warning('Zero division error with (fn=%s, alpha=%s, '
                                        'meta_f=%s). skipping.'
                                        % (transform_name, alpha_value, meta_feature))
                    except OverflowError:
                        logging.warning('Overflow error with (fn=%s, alpha=%s, '
                                        'meta_f=%s). skipping.'
                                        % (transform_name, alpha_value, meta_feature))
                    except ValueError:
                        # keep a close eye on this one. Question: why do the
                        # others not catch this one?
                        logging.warning('Value error with (fn=%s, alpha=%s, '
                                        'meta_f=%s). skipping.'
                                        % (transform_name, alpha_value, meta_feature))
    total = {
        'baseline_configuration': baseline_configuration,
        'baseline_avg_performance': baseline_avg_performance,
        'baseline_random_search': baseline_random_search,
        'baseline_results_per_task': baseline_results_per_task,
        'baseline_holdout_score': baseline_holdout,
        'symbolic_defaults': symbolic_defaults,
    }
    with open(output_file, 'wb') as fp:
        pickle.dump(obj=total, file=fp, protocol=0)
    logging.info('Saved result file to: %s' % output_file)

def fit(self, config_space: ConfigurationSpace):
    mask = []
    n_choices_list = []
    is_ordinal_list = []
    sequence_mapper = {}
    n_constants = 0
    n_variables = 0
    n_variables_embedded = 0
    n_top_levels = 0
    parents = []
    parent_values = []
    is_child = []
    # TODO: separate `parents` from `groups`
    for hp in config_space.get_hyperparameters():
        if isinstance(hp, Constant) or \
                (isinstance(hp, CategoricalHyperparameter) and len(hp.choices) == 1) or \
                (isinstance(hp, OrdinalHyperparameter) and len(hp.sequence) == 1):
            # ignore
            mask.append(False)
            n_constants += 1
        else:
            mask.append(True)
            n_variables += 1
            if isinstance(hp, CategoricalHyperparameter):
                n_choices = len(hp.choices)
                n_choices_list.append(n_choices)
                n_variables_embedded += get_embed_dims(n_choices)
            else:
                n_choices_list.append(0)
                n_variables_embedded += 1
            if isinstance(hp, OrdinalHyperparameter):
                is_ordinal_list.append(True)
                sequence_mapper[len(is_ordinal_list) - 1] = hp.sequence
            else:
                is_ordinal_list.append(False)
            cur_parents = config_space.get_parents_of(hp.name)
            if len(cur_parents) == 0:
                n_top_levels += 1
                parents.append(None)
                parent_values.append(None)
                is_child.append(False)
            else:
                is_child.append(True)
                parents.append(cur_parents[0])
                parent_conditions = config_space.get_parent_conditions_of(hp.name)
                parent_condition = parent_conditions[0]
                parent_values.append(parent_condition.value)
    groups_str = [f"{parent}-{parent_value}"
                  for parent, parent_value in zip(parents, parent_values)]
    group_encoder = LabelEncoder()
    groups = group_encoder.fit_transform(groups_str)
    self.is_child = is_child
    self.sequence_mapper = sequence_mapper
    self.is_ordinal_list = is_ordinal_list
    self.config_space = config_space
    self.groups_str = groups_str
    self.group_encoder = group_encoder
    self.groups = groups
    self.n_groups = np.max(groups) + 1
    self.mask = np.array(mask, dtype="bool")
    self.n_choices_list = n_choices_list
    self.n_constants = n_constants
    self.n_variables = n_variables
    self.n_variables_embedded = n_variables_embedded
    self.n_top_levels = n_top_levels
    self.hp_names = pd.Series(
        [hp.name for hp in config_space.get_hyperparameters()])[self.mask]
    high_r_mask = np.array(self.n_choices_list) > 2
    self.high_r_cols = self.hp_names[high_r_mask].to_list()
    self.high_r_cats = []
    for ix in np.arange(n_variables)[high_r_mask]:
        n_choices = n_choices_list[ix]
        cat = list(range(n_choices))
        if is_child[ix]:
            cat.insert(0, -1)
        self.high_r_cats.append(cat)
    if self.encoder is not None:
        self.encoder.cols = copy(self.high_r_cols)
        self.encoder.categories = copy(self.high_r_cats)
    return self

def __init__(self, config_space: CS.ConfigurationSpace,
             name_last_pos: str = None, value_for_last_pos=None):
    """
    If name_last_pos is given, the hyperparameter of that name is assigned
    the final position in the vector returned by to_ndarray. This can be
    used to single out the (time) resource for a GP model, where that
    component has to come last.

    If in this case (name_last_pos given), value_for_last_pos is also given,
    some methods are modified:
    - random_candidate samples a config as normal, but then overwrites the
      name_last_pos component by value_for_last_pos
    - get_ndarray_bounds works as normal, but returns bound (a, a) for the
      name_last_pos component, where a is the internal value corresponding
      to value_for_last_pos
    The use case is HPO with a resource attribute. This attribute should be
    fixed when optimizing the acquisition function, but can take different
    values in the evaluation data (coming from all previous searches).

    :param config_space: ConfigurationSpace
    :param name_last_pos: See above. Default: None
    :param value_for_last_pos: See above. Default: None
    """
    self.config_space = config_space
    self.name_last_pos = name_last_pos
    self.value_for_last_pos = value_for_last_pos
    # Supports conversion to ndarray
    numer_src = []
    numer_trg = []
    categ_src = []
    categ_trg = []
    categ_card = []
    trg_pos = 0
    append_at_end = None
    for src_pos, hp in enumerate(config_space.get_hyperparameters()):
        if isinstance(hp, CS.CategoricalHyperparameter):
            card = hp.num_choices
            if hp.name == name_last_pos:
                assert append_at_end is None
                append_at_end = (src_pos, card, True)
            else:
                categ_src.append(src_pos)
                categ_trg.append(trg_pos)
                categ_card.append(card)
                trg_pos += card
        elif isinstance(hp, CS.UniformIntegerHyperparameter) or \
                isinstance(hp, CS.UniformFloatHyperparameter):
            if hp.name == name_last_pos:
                assert append_at_end is None
                append_at_end = (src_pos, 1, False)
            else:
                numer_src.append(src_pos)
                numer_trg.append(trg_pos)
                trg_pos += 1
        else:
            raise NotImplementedError(
                "We only support hyperparameters of type "
                "CategoricalHyperparameter, UniformIntegerHyperparameter, "
                "UniformFloatHyperparameter")
    if append_at_end is not None:
        if append_at_end[2]:
            categ_src.append(append_at_end[0])
            categ_trg.append(trg_pos)
            categ_card.append(append_at_end[1])
        else:
            numer_src.append(append_at_end[0])
            numer_trg.append(trg_pos)
        trg_pos += append_at_end[1]
    self.numer_src = np.array(numer_src, dtype=np.int64)
    self.numer_trg = np.array(numer_trg, dtype=np.int64)
    self.categ_src = np.array(categ_src, dtype=np.int64)
    self.categ_trg = np.array(categ_trg, dtype=np.int64)
    self.categ_card = np.array(categ_card, dtype=np.int64)
    self._ndarray_size = trg_pos
    self.keys_sorted = sorted(
        [hp.name for hp in config_space.get_hyperparameters()])

def __init__(self, node_list, node_index, task_type, timestamp,
             fe_config_space: ConfigurationSpace,
             cash_config_space: ConfigurationSpace,
             data: DataNode,
             fixed_config=None,
             time_limit=None,
             trial_num=0,
             metric='acc',
             optimizer='smac',
             ensemble_method='ensemble_selection',
             ensemble_size=50,
             per_run_time_limit=300,
             output_dir="logs",
             dataset_name='default_dataset',
             eval_type='holdout',
             resampling_params=None,
             n_jobs=1,
             seed=1):
    super(JointBlock, self).__init__(node_list, node_index, task_type,
                                     timestamp, fe_config_space,
                                     cash_config_space, data,
                                     fixed_config=fixed_config,
                                     time_limit=time_limit,
                                     trial_num=trial_num,
                                     metric=metric,
                                     optimizer=optimizer,
                                     ensemble_method=ensemble_method,
                                     ensemble_size=ensemble_size,
                                     per_run_time_limit=per_run_time_limit,
                                     output_dir=output_dir,
                                     dataset_name=dataset_name,
                                     eval_type=eval_type,
                                     resampling_params=resampling_params,
                                     n_jobs=n_jobs,
                                     seed=seed)

    self.fixed_config = fixed_config

    # Combine the configuration spaces
    cs = ConfigurationSpace()
    if fe_config_space is not None:
        cs.add_hyperparameters(fe_config_space.get_hyperparameters())
        cs.add_conditions(fe_config_space.get_conditions())
        cs.add_forbidden_clauses(fe_config_space.get_forbiddens())
    if cash_config_space is not None:
        cs.add_hyperparameters(cash_config_space.get_hyperparameters())
        cs.add_conditions(cash_config_space.get_conditions())
        cs.add_forbidden_clauses(cash_config_space.get_forbiddens())
    self.joint_cs = cs

    # Define evaluator and optimizer
    if self.task_type in CLS_TASKS:
        from mindware.components.evaluators.cls_evaluator import ClassificationEvaluator
        self.evaluator = ClassificationEvaluator(
            fixed_config=fixed_config,
            scorer=self.metric,
            data_node=self.original_data,
            if_imbal=self.if_imbal,
            timestamp=self.timestamp,
            output_dir=self.output_dir,
            seed=self.seed,
            resampling_strategy=self.eval_type,
            resampling_params=self.resampling_params)
    else:
        from mindware.components.evaluators.rgs_evaluator import RegressionEvaluator
        self.evaluator = RegressionEvaluator(
            fixed_config=fixed_config,
            scorer=self.metric,
            data_node=self.original_data,
            timestamp=self.timestamp,
            output_dir=self.output_dir,
            seed=self.seed,
            resampling_strategy=self.eval_type,
            resampling_params=self.resampling_params)

    self.optimizer = build_hpo_optimizer(
        self.eval_type, self.evaluator, self.joint_cs,
        optimizer=self.optimizer,
        output_dir=self.output_dir,
        per_run_time_limit=self.per_run_time_limit,
        inner_iter_num_per_iter=1,
        timestamp=self.timestamp,
        seed=self.seed,
        n_jobs=self.n_jobs)

def estimate_config_space_numbers(cs: ConfigurationSpace):
    result = 1
    for hp in cs.get_hyperparameters():
        result *= (hp.get_num_neighbors() + 1)
    return result

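A hedged usage sketch (illustrative space): for categorical hyperparameters, get_num_neighbors() is the number of choices minus one, so the product below counts the grid exactly; for continuous hyperparameters it returns infinity, and so does the estimate.

from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter

cs = ConfigurationSpace()
cs.add_hyperparameters([
    CategoricalHyperparameter("a", [0, 1, 2]),   # 2 neighbors + 1 = 3 values
    CategoricalHyperparameter("b", ["x", "y"]),  # 1 neighbor  + 1 = 2 values
])

assert estimate_config_space_numbers(cs) == 6
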
def replace_phps(shps: ConfigurationSpace, key, value):
    for hp in shps.get_hyperparameters():
        if hp.__class__.__name__ == "Constant" and hp.name.endswith(key):
            hp.value = _encode(value)

def get_types(
    config_space: ConfigurationSpace,
    instance_features: typing.Optional[np.ndarray] = None,
) -> typing.Tuple[typing.List[int], typing.List[typing.Tuple[float, float]]]:
    """Return the types and bounds vectors for an EPM (e.g. a random forest),
    derived from the given config space and optional instance features."""
    # Extract types vector for rf from config space and the bounds
    types = [0] * len(config_space.get_hyperparameters())
    bounds = [(np.nan, np.nan)] * len(types)

    for i, param in enumerate(config_space.get_hyperparameters()):
        parents = config_space.get_parents_of(param.name)
        if len(parents) == 0:
            can_be_inactive = False
        else:
            can_be_inactive = True

        if isinstance(param, (CategoricalHyperparameter)):
            n_cats = len(param.choices)
            if can_be_inactive:
                n_cats = len(param.choices) + 1
            types[i] = n_cats
            bounds[i] = (int(n_cats), np.nan)
        elif isinstance(param, (OrdinalHyperparameter)):
            n_cats = len(param.sequence)
            types[i] = 0
            if can_be_inactive:
                bounds[i] = (0, int(n_cats))
            else:
                bounds[i] = (0, int(n_cats) - 1)
        elif isinstance(param, Constant):
            # for constants we simply set types to 0 which makes it a
            # numerical parameter
            if can_be_inactive:
                bounds[i] = (2, np.nan)
                types[i] = 2
            else:
                bounds[i] = (0, np.nan)
                types[i] = 0
            # and we leave the bounds to be 0 for now
        elif isinstance(param, UniformFloatHyperparameter):
            # Are sampled on the unit hypercube thus the bounds
            # are always 0.0, 1.0
            if can_be_inactive:
                bounds[i] = (-1.0, 1.0)
            else:
                bounds[i] = (0, 1.0)
        elif isinstance(param, UniformIntegerHyperparameter):
            if can_be_inactive:
                bounds[i] = (-1.0, 1.0)
            else:
                bounds[i] = (0, 1.0)
        elif not isinstance(param, (UniformFloatHyperparameter,
                                    UniformIntegerHyperparameter,
                                    OrdinalHyperparameter,
                                    CategoricalHyperparameter)):
            raise TypeError("Unknown hyperparameter type %s" % type(param))

    if instance_features is not None:
        types = types + [0] * instance_features.shape[1]

    return types, bounds

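A minimal usage sketch (illustrative space): unconditional categoricals get their number of choices as the type code, numerical hyperparameters get 0 with unit-hypercube bounds.

from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import (
    CategoricalHyperparameter, UniformFloatHyperparameter)

cs = ConfigurationSpace()
cs.add_hyperparameters([
    CategoricalHyperparameter("kernel", ["rbf", "linear"]),
    UniformFloatHyperparameter("C", 0.01, 100.0),
])

types, bounds = get_types(cs)
# One entry per hyperparameter: 0 for the numerical "C" (bounds (0, 1.0)),
# 2 (its number of choices) for the categorical "kernel".
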
def __init__(self, node_list, node_index, task_type, timestamp,
             fe_config_space: ConfigurationSpace,
             cash_config_space: ConfigurationSpace,
             data: DataNode,
             fixed_config=None,
             time_limit=None,
             trial_num=0,
             metric='acc',
             ensemble_method='ensemble_selection',
             ensemble_size=50,
             per_run_time_limit=300,
             output_dir="logs",
             dataset_name='default_dataset',
             eval_type='holdout',
             resampling_params=None,
             n_jobs=1,
             seed=1):
    """
    :param classifier_ids: subset of {'adaboost', 'bernoulli_nb',
        'decision_tree', 'extra_trees', 'gaussian_nb', 'gradient_boosting',
        'k_nearest_neighbors', 'lda', 'liblinear_svc', 'libsvm_svc',
        'multinomial_nb', 'passive_aggressive', 'qda', 'random_forest',
        'sgd'}
    """
    super(ConditioningBlock, self).__init__(node_list, node_index, task_type,
                                            timestamp, fe_config_space,
                                            cash_config_space, data,
                                            fixed_config=fixed_config,
                                            time_limit=time_limit,
                                            trial_num=trial_num,
                                            metric=metric,
                                            ensemble_method=ensemble_method,
                                            ensemble_size=ensemble_size,
                                            per_run_time_limit=per_run_time_limit,
                                            output_dir=output_dir,
                                            dataset_name=dataset_name,
                                            eval_type=eval_type,
                                            resampling_params=resampling_params,
                                            n_jobs=n_jobs,
                                            seed=seed)

    # Best configuration.
    self.optimal_arm = None
    self.best_lower_bounds = None

    # Bandit settings.
    self.alpha = 4
    self.arms = list(cash_config_space.get_hyperparameter('algorithm').choices)
    self.rewards = dict()
    self.sub_bandits = dict()
    self.evaluation_cost = dict()

    self.arm_cost_stats = dict()
    for _arm in self.arms:
        self.arm_cost_stats[_arm] = list()

    for arm in self.arms:
        self.rewards[arm] = list()
        self.evaluation_cost[arm] = list()

        hps = cash_config_space.get_hyperparameters()
        cs = ConfigurationSpace()
        cs.add_hyperparameter(Constant('algorithm', arm))
        for hp in hps:
            if hp.name.split(':')[0] == arm:
                cs.add_hyperparameter(hp)

        # Add active conditions
        conds = cash_config_space.get_conditions()
        for cond in conds:
            try:
                cs.add_condition(cond)
            except:
                pass

        # Add active forbidden clauses
        forbids = cash_config_space.get_forbiddens()
        for forbid in forbids:
            try:
                cs.add_forbidden_clause(forbid)
            except:
                pass

        from solnml.blocks.block_utils import get_node_type
        child_type = get_node_type(node_list, node_index + 1)
        self.sub_bandits[arm] = child_type(
            node_list, node_index + 1, task_type, timestamp,
            deepcopy(fe_config_space), deepcopy(cs), data.copy_(),
            fixed_config=fixed_config,
            time_limit=time_limit,
            metric=metric,
            ensemble_method=ensemble_method,
            ensemble_size=ensemble_size,
            per_run_time_limit=per_run_time_limit,
            output_dir=output_dir,
            dataset_name=dataset_name,
            eval_type=eval_type,
            resampling_params=resampling_params,
            n_jobs=n_jobs,
            seed=seed)

    self.action_sequence = list()
    self.final_rewards = list()
    self.start_time = time.time()
    self.time_records = list()

    # Initialize the parameters.
    self.pull_cnt = 0
    self.pick_id = 0
    self.update_cnt = 0
    arm_num = len(self.arms)
    self.optimal_algo_id = None
    self.arm_candidate = self.arms.copy()
    self.best_lower_bounds = np.zeros(arm_num)
    _iter_id = 0
    if self.time_limit is None:
        if arm_num * self.alpha > self.trial_num:
            raise ValueError('Trial number should be larger than %d.'
                             % (arm_num * self.alpha))
    else:
        self.trial_num = MAX_INT
