def read(jason_string):
    jason = json.loads(jason_string)
    if 'name' in jason:
        configuration_space = ConfigurationSpace(name=jason['name'])
    else:
        configuration_space = ConfigurationSpace()

    for hyperparameter in jason['hyperparameters']:
        configuration_space.add_hyperparameter(
            _construct_hyperparameter(hyperparameter))

    for condition in jason['conditions']:
        configuration_space.add_condition(
            _construct_condition(condition, configuration_space))

    for forbidden in jason['forbiddens']:
        configuration_space.add_forbidden_clause(
            _construct_forbidden(forbidden, configuration_space))

    return configuration_space
def get_cs():
    cs = ConfigurationSpace()
    epsilon = CategoricalHyperparameter("epsilon", [1e-4, 1e-3, 1e-2, 1e-1, 1],
                                        default_value=1e-4)
    loss = CategoricalHyperparameter(
        "loss", ["epsilon_insensitive", "squared_epsilon_insensitive"],
        default_value="epsilon_insensitive")
    dual = CategoricalHyperparameter("dual", ['True', 'False'], default_value='True')
    tol = UniformFloatHyperparameter("tol", 1e-5, 1e-1, default_value=1e-4, log=True)
    C = UniformFloatHyperparameter("C", 0.03125, 32768, log=True, default_value=1.0)
    fit_intercept = Constant("fit_intercept", "True")
    intercept_scaling = Constant("intercept_scaling", 1)
    cs.add_hyperparameters(
        [epsilon, loss, dual, tol, C, fit_intercept, intercept_scaling])

    dual_and_loss = ForbiddenAndConjunction(
        ForbiddenEqualsClause(dual, "False"),
        ForbiddenEqualsClause(loss, "epsilon_insensitive"))
    cs.add_forbidden_clause(dual_and_loss)
    return cs
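# Usage sketch (not part of the original snippets): assuming the get_cs()
# defined above and the standard ConfigSpace sampling API, forbidden clauses
# are honoured by the sampler, so the excluded combination of dual="False"
# with loss="epsilon_insensitive" should never be drawn.
cs = get_cs()
for config in cs.sample_configuration(100):
    assert not (config["dual"] == "False"
                and config["loss"] == "epsilon_insensitive")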
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    C = UniformFloatHyperparameter(
        "C", 0.03125, 32768, log=True, default_value=1.0)
    loss = CategoricalHyperparameter(
        "loss", ["epsilon_insensitive", "squared_epsilon_insensitive"],
        default_value="squared_epsilon_insensitive")
    # Random Guess
    epsilon = UniformFloatHyperparameter(
        name="epsilon", lower=0.001, upper=1, default_value=0.1, log=True)

    dual = Constant("dual", "False")
    # These are set ad-hoc
    tol = UniformFloatHyperparameter(
        "tol", 1e-5, 1e-1, default_value=1e-4, log=True)
    fit_intercept = Constant("fit_intercept", "True")
    intercept_scaling = Constant("intercept_scaling", 1)

    cs.add_hyperparameters([C, loss, epsilon, dual, tol,
                            fit_intercept, intercept_scaling])

    dual_and_loss = ForbiddenAndConjunction(
        ForbiddenEqualsClause(dual, "False"),
        ForbiddenEqualsClause(loss, "epsilon_insensitive")
    )
    cs.add_forbidden_clause(dual_and_loss)
    return cs
def read(jason_string): """ Create a configuration space definition from a json string. Example ------- .. testsetup:: json_test from ConfigSpace import ConfigurationSpace import ConfigSpace.hyperparameters as CSH from ConfigSpace.read_and_write import json cs = ConfigurationSpace() cs.add_hyperparameter(CSH.CategoricalHyperparameter('a', choices=[1, 2, 3])) with open('configspace.json', 'w') as f: f.write(json.write(cs)) .. doctest:: json_test >>> from ConfigSpace.read_and_write import json >>> with open('configspace.json', 'r') as f: ... jason_string = f.read() ... config = json.read(jason_string) Parameters ---------- jason_string : str A json string representing a configuration space definition Returns ------- :class:`~ConfigSpace.configuration_space.ConfigurationSpace` The deserialized ConfigurationSpace object """ jason = json.loads(jason_string) if 'name' in jason: configuration_space = ConfigurationSpace(name=jason['name']) else: configuration_space = ConfigurationSpace() for hyperparameter in jason['hyperparameters']: configuration_space.add_hyperparameter( _construct_hyperparameter(hyperparameter, )) for condition in jason['conditions']: configuration_space.add_condition( _construct_condition( condition, configuration_space, )) for forbidden in jason['forbiddens']: configuration_space.add_forbidden_clause( _construct_forbidden( forbidden, configuration_space, )) return configuration_space
def set_probabilities_in_cs(self, cs: ConfigurationSpace,
                            relied2models: Dict[str, List[str]],
                            relied2AllModels: Dict[str, List[str]],
                            all_models: List[str],
                            **kwargs):
    estimator = cs.get_hyperparameter("estimator:__choice__")
    probabilities = []
    model2prob = {}
    L = 0
    for rely_model in relied2models:
        cur_models = relied2models[rely_model]
        L += len(cur_models)
        for model in cur_models:
            model2prob[model] = kwargs[rely_model] / len(cur_models)
    p_rest = (1 - sum(model2prob.values())) / (len(all_models) - L)
    for model in estimator.choices:
        probabilities.append(model2prob.get(model, p_rest))
    estimator.probabilities = probabilities

    default_estimator_choice = None
    for models in relied2models.values():
        if models:
            default_estimator_choice = models[0]
    estimator.default_value = default_estimator_choice

    for rely_model, path in RelyModels.info:
        forbid_eq_value = path[-1]
        path = path[:-1]
        forbid_eq_key = ":".join(path + ["__choice__"])
        forbid_eq_key_hp = cs.get_hyperparameter(forbid_eq_key)
        forbid_in_key = "estimator:__choice__"
        hit = relied2AllModels.get(rely_model)
        if not hit:
            choices = list(forbid_eq_key_hp.choices)
            choices.remove(forbid_eq_value)
            forbid_eq_key_hp.choices = tuple(choices)
            forbid_eq_key_hp.default_value = choices[0]
            forbid_eq_key_hp.probabilities = [1 / len(choices)] * len(choices)
            # FIXME: in the end I gave up modifying this here and instead did
            # the preprocessing in the hdl part
            continue
        forbid_in_value = list(set(all_models) - set(hit))  # only the boosting models were selected
        if not forbid_in_value:
            continue
        choices = forbid_eq_key_hp.choices
        probabilities = []
        p: float = kwargs[rely_model]
        # split the remaining probability mass evenly over the other choices
        p_rest = (1 - p) / (len(choices) - 1)
        for choice in choices:
            if choice == forbid_eq_value:
                probabilities.append(p)
            else:
                probabilities.append(p_rest)
        forbid_eq_key_hp.probabilities = probabilities
        cs.add_forbidden_clause(
            ForbiddenAndConjunction(
                ForbiddenEqualsClause(forbid_eq_key_hp, forbid_eq_value),
                ForbiddenInClause(cs.get_hyperparameter(forbid_in_key),
                                  forbid_in_value),
            ))
def test_build_new_forbidden(self):
    expected = "a categorical {a, b, c} [a]\nb categorical {a, b, c} [c]\n\n" \
               "{a=a, b=a}\n{a=a, b=b}\n{a=b, b=a}\n{a=b, b=b}"
    cs = ConfigurationSpace()
    a = CategoricalHyperparameter("a", ["a", "b", "c"], "a")
    b = CategoricalHyperparameter("b", ["a", "b", "c"], "c")
    cs.add_hyperparameter(a)
    cs.add_hyperparameter(b)
    fb = ForbiddenAndConjunction(ForbiddenInClause(a, ["a", "b"]),
                                 ForbiddenInClause(b, ["a", "b"]))
    cs.add_forbidden_clause(fb)
    value = pcs_new.write(cs)
    self.assertIn(expected, value)
def get_hyperparameter_search_space(**kwargs):
    n_clusters_factor = UniformFloatHyperparameter(
        "n_clusters_factor", 0., 1., default_value=1.)
    affinity = CategoricalHyperparameter(
        "affinity", ["euclidean", "manhattan", "cosine"], default_value="euclidean")
    linkage = CategoricalHyperparameter(
        "linkage", ["ward", "complete", "average"], default_value="ward")
    pooling_func = CategoricalHyperparameter(
        "pooling_func", ["mean", "median", "max"], default_value="mean")

    cs = ConfigurationSpace()
    cs.add_hyperparameters([n_clusters_factor, affinity, linkage, pooling_func])

    affinity_and_linkage = ForbiddenAndConjunction(
        ForbiddenInClause(affinity, ["manhattan", "cosine"]),
        ForbiddenEqualsClause(linkage, "ward"))
    cs.add_forbidden_clause(affinity_and_linkage)
    return cs
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    if optimizer == 'smac':
        cs = ConfigurationSpace()
        penalty = Constant("penalty", "l1")
        loss = CategoricalHyperparameter(
            "loss", ["hinge", "squared_hinge"], default_value="squared_hinge")
        dual = Constant("dual", "False")
        # This is set ad-hoc
        tol = UniformFloatHyperparameter(
            "tol", 1e-5, 1e-1, default_value=1e-4, log=True)
        C = UniformFloatHyperparameter(
            "C", 0.03125, 32768, log=True, default_value=1.0)
        multi_class = Constant("multi_class", "ovr")
        # These are set ad-hoc
        fit_intercept = Constant("fit_intercept", "True")
        intercept_scaling = Constant("intercept_scaling", 1)
        cs.add_hyperparameters([
            penalty, loss, dual, tol, C, multi_class, fit_intercept,
            intercept_scaling
        ])

        penalty_and_loss = ForbiddenAndConjunction(
            ForbiddenEqualsClause(penalty, "l1"),
            ForbiddenEqualsClause(loss, "hinge"))
        cs.add_forbidden_clause(penalty_and_loss)
        return cs
    elif optimizer == 'tpe':
        from hyperopt import hp
        space = {
            'tol': hp.loguniform('lbs_tol', np.log(1e-5), np.log(1e-1)),
            'C': hp.loguniform('lbs_C', np.log(0.03125), np.log(32768)),
            'loss': 'squared_hinge',
            'multi_class': 'ovr',
            'dual': 'False',
            'fit_intercept': 'True',
            'intercept_scaling': 1,
            'penalty': 'l1'  # lowercase, matching scikit-learn and the smac branch above
        }
        return space
def __forbidden(self, value: List, store: Dict, cs: ConfigurationSpace):
    assert isinstance(value, list)
    for item in value:
        assert isinstance(item, dict)
        clauses = []
        for k, v in item.items():
            if isinstance(v, list) and len(v) == 1:
                v = v[0]
            if isinstance(v, list):
                clauses.append(
                    ForbiddenInClause(store[k], list(map(smac_hdl._encode, v))))
            else:
                clauses.append(
                    ForbiddenEqualsClause(store[k], smac_hdl._encode(v)))
        cs.add_forbidden_clause(ForbiddenAndConjunction(*clauses))
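# Illustration only (not from the original project): judging from the parsing
# logic above, `value` is a list of dicts, one dict per forbidden combination,
# where a scalar becomes a ForbiddenEqualsClause and a multi-element list
# becomes a ForbiddenInClause (single-element lists are unwrapped first). The
# project-specific smac_hdl._encode step is omitted, and the hyperparameter
# names and choices below are made up for the sketch.
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter
from ConfigSpace.forbidden import (ForbiddenAndConjunction,
                                   ForbiddenEqualsClause, ForbiddenInClause)

cs = ConfigurationSpace()
dual = CategoricalHyperparameter("dual", ["True", "False"])
loss = CategoricalHyperparameter("loss", ["hinge", "squared_hinge", "epsilon_insensitive"])
cs.add_hyperparameters([dual, loss])
store = {"dual": dual, "loss": loss}

item = {"dual": "False", "loss": ["hinge", "squared_hinge"]}  # one entry of `value`
clauses = [ForbiddenInClause(store[k], v) if isinstance(v, list)
           else ForbiddenEqualsClause(store[k], v)
           for k, v in item.items()]
cs.add_forbidden_clause(ForbiddenAndConjunction(*clauses))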
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    n_clusters = cs.add_hyperparameter(UniformIntegerHyperparameter(
        "n_clusters", 2, 400, 25))
    affinity = cs.add_hyperparameter(CategoricalHyperparameter(
        "affinity", ["euclidean", "manhattan", "cosine"], "euclidean"))
    linkage = cs.add_hyperparameter(CategoricalHyperparameter(
        "linkage", ["ward", "complete", "average"], "ward"))
    pooling_func = cs.add_hyperparameter(CategoricalHyperparameter(
        "pooling_func", ["mean", "median", "max"]))

    affinity_and_linkage = ForbiddenAndConjunction(
        ForbiddenInClause(affinity, ["manhattan", "cosine"]),
        ForbiddenEqualsClause(linkage, "ward"))
    cs.add_forbidden_clause(affinity_and_linkage)
    return cs
def read(jason_string): """ Creates a configuration space definition from a json string. Example ------- >>> from ConfigSpace.read_and_write import json >>> with open('configspace.json', 'r') as f: >>> jason_string = f.read() >>> config = json.read(jason_string) Parameters ---------- jason_string : str A json string representing a configuration space definition Returns ------- :class:`~ConfigSpace.configuration_space.ConfigurationSpace` The restored ConfigurationSpace object """ jason = json.loads(jason_string) if 'name' in jason: configuration_space = ConfigurationSpace(name=jason['name']) else: configuration_space = ConfigurationSpace() for hyperparameter in jason['hyperparameters']: configuration_space.add_hyperparameter( _construct_hyperparameter(hyperparameter, )) for condition in jason['conditions']: configuration_space.add_condition( _construct_condition( condition, configuration_space, )) for forbidden in jason['forbiddens']: configuration_space.add_forbidden_clause( _construct_forbidden( forbidden, configuration_space, )) return configuration_space
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    cs = ConfigurationSpace()
    n_clusters = UniformIntegerHyperparameter("n_clusters", 2, 400, default_value=25)
    affinity = CategoricalHyperparameter(
        "affinity", ["euclidean", "manhattan", "cosine"], default_value="euclidean")
    linkage = CategoricalHyperparameter(
        "linkage", ["ward", "complete", "average"], default_value="ward")
    pooling_func = CategoricalHyperparameter(
        "pooling_func", ["mean", "median", "max"], default_value="mean")
    cs.add_hyperparameters([n_clusters, affinity, linkage, pooling_func])

    affinity_and_linkage = ForbiddenAndConjunction(
        ForbiddenInClause(affinity, ["manhattan", "cosine"]),
        ForbiddenEqualsClause(linkage, "ward"))
    cs.add_forbidden_clause(affinity_and_linkage)
    return cs
def read(jason_string): """ Creates a configuration space definition from a json string. Example ------- >>> from ConfigSpace.read_and_write import json >>> with open('configspace.json', 'r') as f: >>> jason_string = f.read() >>> config = json.read(jason_string) Parameters ---------- jason_string : str A json string representing a configuration space definition Returns ------- :class:`~ConfigSpace.configuration_space.ConfigurationSpace` The restored ConfigurationSpace object """ jason = json.loads(jason_string) if 'name' in jason: configuration_space = ConfigurationSpace(name=jason['name']) else: configuration_space = ConfigurationSpace() for hyperparameter in jason['hyperparameters']: configuration_space.add_hyperparameter(_construct_hyperparameter( hyperparameter, )) for condition in jason['conditions']: configuration_space.add_condition(_construct_condition( condition, configuration_space, )) for forbidden in jason['forbiddens']: configuration_space.add_forbidden_clause(_construct_forbidden( forbidden, configuration_space, )) return configuration_space
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    if optimizer == 'smac':
        cs = ConfigurationSpace()
        epsilon = CategoricalHyperparameter(
            "epsilon", [1e-4, 1e-3, 1e-2, 1e-1, 1], default_value=1e-4)
        loss = CategoricalHyperparameter(
            "loss", ["epsilon_insensitive", "squared_epsilon_insensitive"],
            default_value="epsilon_insensitive")
        dual = CategoricalHyperparameter("dual", ['True', 'False'], default_value='True')
        tol = UniformFloatHyperparameter(
            "tol", 1e-5, 1e-1, default_value=1e-4, log=True)
        C = UniformFloatHyperparameter(
            "C", 0.03125, 32768, log=True, default_value=1.0)
        fit_intercept = Constant("fit_intercept", "True")
        intercept_scaling = Constant("intercept_scaling", 1)
        cs.add_hyperparameters([epsilon, loss, dual, tol, C,
                                fit_intercept, intercept_scaling])

        dual_and_loss = ForbiddenAndConjunction(
            ForbiddenEqualsClause(dual, "False"),
            ForbiddenEqualsClause(loss, "epsilon_insensitive")
        )
        cs.add_forbidden_clause(dual_and_loss)
        return cs
    elif optimizer == 'tpe':
        from hyperopt import hp
        space = {'loss': hp.choice('liblinear_combination',
                                   [{'loss': "epsilon_insensitive", 'dual': "True"},
                                    {'loss': "squared_epsilon_insensitive", 'dual': "True"},
                                    {'loss': "squared_epsilon_insensitive", 'dual': "False"}]),
                 'dual': None,
                 'tol': hp.loguniform('liblinear_tol', np.log(1e-5), np.log(1e-1)),
                 'C': hp.loguniform('liblinear_C', np.log(0.03125), np.log(32768)),
                 'fit_intercept': hp.choice('liblinear_fit_intercept', ["True"]),
                 'intercept_scaling': hp.choice('liblinear_intercept_scaling', [1])}
        init_trial = {'loss': {'loss': "epsilon_insensitive", 'dual': "True"},
                      'tol': 1e-4,
                      'C': 1,
                      'fit_intercept': "True",
                      'intercept_scaling': 1}
        return space
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    n_clusters = cs.add_hyperparameter(
        UniformIntegerHyperparameter("n_clusters", 2, 400, 25))
    affinity = cs.add_hyperparameter(
        CategoricalHyperparameter("affinity",
                                  ["euclidean", "manhattan", "cosine"],
                                  "euclidean"))
    linkage = cs.add_hyperparameter(
        CategoricalHyperparameter("linkage",
                                  ["ward", "complete", "average"],
                                  "ward"))
    pooling_func = cs.add_hyperparameter(
        CategoricalHyperparameter("pooling_func", ["mean", "median", "max"]))

    affinity_and_linkage = ForbiddenAndConjunction(
        ForbiddenInClause(affinity, ["manhattan", "cosine"]),
        ForbiddenEqualsClause(linkage, "ward"))
    cs.add_forbidden_clause(affinity_and_linkage)
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    penalty = cs.add_hyperparameter(Constant("penalty", "l1"))
    loss = cs.add_hyperparameter(CategoricalHyperparameter(
        "loss", ["hinge", "squared_hinge"], default="squared_hinge"))
    dual = cs.add_hyperparameter(Constant("dual", "False"))
    # This is set ad-hoc
    tol = cs.add_hyperparameter(UniformFloatHyperparameter(
        "tol", 1e-5, 1e-1, default=1e-4, log=True))
    C = cs.add_hyperparameter(UniformFloatHyperparameter(
        "C", 0.03125, 32768, log=True, default=1.0))
    multi_class = cs.add_hyperparameter(Constant("multi_class", "ovr"))
    # These are set ad-hoc
    fit_intercept = cs.add_hyperparameter(Constant("fit_intercept", "True"))
    intercept_scaling = cs.add_hyperparameter(Constant(
        "intercept_scaling", 1))

    penalty_and_loss = ForbiddenAndConjunction(
        ForbiddenEqualsClause(penalty, "l1"),
        ForbiddenEqualsClause(loss, "hinge")
    )
    cs.add_forbidden_clause(penalty_and_loss)
    return cs
def get_hyperparameter_search_space(**kwargs): cs = ConfigurationSpace() penalty = CategoricalHyperparameter("penalty", ["l1", "l2"], default_value="l2") loss = CategoricalHyperparameter("loss", ["hinge", "squared_hinge"], default_value="squared_hinge") dual = CategoricalHyperparameter("dual", [True, False], default_value=True) tol = UniformFloatHyperparameter("tol", 1e-5, 120., default_value=1e-4) C = UniformFloatHyperparameter("C", 1e-7, 100., default_value=1.) multi_class = CategoricalHyperparameter("multi_class", ["ovr", "crammer_singer"], default_value="ovr") fit_intercept = CategoricalHyperparameter("fit_intercept", [True, False], default_value=True) intercept_scaling = UniformFloatHyperparameter("intercept_scaling", 0., 1., default_value=1.) max_iter = UniformIntegerHyperparameter("max_iter", 100, 2000, default_value=1000) cs.add_hyperparameters( [C, penalty, loss, dual, tol, multi_class, fit_intercept, intercept_scaling, max_iter]) penalty_and_loss = ForbiddenAndConjunction( ForbiddenEqualsClause(penalty, "l1"), ForbiddenEqualsClause(loss, "hinge") ) constant_penalty_and_loss = ForbiddenAndConjunction( ForbiddenEqualsClause(dual, False), ForbiddenEqualsClause(penalty, "l2"), ForbiddenEqualsClause(loss, "hinge") ) penalty_and_dual = ForbiddenAndConjunction( ForbiddenEqualsClause(dual, False), ForbiddenEqualsClause(penalty, "l1") ) constant_penalty_and_loss2 = ForbiddenAndConjunction( ForbiddenEqualsClause(dual, True), ForbiddenEqualsClause(penalty, "l1"), ForbiddenEqualsClause(loss, "squared_hinge") ) cs.add_forbidden_clause(penalty_and_loss) cs.add_forbidden_clause(constant_penalty_and_loss) cs.add_forbidden_clause(penalty_and_dual) cs.add_forbidden_clause(constant_penalty_and_loss2) return cs
def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() penalty = CategoricalHyperparameter("penalty", ["l1", "l2"], default_value="l2") loss = CategoricalHyperparameter("loss", ["hinge", "squared_hinge"], default_value="squared_hinge") dual = Constant("dual", "False") # This is set ad-hoc tol = UniformFloatHyperparameter("tol", 1e-5, 1e-1, default_value=1e-4, log=True) C = UniformFloatHyperparameter("C", 0.03125, 32768, log=True, default_value=1.0) multi_class = Constant("multi_class", "ovr") # These are set ad-hoc fit_intercept = Constant("fit_intercept", "True") intercept_scaling = Constant("intercept_scaling", 1) cs.add_hyperparameters([ penalty, loss, dual, tol, C, multi_class, fit_intercept, intercept_scaling ]) penalty_and_loss = ForbiddenAndConjunction( ForbiddenEqualsClause(penalty, "l1"), ForbiddenEqualsClause(loss, "hinge")) constant_penalty_and_loss = ForbiddenAndConjunction( ForbiddenEqualsClause(dual, "False"), ForbiddenEqualsClause(penalty, "l2"), ForbiddenEqualsClause(loss, "hinge")) penalty_and_dual = ForbiddenAndConjunction( ForbiddenEqualsClause(dual, "False"), ForbiddenEqualsClause(penalty, "l1")) cs.add_forbidden_clause(penalty_and_loss) cs.add_forbidden_clause(constant_penalty_and_loss) cs.add_forbidden_clause(penalty_and_dual) return cs
def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() penalty = CategoricalHyperparameter( "penalty", ["l1", "l2"], default_value="l2") loss = CategoricalHyperparameter( "loss", ["hinge", "squared_hinge"], default_value="squared_hinge") dual = Constant("dual", "False") # This is set ad-hoc tol = UniformFloatHyperparameter( "tol", 1e-5, 1e-1, default_value=1e-4, log=True) C = UniformFloatHyperparameter( "C", 0.03125, 32768, log=True, default_value=1.0) multi_class = Constant("multi_class", "ovr") # These are set ad-hoc fit_intercept = Constant("fit_intercept", "True") intercept_scaling = Constant("intercept_scaling", 1) cs.add_hyperparameters([penalty, loss, dual, tol, C, multi_class, fit_intercept, intercept_scaling]) penalty_and_loss = ForbiddenAndConjunction( ForbiddenEqualsClause(penalty, "l1"), ForbiddenEqualsClause(loss, "hinge") ) constant_penalty_and_loss = ForbiddenAndConjunction( ForbiddenEqualsClause(dual, "False"), ForbiddenEqualsClause(penalty, "l2"), ForbiddenEqualsClause(loss, "hinge") ) penalty_and_dual = ForbiddenAndConjunction( ForbiddenEqualsClause(dual, "False"), ForbiddenEqualsClause(penalty, "l1") ) cs.add_forbidden_clause(penalty_and_loss) cs.add_forbidden_clause(constant_penalty_and_loss) cs.add_forbidden_clause(penalty_and_dual) return cs
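# Usage sketch (not from the original source): with the space defined above,
# explicitly requesting a forbidden combination such as penalty="l1" together
# with loss="hinge" is rejected when the Configuration is built; ConfigSpace
# raises a ValueError (a ForbiddenValueError in newer releases).
from ConfigSpace import Configuration

cs = get_hyperparameter_search_space()
try:
    Configuration(cs, values={
        "penalty": "l1", "loss": "hinge", "dual": "False",
        "tol": 1e-4, "C": 1.0, "multi_class": "ovr",
        "fit_intercept": "True", "intercept_scaling": 1,
    })
except ValueError as err:
    print("rejected:", err)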
def read(pcs_string, debug=False): """ Reads in a :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace` definition from a pcs file. Example ------- >>> from ConfigSpace.read_and_write import pcs >>> with open('configspace.pcs', 'r') as fh: >>> restored_conf = pcs_new.read(fh) Parameters ---------- pcs_string : str ConfigSpace definition in pcs format debug : bool Provides debug information. Defaults to False. Returns ------- :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace` The restored ConfigurationSpace object """ configuration_space = ConfigurationSpace() conditions = [] forbidden = [] # some statistics ct = 0 cont_ct = 0 cat_ct = 0 line_ct = 0 for line in pcs_string: line_ct += 1 if "#" in line: # It contains a comment pos = line.find("#") line = line[:pos] # Remove quotes and whitespaces at beginning and end line = line.replace('"', "").replace("'", "") line = line.strip() if "|" in line: # It's a condition try: c = pp_condition.parseString(line) conditions.append(c) except pyparsing.ParseException: raise NotImplementedError("Could not parse condition: %s" % line) continue if "}" not in line and "]" not in line: continue if line.startswith("{") and line.endswith("}"): forbidden.append(line) continue if len(line.strip()) == 0: continue ct += 1 param = None create = {"int": UniformIntegerHyperparameter, "float": UniformFloatHyperparameter, "categorical": CategoricalHyperparameter} try: param_list = pp_cont_param.parseString(line) il = param_list[9:] if len(il) > 0: il = il[0] param_list = param_list[:9] name = param_list[0] lower = float(param_list[2]) upper = float(param_list[4]) paramtype = "int" if "i" in il else "float" log = True if "l" in il else False default_value = float(param_list[7]) param = create[paramtype](name=name, lower=lower, upper=upper, q=None, log=log, default_value=default_value) cont_ct += 1 except pyparsing.ParseException: pass try: param_list = pp_cat_param.parseString(line) name = param_list[0] choices = [c for c in param_list[2:-4:2]] default_value = param_list[-2] param = create["categorical"](name=name, choices=choices, default_value=default_value) cat_ct += 1 except pyparsing.ParseException: pass if param is None: raise NotImplementedError("Could not parse: %s" % line) configuration_space.add_hyperparameter(param) for clause in forbidden: # TODO test this properly! # TODO Add a try/catch here! 
# noinspection PyUnusedLocal param_list = pp_forbidden_clause.parseString(clause) tmp_list = [] clause_list = [] for value in param_list[1:]: if len(tmp_list) < 3: tmp_list.append(value) else: # So far, only equals is supported by SMAC if tmp_list[1] == '=': # TODO maybe add a check if the hyperparameter is # actually in the configuration space clause_list.append(ForbiddenEqualsClause( configuration_space.get_hyperparameter(tmp_list[0]), tmp_list[2])) else: raise NotImplementedError() tmp_list = [] configuration_space.add_forbidden_clause(ForbiddenAndConjunction( *clause_list)) # Now handle conditions # If there are two conditions for one child, these two conditions are an # AND-conjunction of conditions, thus we have to connect them conditions_per_child = OrderedDict() for condition in conditions: child_name = condition[0] if child_name not in conditions_per_child: conditions_per_child[child_name] = list() conditions_per_child[child_name].append(condition) for child_name in conditions_per_child: condition_objects = [] for condition in conditions_per_child[child_name]: child = configuration_space.get_hyperparameter(child_name) parent_name = condition[2] parent = configuration_space.get_hyperparameter(parent_name) restrictions = condition[5:-1:2] # TODO: cast the type of the restriction! if len(restrictions) == 1: condition = EqualsCondition(child, parent, restrictions[0]) else: condition = InCondition(child, parent, values=restrictions) condition_objects.append(condition) # Now we have all condition objects for this child, so we can build a # giant AND-conjunction of them (if number of conditions >= 2)! if len(condition_objects) > 1: and_conjunction = AndConjunction(*condition_objects) configuration_space.add_condition(and_conjunction) else: configuration_space.add_condition(condition_objects[0]) return configuration_space
def get_hyperparameter_search_space(cls, dataset_properties=None, default=None, include=None, exclude=None): cs = ConfigurationSpace() # Compile a list of legal preprocessors for this problem available_preprocessors = cls.get_available_components( data_prop=dataset_properties, include=include, exclude=exclude) if len(available_preprocessors) == 0: raise ValueError( "No rescaling algorithm found.") if default is None: defaults = ['min/max', 'standardize', 'none', 'normalize'] for default_ in defaults: if default_ in available_preprocessors: default = default_ break preprocessor = CategoricalHyperparameter('__choice__', list( available_preprocessors.keys()), default=default) cs.add_hyperparameter(preprocessor) for name in available_preprocessors: preprocessor_configuration_space = available_preprocessors[name]. \ get_hyperparameter_search_space(dataset_properties) for parameter in preprocessor_configuration_space.get_hyperparameters(): new_parameter = copy.deepcopy(parameter) new_parameter.name = "%s:%s" % (name, new_parameter.name) cs.add_hyperparameter(new_parameter) # We must only add a condition if the hyperparameter is not # conditional on something else if len(preprocessor_configuration_space. get_parents_of(parameter)) == 0: condition = EqualsCondition(new_parameter, preprocessor, name) cs.add_condition(condition) for condition in available_preprocessors[name]. \ get_hyperparameter_search_space( dataset_properties).get_conditions(): if not isinstance(condition, AbstractConjunction): dlcs = [condition] else: dlcs = condition.get_descendent_literal_conditions() for dlc in dlcs: if not dlc.child.name.startswith(name): dlc.child.name = "%s:%s" % (name, dlc.child.name) if not dlc.parent.name.startswith(name): dlc.parent.name = "%s:%s" % (name, dlc.parent.name) cs.add_condition(condition) for forbidden_clause in available_preprocessors[name]. \ get_hyperparameter_search_space( dataset_properties).forbidden_clauses: dlcs = forbidden_clause.get_descendant_literal_clauses() for dlc in dlcs: if not dlc.hyperparameter.name.startswith(name): dlc.hyperparameter.name = "%s:%s" % (name, dlc.hyperparameter.name) cs.add_forbidden_clause(forbidden_clause) return cs
cs = ConfigurationSpace()
C = UniformFloatHyperparameter(
    "C", 0.03125, 32768, log=True, default_value=1.0)
loss = CategoricalHyperparameter(
    "loss", ["epsilon_insensitive", "squared_epsilon_insensitive"],
    default_value="squared_epsilon_insensitive")
epsilon = UniformFloatHyperparameter(
    name="epsilon", lower=0.001, upper=1, default_value=0.1, log=True)
dual = Constant("dual", "False")
tol = UniformFloatHyperparameter("tol", 1e-5, 1e-1, default_value=1e-4, log=True)
fit_intercept = Constant("fit_intercept", "True")
intercept_scaling = Constant("intercept_scaling", 1)
cs.add_hyperparameters(
    [C, loss, epsilon, dual, tol, fit_intercept, intercept_scaling])

dual_and_loss = ForbiddenAndConjunction(
    ForbiddenEqualsClause(dual, "False"),
    ForbiddenEqualsClause(loss, "epsilon_insensitive"))
cs.add_forbidden_clause(dual_and_loss)
json_utils.write_cs_to_json_file(cs, "LinearSVR")
def add_forbidden( conf_space: ConfigurationSpace, pipeline: List[Tuple[str, autoPyTorchChoice]], matches: np.ndarray, dataset_properties: Dict[str, Any], include: Optional[Dict[str, Any]] = None, exclude: Optional[Dict[str, Any]] = None ) -> ConfigurationSpace: # Not sure if this works for 3D node_i_is_choice = [] node_i_choices_names: List[List[str]] = [] node_i_choices: List[List[Union[autoPyTorchComponent, autoPyTorchChoice]]] = [] all_nodes = [] for node_name, node in pipeline: all_nodes.append(node) is_choice = hasattr(node, "get_available_components") node_i_is_choice.append(is_choice) node_include = include.get( node_name) if include is not None else None node_exclude = exclude.get( node_name) if exclude is not None else None if is_choice: node_i_choices_names.append( [str(element) for element in node.get_available_components( dataset_properties, include=node_include, exclude=node_exclude).keys()] ) node_i_choices.append( list(node.get_available_components( dataset_properties, include=node_include, exclude=node_exclude ).values())) else: node_i_choices_names.append([node_name]) node_i_choices.append([node]) # Find out all chains of choices. Only in such a chain its possible to # have several forbidden constraints choices_chains = [] idx = 0 while idx < len(pipeline): if node_i_is_choice[idx]: chain_start = idx idx += 1 while idx < len(pipeline) and node_i_is_choice[idx]: idx += 1 chain_stop = idx choices_chains.append((chain_start, chain_stop)) idx += 1 for choices_chain in choices_chains: constraints: Set[Tuple] = set() chain_start = choices_chain[0] chain_stop = choices_chain[1] chain_length = chain_stop - chain_start # Add one to have also have chain_length in the range for sub_chain_length in range(2, chain_length + 1): for start_idx in range(chain_start, chain_stop - sub_chain_length + 1): indices = range(start_idx, start_idx + sub_chain_length) node_names = [pipeline[idx][0] for idx in indices] num_node_choices = [] node_choice_names = [] skip_array_shape = [] for idx in indices: node = all_nodes[idx] available_components = node.get_available_components( dataset_properties, include=node_i_choices_names[idx]) assert len(available_components) > 0, len(available_components) skip_array_shape.append(len(available_components)) num_node_choices.append(range(len(available_components))) node_choice_names.append([name for name in available_components]) # Figure out which choices were already abandoned skip_array = np.zeros(skip_array_shape) for product in itertools.product(*num_node_choices): for node_idx, choice_idx in enumerate(product): node_idx += start_idx slices_ = tuple( slice(None) if idx != node_idx else slice(choice_idx, choice_idx + 1) for idx in range(len(matches.shape))) if np.sum(matches[slices_]) == 0: skip_array[product] = 1 for product in itertools.product(*num_node_choices): if skip_array[product]: continue slices = tuple( slice(None) if idx not in indices else slice(product[idx - start_idx], product[idx - start_idx] + 1) for idx in range(len(matches.shape))) if np.sum(matches[slices]) == 0: constraint = tuple([(node_names[i], node_choice_names[i][product[i]]) for i in range(len(product))]) # Check if a more general constraint/forbidden clause # was already added continue_ = False for constraint_length in range(2, len(constraint)): constr_starts = len(constraint) - constraint_length + 1 for constraint_start_idx in range(constr_starts): constraint_end_idx = constraint_start_idx + constraint_length sub_constraint = constraint[constraint_start_idx:constraint_end_idx] if 
sub_constraint in constraints: continue_ = True break if continue_: break if continue_: continue constraints.add(constraint) forbiddens = [] for i in range(len(product)): forbiddens.append( ForbiddenEqualsClause(conf_space.get_hyperparameter( node_names[i] + ":__choice__"), node_choice_names[i][product[i]])) forbidden = ForbiddenAndConjunction(*forbiddens) conf_space.add_forbidden_clause(forbidden) return conf_space
def read(pcs_string, debug=False): """ Read in a :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace` definition from a pcs file. Example ------- .. testsetup:: pcs_new_test from ConfigSpace import ConfigurationSpace import ConfigSpace.hyperparameters as CSH from ConfigSpace.read_and_write import pcs_new cs = ConfigurationSpace() cs.add_hyperparameter(CSH.CategoricalHyperparameter('a', choices=[1, 2, 3])) with open('configspace.pcs_new', 'w') as f: f.write(pcs_new.write(cs)) .. doctest:: pcs_new_test >>> from ConfigSpace.read_and_write import pcs_new >>> with open('configspace.pcs_new', 'r') as fh: ... deserialized_conf = pcs_new.read(fh) Parameters ---------- pcs_string : str ConfigSpace definition in pcs format debug : bool Provides debug information. Defaults to False. Returns ------- :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace` The deserialized ConfigurationSpace object """ configuration_space = ConfigurationSpace() conditions = [] forbidden = [] # some statistics ct = 0 cont_ct = 0 cat_ct = 0 ord_ct = 0 line_ct = 0 for line in pcs_string: line_ct += 1 if "#" in line: # It contains a comment pos = line.find("#") line = line[:pos] # Remove quotes and whitespaces at beginning and end line = line.replace('"', "").replace("'", "") line = line.strip() if "|" in line: # It's a condition try: c = pp_condition.parseString(line) conditions.append(c) except pyparsing.ParseException: raise NotImplementedError("Could not parse condition: %s" % line) continue if "}" not in line and "]" not in line: continue if line.startswith("{") and line.endswith("}"): forbidden.append(line) continue if len(line.strip()) == 0: continue ct += 1 param = None create = { "int": UniformIntegerHyperparameter, "float": UniformFloatHyperparameter, "categorical": CategoricalHyperparameter, "ordinal": OrdinalHyperparameter } try: param_list = pp_cont_param.parseString(line) name = param_list[0] if param_list[1] == 'integer': paramtype = 'int' elif param_list[1] == 'real': paramtype = 'float' else: paramtype = None if paramtype in ['int', 'float']: log = param_list[10:] param_list = param_list[:10] if len(log) > 0: log = log[0] lower = float(param_list[3]) upper = float(param_list[5]) log_on = True if "log" in log else False default_value = float(param_list[8]) param = create[paramtype](name=name, lower=lower, upper=upper, q=None, log=log_on, default_value=default_value) cont_ct += 1 except pyparsing.ParseException: pass try: if "categorical" in line: param_list = pp_cat_param.parseString(line) name = param_list[0] choices = [choice for choice in param_list[3:-4:2]] default_value = param_list[-2] param = create["categorical"]( name=name, choices=choices, default_value=default_value, ) cat_ct += 1 elif "ordinal" in line: param_list = pp_ord_param.parseString(line) name = param_list[0] sequence = [seq for seq in param_list[3:-4:2]] default_value = param_list[-2] param = create["ordinal"]( name=name, sequence=sequence, default_value=default_value, ) ord_ct += 1 except pyparsing.ParseException: pass if param is None: raise NotImplementedError("Could not parse: %s" % line) configuration_space.add_hyperparameter(param) for clause in forbidden: param_list = pp_forbidden_clause.parseString(clause) tmp_list = [] clause_list = [] for value in param_list[1:]: if len(tmp_list) < 3: tmp_list.append(value) else: # So far, only equals is supported by SMAC if tmp_list[1] == '=': hp = configuration_space.get_hyperparameter(tmp_list[0]) if isinstance(hp, NumericalHyperparameter): if isinstance(hp, 
IntegerHyperparameter): forbidden_value = int(tmp_list[2]) elif isinstance(hp, FloatHyperparameter): forbidden_value = float(tmp_list[2]) else: raise NotImplementedError if forbidden_value < hp.lower or forbidden_value > hp.upper: raise ValueError( f'forbidden_value is set out of the bound, it needs to' f' be set between [{hp.lower}, {hp.upper}]' f' but its value is {forbidden_value}') elif isinstance( hp, (CategoricalHyperparameter, OrdinalHyperparameter)): hp_values = hp.choices if isinstance(hp, CategoricalHyperparameter)\ else hp.sequence forbidden_value_in_hp_values = tmp_list[2] in hp_values if forbidden_value_in_hp_values: forbidden_value = tmp_list[2] else: raise ValueError( f'forbidden_value is set out of the allowed value ' f'sets, it needs to be one member from {hp_values} ' f'but its value is {forbidden_value}') else: raise ValueError('Unsupported Hyperparamter sorts') clause_list.append( ForbiddenEqualsClause( configuration_space.get_hyperparameter( tmp_list[0]), forbidden_value)) else: raise NotImplementedError() tmp_list = [] configuration_space.add_forbidden_clause( ForbiddenAndConjunction(*clause_list)) conditions_per_child = OrderedDict() for condition in conditions: child_name = condition[0] if child_name not in conditions_per_child: conditions_per_child[child_name] = list() conditions_per_child[child_name].append(condition) for child_name in conditions_per_child: for condition in conditions_per_child[child_name]: condition = condition[2:] condition = ' '.join(condition) if '||' in str(condition): ors = [] # 1st case we have a mixture of || and && if '&&' in str(condition): ors_combis = [] for cond_parts in str(condition).split('||'): condition = str(cond_parts).split('&&') # if length is 1 it must be or if len(condition) == 1: element_list = condition[0].split() ors_combis.append( condition_specification( child_name, element_list, configuration_space, )) else: # now taking care of ands ands = [] for and_part in condition: element_list = [ element for part in condition for element in and_part.split() ] ands.append( condition_specification( child_name, element_list, configuration_space, )) ors_combis.append(AndConjunction(*ands)) mixed_conjunction = OrConjunction(*ors_combis) configuration_space.add_condition(mixed_conjunction) else: # 2nd case: we only have ors for cond_parts in str(condition).split('||'): element_list = [ element for element in cond_parts.split() ] ors.append( condition_specification( child_name, element_list, configuration_space, )) or_conjunction = OrConjunction(*ors) configuration_space.add_condition(or_conjunction) else: # 3rd case: we only have ands if '&&' in str(condition): ands = [] for cond_parts in str(condition).split('&&'): element_list = [ element for element in cond_parts.split() ] ands.append( condition_specification( child_name, element_list, configuration_space, )) and_conjunction = AndConjunction(*ands) configuration_space.add_condition(and_conjunction) else: # 4th case: we have a normal condition element_list = [element for element in condition.split()] normal_condition = condition_specification( child_name, element_list, configuration_space, ) configuration_space.add_condition(normal_condition) return configuration_space
def read(pcs_string, debug=False): configuration_space = ConfigurationSpace() conditions = [] forbidden = [] # some statistics ct = 0 cont_ct = 0 cat_ct = 0 ord_ct = 0 line_ct = 0 for line in pcs_string: line_ct += 1 if "#" in line: # It contains a comment pos = line.find("#") line = line[:pos] # Remove quotes and whitespaces at beginning and end line = line.replace('"', "").replace("'", "") line = line.strip() if "|" in line: # It's a condition try: c = pp_condition.parseString(line) conditions.append(c) except pyparsing.ParseException: raise NotImplementedError("Could not parse condition: %s" % line) continue if "}" not in line and "]" not in line: continue if line.startswith("{") and line.endswith("}"): forbidden.append(line) continue if len(line.strip()) == 0: continue ct += 1 param = None create = {"int": UniformIntegerHyperparameter, "float": UniformFloatHyperparameter, "categorical": CategoricalHyperparameter, "ordinal": OrdinalHyperparameter } try: param_list = pp_cont_param.parseString(line) name = param_list[0] if param_list[1] == 'integer': paramtype = 'int' elif param_list[1] == 'real': paramtype = 'float' else: paramtype = None if paramtype in ['int', 'float']: log = param_list[10:] param_list = param_list[:10] if len(log) > 0: log = log[0] lower = float(param_list[3]) upper = float(param_list[5]) log_on = True if "log" in log else False default = float(param_list[8]) param = create[paramtype](name=name, lower=lower, upper=upper, q=None, log=log_on, default=default) cont_ct += 1 except pyparsing.ParseException: pass try: if "categorical" in line: param_list = pp_cat_param.parseString(line) name = param_list[0] choices = [choice for choice in param_list[3:-4:2]] default = param_list[-2] param = create["categorical"](name=name, choices=choices, default=default) cat_ct += 1 elif "ordinal" in line: param_list = pp_ord_param.parseString(line) name = param_list[0] sequence = [seq for seq in param_list[3:-4:2]] default = param_list[-2] param = create["ordinal"](name=name, sequence=sequence, default=default) ord_ct += 1 except pyparsing.ParseException: pass if param is None: raise NotImplementedError("Could not parse: %s" % line) configuration_space.add_hyperparameter(param) for clause in forbidden: param_list = pp_forbidden_clause.parseString(clause) tmp_list = [] clause_list = [] for value in param_list[1:]: if len(tmp_list) < 3: tmp_list.append(value) else: # So far, only equals is supported by SMAC if tmp_list[1] == '=': # TODO maybe add a check if the hyperparameter is # actually in the configuration space clause_list.append(ForbiddenEqualsClause( configuration_space.get_hyperparameter(tmp_list[0]), tmp_list[2])) else: raise NotImplementedError() tmp_list = [] configuration_space.add_forbidden_clause(ForbiddenAndConjunction( *clause_list)) conditions_per_child = OrderedDict() for condition in conditions: child_name = condition[0] if child_name not in conditions_per_child: conditions_per_child[child_name] = list() conditions_per_child[child_name].append(condition) for child_name in conditions_per_child: for condition in conditions_per_child[child_name]: condition = condition[2:] condition = ' '.join(condition) if '||' in str(condition): ors = [] # 1st case we have a mixture of || and && if '&&' in str(condition): ors_combis = [] for cond_parts in str(condition).split('||'): condition = str(cond_parts).split('&&') # if length is 1 it must be or if len(condition) == 1: element_list = condition[0].split() ors_combis.append(condition_specification(child_name, element_list, 
configuration_space)) else: # now taking care of ands ands = [] for and_part in condition: element_list = [element for part in condition for element in and_part.split()] ands.append(condition_specification(child_name, element_list, configuration_space)) ors_combis.append(AndConjunction(*ands)) mixed_conjunction = OrConjunction(*ors_combis) configuration_space.add_condition(mixed_conjunction) else: # 2nd case: we only have ors for cond_parts in str(condition).split('||'): element_list = [element for element in cond_parts.split()] ors.append(condition_specification(child_name, element_list, configuration_space)) or_conjunction = OrConjunction(*ors) configuration_space.add_condition(or_conjunction) else: # 3rd case: we only have ands if '&&' in str(condition): ands = [] for cond_parts in str(condition).split('&&'): element_list = [element for element in cond_parts.split()] ands.append(condition_specification(child_name, element_list, configuration_space)) and_conjunction = AndConjunction(*ands) configuration_space.add_condition(and_conjunction) else: # 4th case: we have a normal condition element_list = [element for element in condition.split()] normal_condition = condition_specification(child_name, element_list, configuration_space) configuration_space.add_condition(normal_condition) return configuration_space
def get_hyperparameter_search_space(cls, dataset_properties, default=None, include=None, exclude=None): if include is not None and exclude is not None: raise ValueError("The argument include and exclude cannot be used together.") cs = ConfigurationSpace() # Compile a list of all estimator objects for this problem available_estimators = cls.get_available_components( data_prop=dataset_properties, include=include, exclude=exclude) if len(available_estimators) == 0: raise ValueError("No regressors found") if default is None: defaults = ['random_forest', 'support_vector_regression'] + \ list(available_estimators.keys()) for default_ in defaults: if default_ in available_estimators: if include is not None and default_ not in include: continue if exclude is not None and default_ in exclude: continue default = default_ break estimator = CategoricalHyperparameter('__choice__', list(available_estimators.keys()), default=default) cs.add_hyperparameter(estimator) for estimator_name in available_estimators.keys(): # We have to retrieve the configuration space every time because # we change the objects it returns. If we reused it, we could not # retrieve the conditions further down # TODO implement copy for hyperparameters and forbidden and # conditions! estimator_configuration_space = available_estimators[ estimator_name]. \ get_hyperparameter_search_space(dataset_properties) for parameter in estimator_configuration_space.get_hyperparameters(): new_parameter = copy.deepcopy(parameter) new_parameter.name = "%s:%s" % ( estimator_name, new_parameter.name) cs.add_hyperparameter(new_parameter) # We must only add a condition if the hyperparameter is not # conditional on something else if len(estimator_configuration_space. get_parents_of(parameter)) == 0: condition = EqualsCondition(new_parameter, estimator, estimator_name) cs.add_condition(condition) for condition in available_estimators[estimator_name]. \ get_hyperparameter_search_space( dataset_properties).get_conditions(): dlcs = condition.get_descendant_literal_conditions() for dlc in dlcs: if not dlc.child.name.startswith(estimator_name): dlc.child.name = "%s:%s" % ( estimator_name, dlc.child.name) if not dlc.parent.name.startswith(estimator_name): dlc.parent.name = "%s:%s" % ( estimator_name, dlc.parent.name) cs.add_condition(condition) for forbidden_clause in available_estimators[estimator_name]. \ get_hyperparameter_search_space( dataset_properties).forbidden_clauses: dlcs = forbidden_clause.get_descendant_literal_clauses() for dlc in dlcs: if not dlc.hyperparameter.name.startswith(estimator_name): dlc.hyperparameter.name = "%s:%s" % (estimator_name, dlc.hyperparameter.name) cs.add_forbidden_clause(forbidden_clause) return cs
def get_hyperspace(data_info, include_estimators=None, include_preprocessors=None): if data_info is None or not isinstance(data_info, dict): data_info = dict() if 'is_sparse' not in data_info: # This dataset is probaby dense data_info['is_sparse'] = False sparse = data_info['is_sparse'] task_type = data_info['task'] multilabel = (task_type == MULTILABEL_CLASSIFICATION) multiclass = (task_type == MULTICLASS_CLASSIFICATION) if task_type in CLASSIFICATION_TASKS: data_info['multilabel'] = multilabel data_info['multiclass'] = multiclass data_info['target_type'] = 'classification' pipe_type = 'classifier' # Components match to be forbidden components_ = ["adaboost", "decision_tree", "extra_trees", "gradient_boosting", "k_nearest_neighbors", "libsvm_svc", "random_forest", "gaussian_nb", "decision_tree"] feature_learning_ = ["kitchen_sinks", "nystroem_sampler"] elif task_type in REGRESSION_TASKS: data_info['target_type'] = 'regression' pipe_type = 'regressor' # Components match to be forbidden components_ = ["adaboost", "decision_tree", "extra_trees", "gaussian_process", "gradient_boosting", "k_nearest_neighbors", "random_forest"] feature_learning_ = ["kitchen_sinks", "kernel_pca", "nystroem_sampler"] else: raise NotImplementedError() include, exclude = dict(), dict() if include_preprocessors is not None: include["preprocessor"] = include_preprocessors if include_estimators is not None: include[pipe_type] = include_estimators cs = ConfigurationSpace() # Construct pipeline # FIXME OrderedDIct? pipeline = get_pipeline(data_info['task']) # TODO include, exclude, pipeline keys = [pair[0] for pair in pipeline] for key in include: if key not in keys: raise ValueError('Invalid key in include: %s; should be one ' 'of %s' % (key, keys)) for key in exclude: if key not in keys: raise ValueError('Invalid key in exclude: %s; should be one ' 'of %s' % (key, keys)) # Construct hyperspace # TODO What's the 'signed' stands for? if 'signed' not in data_info: # This dataset probably contains unsigned data data_info['signed'] = False match = check_pipeline(pipeline, data_info, include=include, exclude=exclude) # Now we have only legal combinations at this step of the pipeline # Simple sanity checks assert np.sum(match) != 0, "No valid pipeline found." 
assert np.sum(match) <= np.size(match), \ "'matches' is not binary; %s <= %d, %s" % \ (str(np.sum(match)), np.size(match), str(match.shape)) # Iterate each dimension of the matches array (each step of the # pipeline) to see if we can add a hyperparameter for that step for node_idx, n_ in enumerate(pipeline): node_name, node = n_ is_choice = hasattr(node, "get_available_components") # if the node isn't a choice we can add it immediately because it # must be active (if it wouldn't, np.sum(matches) would be zero if not is_choice: cs.add_configuration_space(node_name, node.get_hyperparameter_search_space(data_info)) # If the node isn't a choice, we have to figure out which of it's # choices are actually legal choices else: choices_list = find_active_choices(match, node, node_idx,data_info, include=include.get(node_name), exclude=exclude.get(node_name)) cs.add_configuration_space(node_name, node.get_hyperparameter_search_space(data_info, include=choices_list)) # And now add forbidden parameter configurations # According to matches if np.sum(match) < np.size(match): cs = add_forbidden(conf_space=cs, pipeline=pipeline, matches=match, dataset_properties=data_info, include=include, exclude=exclude) components = cs.get_hyperparameter('%s:__choice__' % pipe_type).choices availables = pipeline[-1][1].get_available_components(data_info) preprocessors = cs.get_hyperparameter('preprocessor:__choice__').choices #available_preprocessors = pipeline[-2][1].get_available_components(data_info) possible_default = copy.copy(list(availables.keys())) default = cs.get_hyperparameter('%s:__choice__' % pipe_type).default del possible_default[possible_default.index(default)] # A classifier which can handle sparse data after the densifier is # forbidden for memory issues for key in components: # TODO regression dataset_properties=None if SPARSE in availables[key].get_properties()['input']: if 'densifier' in preprocessors: while True: try: cs.add_forbidden_clause( ForbiddenAndConjunction( ForbiddenEqualsClause( cs.get_hyperparameter( '%s:__choice__' % pipe_type), key), ForbiddenEqualsClause( cs.get_hyperparameter( 'preprocessor:__choice__'), 'densifier') )) # Success break except ValueError: # Change the default and try again try: default = possible_default.pop() except IndexError: raise ValueError("Cannot find a legal default configuration.") cs.get_hyperparameter('%s:__choice__' % pipe_type).default = default # which would take too long # Combinations of non-linear models with feature learning: for c, f in itertools.product(components_, feature_learning_): if c not in components: continue if f not in preprocessors: continue while True: try: cs.add_forbidden_clause(ForbiddenAndConjunction( ForbiddenEqualsClause(cs.get_hyperparameter( "%s:__choice__" % pipe_type), c), ForbiddenEqualsClause(cs.get_hyperparameter( "preprocessor:__choice__"), f))) break except KeyError: break except ValueError as e: # Change the default and try again try: default = possible_default.pop() except IndexError: raise ValueError( "Cannot find a legal default configuration.") cs.get_hyperparameter('%s:__choice__' % pipe_type).default = default if task_type in CLASSIFICATION_TASKS: # Won't work # Multinomial NB etc don't use with features learning, pca etc components_ = ["multinomial_nb"] preproc_with_negative_X = ["kitchen_sinks", "pca", "truncatedSVD", "fast_ica", "kernel_pca", "nystroem_sampler"] for c, f in itertools.product(components_, preproc_with_negative_X): if c not in components: continue if f not in preprocessors: continue while True: 
try: cs.add_forbidden_clause(ForbiddenAndConjunction( ForbiddenEqualsClause(cs.get_hyperparameter( "preprocessor:__choice__"), f), ForbiddenEqualsClause(cs.get_hyperparameter( "classifier:__choice__"), c))) break except KeyError: break except ValueError: # Change the default and try again try: default = possible_default.pop() except IndexError: raise ValueError( "Cannot find a legal default configuration.") cs.get_hyperparameter('classifier:__choice__').default = default return cs
def read(pcs_string, debug=False): configuration_space = ConfigurationSpace() conditions = [] forbidden = [] # some statistics ct = 0 cont_ct = 0 cat_ct = 0 line_ct = 0 for line in pcs_string: line_ct += 1 if "#" in line: # It contains a comment pos = line.find("#") line = line[:pos] # Remove quotes and whitespaces at beginning and end line = line.replace('"', "").replace("'", "") line = line.strip() if "|" in line: # It's a condition try: c = pp_condition.parseString(line) conditions.append(c) except pyparsing.ParseException: raise NotImplementedError("Could not parse condition: %s" % line) continue if "}" not in line and "]" not in line: continue if line.startswith("{") and line.endswith("}"): forbidden.append(line) continue if len(line.strip()) == 0: continue ct += 1 param = None create = { "int": UniformIntegerHyperparameter, "float": UniformFloatHyperparameter, "categorical": CategoricalHyperparameter } try: param_list = pp_cont_param.parseString(line) il = param_list[9:] if len(il) > 0: il = il[0] param_list = param_list[:9] name = param_list[0] lower = float(param_list[2]) upper = float(param_list[4]) paramtype = "int" if "i" in il else "float" log = True if "l" in il else False default = float(param_list[7]) param = create[paramtype](name=name, lower=lower, upper=upper, q=None, log=log, default=default) cont_ct += 1 except pyparsing.ParseException: pass try: param_list = pp_cat_param.parseString(line) name = param_list[0] choices = [c for c in param_list[2:-4:2]] default = param_list[-2] param = create["categorical"](name=name, choices=choices, default=default) cat_ct += 1 except pyparsing.ParseException: pass if param is None: raise NotImplementedError("Could not parse: %s" % line) configuration_space.add_hyperparameter(param) for clause in forbidden: # TODO test this properly! # TODO Add a try/catch here! # noinspection PyUnusedLocal param_list = pp_forbidden_clause.parseString(clause) tmp_list = [] clause_list = [] for value in param_list[1:]: if len(tmp_list) < 3: tmp_list.append(value) else: # So far, only equals is supported by SMAC if tmp_list[1] == '=': # TODO maybe add a check if the hyperparameter is # actually in the configuration space clause_list.append( ForbiddenEqualsClause( configuration_space.get_hyperparameter( tmp_list[0]), tmp_list[2])) else: raise NotImplementedError() tmp_list = [] configuration_space.add_forbidden_clause( ForbiddenAndConjunction(*clause_list)) #Now handle conditions # If there are two conditions for one child, these two conditions are an # AND-conjunction of conditions, thus we have to connect them conditions_per_child = OrderedDict() for condition in conditions: child_name = condition[0] if child_name not in conditions_per_child: conditions_per_child[child_name] = list() conditions_per_child[child_name].append(condition) for child_name in conditions_per_child: condition_objects = [] for condition in conditions_per_child[child_name]: child = configuration_space.get_hyperparameter(child_name) parent_name = condition[2] parent = configuration_space.get_hyperparameter(parent_name) restrictions = condition[5:-1:2] # TODO: cast the type of the restriction! if len(restrictions) == 1: condition = EqualsCondition(child, parent, restrictions[0]) else: condition = InCondition(child, parent, values=restrictions) condition_objects.append(condition) # Now we have all condition objects for this child, so we can build a # giant AND-conjunction of them (if number of conditions >= 2)! 
if len(condition_objects) > 1: and_conjunction = AndConjunction(*condition_objects) configuration_space.add_condition(and_conjunction) else: configuration_space.add_condition(condition_objects[0]) return configuration_space
def read(pcs_string, debug=False): """ Reads in a :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace` definition from a pcs file. Example ------- >>> from ConfigSpace.read_and_write import pcs_new >>> with open('configspace.pcs', 'r') as fh: >>> restored_conf = pcs_new.read(fh) Parameters ---------- pcs_string : str ConfigSpace definition in pcs format debug : bool Provides debug information. Defaults to False. Returns ------- :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace` The restored ConfigurationSpace object """ configuration_space = ConfigurationSpace() conditions = [] forbidden = [] # some statistics ct = 0 cont_ct = 0 cat_ct = 0 ord_ct = 0 line_ct = 0 for line in pcs_string: line_ct += 1 if "#" in line: # It contains a comment pos = line.find("#") line = line[:pos] # Remove quotes and whitespaces at beginning and end line = line.replace('"', "").replace("'", "") line = line.strip() if "|" in line: # It's a condition try: c = pp_condition.parseString(line) conditions.append(c) except pyparsing.ParseException: raise NotImplementedError("Could not parse condition: %s" % line) continue if "}" not in line and "]" not in line: continue if line.startswith("{") and line.endswith("}"): forbidden.append(line) continue if len(line.strip()) == 0: continue ct += 1 param = None create = {"int": UniformIntegerHyperparameter, "float": UniformFloatHyperparameter, "categorical": CategoricalHyperparameter, "ordinal": OrdinalHyperparameter } try: param_list = pp_cont_param.parseString(line) name = param_list[0] if param_list[1] == 'integer': paramtype = 'int' elif param_list[1] == 'real': paramtype = 'float' else: paramtype = None if paramtype in ['int', 'float']: log = param_list[10:] param_list = param_list[:10] if len(log) > 0: log = log[0] lower = float(param_list[3]) upper = float(param_list[5]) log_on = True if "log" in log else False default_value = float(param_list[8]) param = create[paramtype](name=name, lower=lower, upper=upper, q=None, log=log_on, default_value=default_value) cont_ct += 1 except pyparsing.ParseException: pass try: if "categorical" in line: param_list = pp_cat_param.parseString(line) name = param_list[0] choices = [choice for choice in param_list[3:-4:2]] default_value = param_list[-2] param = create["categorical"](name=name, choices=choices, default_value=default_value) cat_ct += 1 elif "ordinal" in line: param_list = pp_ord_param.parseString(line) name = param_list[0] sequence = [seq for seq in param_list[3:-4:2]] default_value = param_list[-2] param = create["ordinal"](name=name, sequence=sequence, default_value=default_value) ord_ct += 1 except pyparsing.ParseException: pass if param is None: raise NotImplementedError("Could not parse: %s" % line) configuration_space.add_hyperparameter(param) for clause in forbidden: param_list = pp_forbidden_clause.parseString(clause) tmp_list = [] clause_list = [] for value in param_list[1:]: if len(tmp_list) < 3: tmp_list.append(value) else: # So far, only equals is supported by SMAC if tmp_list[1] == '=': # TODO maybe add a check if the hyperparameter is # actually in the configuration space clause_list.append(ForbiddenEqualsClause( configuration_space.get_hyperparameter(tmp_list[0]), tmp_list[2])) else: raise NotImplementedError() tmp_list = [] configuration_space.add_forbidden_clause(ForbiddenAndConjunction( *clause_list)) conditions_per_child = OrderedDict() for condition in conditions: child_name = condition[0] if child_name not in conditions_per_child: conditions_per_child[child_name] = list() 
        conditions_per_child[child_name].append(condition)

    for child_name in conditions_per_child:
        for condition in conditions_per_child[child_name]:
            condition = condition[2:]
            condition = ' '.join(condition)
            if '||' in str(condition):
                ors = []
                # 1st case: we have a mixture of || and &&
                if '&&' in str(condition):
                    ors_combis = []
                    for cond_parts in str(condition).split('||'):
                        condition = str(cond_parts).split('&&')
                        # if length is 1 it must be or
                        if len(condition) == 1:
                            element_list = condition[0].split()
                            ors_combis.append(
                                condition_specification(child_name,
                                                        element_list,
                                                        configuration_space))
                        else:
                            # now taking care of ands
                            ands = []
                            for and_part in condition:
                                element_list = [element for part in condition
                                                for element in and_part.split()]
                                ands.append(
                                    condition_specification(child_name,
                                                            element_list,
                                                            configuration_space))
                            ors_combis.append(AndConjunction(*ands))
                    mixed_conjunction = OrConjunction(*ors_combis)
                    configuration_space.add_condition(mixed_conjunction)
                else:
                    # 2nd case: we only have ors
                    for cond_parts in str(condition).split('||'):
                        element_list = [element for element in cond_parts.split()]
                        ors.append(
                            condition_specification(child_name,
                                                    element_list,
                                                    configuration_space))
                    or_conjunction = OrConjunction(*ors)
                    configuration_space.add_condition(or_conjunction)
            else:
                # 3rd case: we only have ands
                if '&&' in str(condition):
                    ands = []
                    for cond_parts in str(condition).split('&&'):
                        element_list = [element for element in cond_parts.split()]
                        ands.append(
                            condition_specification(child_name,
                                                    element_list,
                                                    configuration_space))
                    and_conjunction = AndConjunction(*ands)
                    configuration_space.add_condition(and_conjunction)
                else:
                    # 4th case: we have a normal condition
                    element_list = [element for element in condition.split()]
                    normal_condition = condition_specification(child_name,
                                                               element_list,
                                                               configuration_space)
                    configuration_space.add_condition(normal_condition)

    return configuration_space
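
# A minimal usage sketch for the reader above, assuming a ConfigSpace release
# that ships ConfigSpace.read_and_write.pcs_new and an existing file
# 'configspace.pcs' written in the "new" pcs format:
from ConfigSpace.read_and_write import pcs_new

with open('configspace.pcs', 'r') as fh:
    restored_conf = pcs_new.read(fh)
# The result is a regular ConfigurationSpace, so hyperparameters, conditions
# and forbidden clauses can be inspected as usual.
print(restored_conf.get_hyperparameters())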
def get_hyperparameter_search_space(**kwargs):
    cs = ConfigurationSpace()

    penalty = CategoricalHyperparameter(
        "penalty", ["l1", "l2", "elasticnet", "none"], default_value='l2')
    solver = CategoricalHyperparameter(
        "solver", ["newton-cg", "lbfgs", "liblinear", "sag", "saga"],
        default_value="lbfgs")
    dual = CategoricalHyperparameter("dual", choices=[True, False],
                                     default_value=False)
    tol = UniformFloatHyperparameter("tol", lower=1e-7, upper=100.,
                                     default_value=1.0e-4, log=True)
    C = UniformFloatHyperparameter("C", lower=1e-7, upper=100.,
                                   default_value=1.0, log=True)
    fit_intercept = CategoricalHyperparameter("fit_intercept",
                                              choices=[True, False],
                                              default_value=True)
    intercept_scaling = UniformFloatHyperparameter("intercept_scaling",
                                                   lower=0.0001, upper=2.0,
                                                   default_value=1.0, log=True)
    max_iter = UniformIntegerHyperparameter("max_iter", lower=50, upper=10000,
                                            default_value=100)
    multi_class = CategoricalHyperparameter("multi_class",
                                            ["ovr", "multinomial", "auto"],
                                            default_value="auto")
    l1_ratio = UniformFloatHyperparameter("l1_ratio", lower=0., upper=1.,
                                          default_value=0.1)

    l1_ratio_condition = InCondition(l1_ratio, penalty, ["elasticnet"])
    dual_condition = AndConjunction(InCondition(dual, penalty, ["l2"]),
                                    InCondition(dual, solver, ["liblinear"]))

    cs.add_hyperparameters([penalty, solver, dual, tol, C, fit_intercept,
                            intercept_scaling, max_iter, multi_class, l1_ratio])

    penaltyAndLbfgs = ForbiddenAndConjunction(
        ForbiddenEqualsClause(solver, "lbfgs"),
        ForbiddenInClause(penalty, ["l1", "elasticnet"])
    )
    penaltyAndNewton = ForbiddenAndConjunction(
        ForbiddenEqualsClause(solver, "newton-cg"),
        ForbiddenInClause(penalty, ["l1", "elasticnet"])
    )
    penaltyAndSag = ForbiddenAndConjunction(
        ForbiddenEqualsClause(solver, "sag"),
        ForbiddenInClause(penalty, ["l1", "elasticnet"])
    )
    penaltyAndSaga = ForbiddenAndConjunction(
        ForbiddenInClause(penalty, ["elasticnet"]),
        ForbiddenInClause(solver, ["newton-cg", "lbfgs", "sag"])
    )
    penaltyAndSagaa = ForbiddenAndConjunction(
        ForbiddenInClause(penalty, ["elasticnet", "none"]),
        ForbiddenInClause(solver, ["liblinear"])
    )
    penaltyAndSagaaa = ForbiddenAndConjunction(
        ForbiddenInClause(multi_class, ["multinomial"]),
        ForbiddenInClause(solver, ["liblinear"])
    )

    cs.add_forbidden_clause(penaltyAndLbfgs)
    cs.add_forbidden_clause(penaltyAndNewton)
    cs.add_forbidden_clause(penaltyAndSag)
    cs.add_forbidden_clause(penaltyAndSagaa)
    cs.add_forbidden_clause(penaltyAndSaga)
    cs.add_forbidden_clause(penaltyAndSagaaa)

    cs.add_condition(l1_ratio_condition)
    cs.add_condition(dual_condition)
    return cs
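
# A brief usage sketch, assuming the function above is importable from the
# surrounding module: sampled configurations automatically respect the
# conditions (e.g. l1_ratio is only active for the elasticnet penalty) and the
# forbidden solver/penalty combinations declared above.
logreg_cs = get_hyperparameter_search_space()
for sampled_config in logreg_cs.sample_configuration(5):
    print(sampled_config)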
def get_hyperparameter_search_space(cls, include=None, exclude=None,
                                    dataset_properties=None):
    """Return the configuration space for the CASH problem.

    Parameters
    ----------
    include_estimators : list of str
        If include_estimators is given, only the regressors specified are used.
        Specify them by their module name; e.g., to include only the SVM use
        :python:`include_regressors=['svr']`.
        Cannot be used together with :python:`exclude_regressors`.

    exclude_estimators : list of str
        If exclude_estimators is given, only the regressors specified are used.
        Specify them by their module name; e.g., to include all regressors
        except the SVM use :python:`exclude_regressors=['svr']`.
        Cannot be used together with :python:`include_regressors`.

    include_preprocessors : list of str
        If include_preprocessors is given, only the preprocessors specified are
        used. Specify them by their module name; e.g., to include only the PCA
        use :python:`include_preprocessors=['pca']`.
        Cannot be used together with :python:`exclude_preprocessors`.

    exclude_preprocessors : list of str
        If exclude_preprocessors is given, only the preprocessors specified are
        used. Specify them by their module name; e.g., to include all
        preprocessors except the PCA use :python:`exclude_preprocessors=['pca']`.
        Cannot be used together with :python:`include_preprocessors`.

    Returns
    -------
    cs : ConfigSpace.configuration_space.ConfigurationSpace
        The configuration space describing the SimpleRegressionClassifier.
    """
    cs = ConfigurationSpace()

    if dataset_properties is None or not isinstance(dataset_properties, dict):
        dataset_properties = dict()
    if 'target_type' not in dataset_properties:
        dataset_properties['target_type'] = 'regression'
    if dataset_properties['target_type'] != 'regression':
        dataset_properties['target_type'] = 'regression'

    if 'sparse' not in dataset_properties:
        # This dataset is probably dense
        dataset_properties['sparse'] = False

    pipeline = cls._get_pipeline()
    cs = cls._get_hyperparameter_search_space(cs, dataset_properties, exclude,
                                              include, pipeline)

    regressors = cs.get_hyperparameter('regressor:__choice__').choices
    preprocessors = cs.get_hyperparameter('preprocessor:__choice__').choices
    available_regressors = pipeline[-1][1].get_available_components(
        dataset_properties)
    available_preprocessors = pipeline[-2][1].get_available_components(
        dataset_properties)

    possible_default_regressor = copy.copy(list(
        available_regressors.keys()))
    default = cs.get_hyperparameter('regressor:__choice__').default
    del possible_default_regressor[
        possible_default_regressor.index(default)]

    # A regressor which can handle sparse data after the densifier is
    # forbidden for memory issues
    for key in regressors:
        if SPARSE in available_regressors[key].get_properties(dataset_properties=None)['input']:
            if 'densifier' in preprocessors:
                while True:
                    try:
                        cs.add_forbidden_clause(
                            ForbiddenAndConjunction(
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'regressor:__choice__'), key),
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'preprocessor:__choice__'), 'densifier')
                            ))
                        break
                    except ValueError:
                        # Change the default and try again
                        try:
                            default = possible_default_regressor.pop()
                        except IndexError:
                            raise ValueError(
                                "Cannot find a legal default configuration.")
                        cs.get_hyperparameter(
                            'regressor:__choice__').default = default

    # which would take too long
    # Combinations of tree-based models with feature learning:
    regressors_ = ["adaboost", "decision_tree", "extra_trees",
                   "gaussian_process", "gradient_boosting",
                   "k_nearest_neighbors", "random_forest", "xgradient_boosting"]
    feature_learning_ = ["kitchen_sinks", "kernel_pca",
"nystroem_sampler"] for r, f in product(regressors_, feature_learning_): if r not in regressors: continue if f not in preprocessors: continue while True: try: cs.add_forbidden_clause(ForbiddenAndConjunction( ForbiddenEqualsClause(cs.get_hyperparameter( "regressor:__choice__"), r), ForbiddenEqualsClause(cs.get_hyperparameter( "preprocessor:__choice__"), f))) break except KeyError: break except ValueError: # Change the default and try again try: default = possible_default_regressor.pop() except IndexError: raise ValueError( "Cannot find a legal default configuration.") cs.get_hyperparameter( 'regressor:__choice__').default = default return cs
def get_hyperparameter_search_space(cls, include=None, exclude=None,
                                    dataset_properties=None):
    """Create the hyperparameter configuration space.

    Parameters
    ----------
    include : dict (optional, default=None)

    Returns
    -------
    cs : ConfigSpace.configuration_space.ConfigurationSpace
        The configuration space describing the classification pipeline.
    """
    cs = ConfigurationSpace()

    if dataset_properties is None or not isinstance(dataset_properties, dict):
        dataset_properties = dict()
    if 'target_type' not in dataset_properties:
        dataset_properties['target_type'] = 'classification'
    if dataset_properties['target_type'] != 'classification':
        dataset_properties['target_type'] = 'classification'

    pipeline = cls._get_pipeline()
    cs = cls._get_hyperparameter_search_space(cs, dataset_properties, exclude,
                                              include, pipeline)

    classifiers = cs.get_hyperparameter('classifier:__choice__').choices
    preprocessors = cs.get_hyperparameter('preprocessor:__choice__').choices
    available_classifiers = pipeline[-1][1].get_available_components(
        dataset_properties)
    available_preprocessors = pipeline[-2][1].get_available_components(
        dataset_properties)

    possible_default_classifier = copy.copy(list(
        available_classifiers.keys()))
    default = cs.get_hyperparameter('classifier:__choice__').default
    del possible_default_classifier[possible_default_classifier.index(default)]

    # A classifier which can handle sparse data after the densifier is
    # forbidden for memory issues
    for key in classifiers:
        if SPARSE in available_classifiers[key].get_properties()['input']:
            if 'densifier' in preprocessors:
                while True:
                    try:
                        cs.add_forbidden_clause(
                            ForbiddenAndConjunction(
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'classifier:__choice__'), key),
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'preprocessor:__choice__'), 'densifier')
                            ))
                        # Success
                        break
                    except ValueError:
                        # Change the default and try again
                        try:
                            default = possible_default_classifier.pop()
                        except IndexError:
                            raise ValueError(
                                "Cannot find a legal default configuration.")
                        cs.get_hyperparameter(
                            'classifier:__choice__').default = default

    # which would take too long
    # Combinations of non-linear models with feature learning:
    classifiers_ = ["adaboost", "decision_tree", "extra_trees",
                    "gradient_boosting", "k_nearest_neighbors",
                    "libsvm_svc", "random_forest", "gaussian_nb",
                    "decision_tree", "xgradient_boosting"]
    feature_learning = ["kitchen_sinks", "nystroem_sampler"]

    for c, f in product(classifiers_, feature_learning):
        if c not in classifiers:
            continue
        if f not in preprocessors:
            continue
        while True:
            try:
                cs.add_forbidden_clause(ForbiddenAndConjunction(
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "classifier:__choice__"), c),
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "preprocessor:__choice__"), f)))
                break
            except KeyError:
                break
            except ValueError as e:
                # Change the default and try again
                try:
                    default = possible_default_classifier.pop()
                except IndexError:
                    raise ValueError(
                        "Cannot find a legal default configuration.")
                cs.get_hyperparameter(
                    'classifier:__choice__').default = default

    # Won't work
    # Multinomial NB etc don't use with features learning, pca etc
    classifiers_ = ["multinomial_nb"]
    preproc_with_negative_X = ["kitchen_sinks", "pca", "truncatedSVD",
                               "fast_ica", "kernel_pca", "nystroem_sampler"]
    for c, f in product(classifiers_, preproc_with_negative_X):
        if c not in classifiers:
            continue
        if f not in preprocessors:
            continue
        while True:
            try:
                cs.add_forbidden_clause(ForbiddenAndConjunction(
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "preprocessor:__choice__"), f),
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "classifier:__choice__"), c)))
                break
            except KeyError:
                break
            except ValueError:
                # Change the default and try again
                try:
                    default = possible_default_classifier.pop()
                except IndexError:
                    raise ValueError(
                        "Cannot find a legal default configuration.")
                cs.get_hyperparameter(
                    'classifier:__choice__').default = default

    return cs
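
# Why the while/try loops above keep retrying: ConfigSpace validates the
# space's default configuration whenever a forbidden clause is added, and
# raises a ValueError if the new clause would rule that default out. The code
# above therefore moves the default to another component and tries again.
# A minimal, hypothetical sketch of the failure mode:
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter
from ConfigSpace.forbidden import ForbiddenEqualsClause

demo_cs = ConfigurationSpace()
clf = CategoricalHyperparameter("classifier", ["a", "b"], default_value="a")
demo_cs.add_hyperparameter(clf)
try:
    # Forbidding the current default value triggers the ValueError branch.
    demo_cs.add_forbidden_clause(ForbiddenEqualsClause(clf, "a"))
except ValueError:
    print("forbidden clause would rule out the default configuration")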