def get_hyperparameter_search_space(dataset_properties=None):
    """Build the configuration space for a linear SVM regressor.

    Forbids dual=False together with loss='epsilon_insensitive', a
    combination the underlying solver does not accept.
    """
    C = UniformFloatHyperparameter("C", 0.03125, 32768, log=True, default=1.0)
    loss = CategoricalHyperparameter(
        "loss", ["epsilon_insensitive", "squared_epsilon_insensitive"],
        default="squared_epsilon_insensitive")
    # Random Guess
    epsilon = UniformFloatHyperparameter(
        name="epsilon", lower=0.001, upper=1, default=0.1, log=True)
    dual = Constant("dual", "False")
    # These are set ad-hoc
    tol = UniformFloatHyperparameter("tol", 1e-5, 1e-1, default=1e-4, log=True)
    fit_intercept = Constant("fit_intercept", "True")
    intercept_scaling = Constant("intercept_scaling", 1)

    cs = ConfigurationSpace()
    for hp in (C, loss, epsilon, dual, tol, fit_intercept, intercept_scaling):
        cs.add_hyperparameter(hp)

    cs.add_forbidden_clause(ForbiddenAndConjunction(
        ForbiddenEqualsClause(dual, "False"),
        ForbiddenEqualsClause(loss, "epsilon_insensitive")))
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the configuration space for an L1-penalised linear SVM classifier.

    Forbids penalty='l1' together with loss='hinge', which the solver
    does not support.
    """
    penalty = Constant("penalty", "l1")
    loss = CategoricalHyperparameter("loss", ["hinge", "squared_hinge"],
                                     default="squared_hinge")
    dual = Constant("dual", "False")
    # This is set ad-hoc
    tol = UniformFloatHyperparameter("tol", 1e-5, 1e-1, default=1e-4, log=True)
    C = UniformFloatHyperparameter("C", 0.03125, 32768, log=True, default=1.0)
    multi_class = Constant("multi_class", "ovr")
    # These are set ad-hoc
    fit_intercept = Constant("fit_intercept", "True")
    intercept_scaling = Constant("intercept_scaling", 1)

    cs = ConfigurationSpace()
    for hp in (penalty, loss, dual, tol, C, multi_class, fit_intercept,
               intercept_scaling):
        cs.add_hyperparameter(hp)

    cs.add_forbidden_clause(ForbiddenAndConjunction(
        ForbiddenEqualsClause(penalty, "l1"),
        ForbiddenEqualsClause(loss, "hinge")))
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Return the hyperparameter space of linear support vector regression."""
    cs = ConfigurationSpace()
    cs.add_hyperparameter(UniformFloatHyperparameter(
        "C", 0.03125, 32768, log=True, default=1.0))
    loss = cs.add_hyperparameter(CategoricalHyperparameter(
        "loss", ["epsilon_insensitive", "squared_epsilon_insensitive"],
        default="squared_epsilon_insensitive"))
    # Random Guess
    cs.add_hyperparameter(UniformFloatHyperparameter(
        name="epsilon", lower=0.001, upper=1, default=0.1, log=True))
    dual = cs.add_hyperparameter(Constant("dual", "False"))
    # These are set ad-hoc
    cs.add_hyperparameter(UniformFloatHyperparameter(
        "tol", 1e-5, 1e-1, default=1e-4, log=True))
    cs.add_hyperparameter(Constant("fit_intercept", "True"))
    cs.add_hyperparameter(Constant("intercept_scaling", 1))
    # dual=False cannot be combined with the plain epsilon-insensitive loss
    cs.add_forbidden_clause(ForbiddenAndConjunction(
        ForbiddenEqualsClause(dual, "False"),
        ForbiddenEqualsClause(loss, "epsilon_insensitive")))
    return cs
def test_check_forbidden_with_sampled_vector_configuration(self):
    # A Configuration built from a raw vector must still be rejected by
    # forbidden clauses.
    space = ConfigurationSpace()
    metric = CategoricalHyperparameter("metric", ["minkowski", "other"])
    space.add_hyperparameter(metric)
    space.add_forbidden_clause(ForbiddenEqualsClause(metric, "other"))
    vector = np.ones(1, dtype=[('metric', int)])
    config = Configuration(space, vector=vector)
    self.assertRaisesRegexp(ValueError, "violates forbidden clause",
                            space._check_forbidden, config)
def test_check_forbidden_with_sampled_vector_configuration(self):
    # _check_forbidden must raise for a vector-backed configuration that
    # hits a forbidden clause.
    cs = ConfigurationSpace()
    metric = CategoricalHyperparameter("metric", ["minkowski", "other"])
    cs.add_hyperparameter(metric)
    cs.add_forbidden_clause(ForbiddenEqualsClause(metric, "other"))
    configuration = Configuration(
        cs, vector=np.ones(1, dtype=[('metric', int)]))
    self.assertRaisesRegexp(ValueError, "violates forbidden clause",
                            cs._check_forbidden, configuration)
def test_build_forbidden(self):
    # The pcs writer must serialize a forbidden AND-conjunction as the
    # cross product of the forbidden value sets.
    expected = ("a {a, b, c} [a]\nb {a, b, c} [c]\n\n"
                "{a=a, b=a}\n{a=a, b=b}\n{a=b, b=a}\n{a=b, b=b}")
    cs = ConfigurationSpace()
    hp_a = CategoricalHyperparameter("a", ["a", "b", "c"], "a")
    hp_b = CategoricalHyperparameter("b", ["a", "b", "c"], "c")
    for hp in (hp_a, hp_b):
        cs.add_hyperparameter(hp)
    cs.add_forbidden_clause(ForbiddenAndConjunction(
        ForbiddenInClause(hp_a, ["a", "b"]),
        ForbiddenInClause(hp_b, ["a", "b"])))
    self.assertIn(expected, pcs_parser.write(cs))
def test_add_forbidden_clause(self):
    # A forbidden clause must show up in the space's string representation.
    cs = ConfigurationSpace()
    hp1 = CategoricalHyperparameter("input1", [0, 1])
    cs.add_hyperparameter(hp1)
    # TODO add checking whether a forbidden clause makes sense at all
    cs.add_forbidden_clause(ForbiddenEqualsClause(hp1, 1))
    # TODO add something to properly retrieve the forbidden clauses
    expected = ("Configuration space object:\n"
                "  Hyperparameters:\n"
                "    input1, Type: Categorical, Choices: {0, 1}, "
                "Default: 0\n"
                "  Forbidden Clauses:\n"
                "    Forbidden: input1 == 1\n")
    self.assertEqual(str(cs), expected)
def test_add_forbidden_clause(self):
    # str(cs) must list the clause under "Forbidden Clauses:".
    space = ConfigurationSpace()
    choice = CategoricalHyperparameter("input1", [0, 1])
    space.add_hyperparameter(choice)
    # TODO add checking whether a forbidden clause makes sense at all
    space.add_forbidden_clause(ForbiddenEqualsClause(choice, 1))
    # TODO add something to properly retrieve the forbidden clauses
    self.assertEqual(
        str(space),
        "Configuration space object:\n"
        "  Hyperparameters:\n"
        "    input1, Type: Categorical, Choices: {0, 1}, "
        "Default: 0\n"
        "  Forbidden Clauses:\n"
        "    Forbidden: input1 == 1\n")
def get_hyperparameter_search_space(dataset_properties=None):
    """Configuration space for the feature-agglomeration preprocessor."""
    n_clusters = UniformIntegerHyperparameter("n_clusters", 2, 400, 25)
    affinity = CategoricalHyperparameter(
        "affinity", ["euclidean", "manhattan", "cosine"], "euclidean")
    linkage = CategoricalHyperparameter(
        "linkage", ["ward", "complete", "average"], "ward")
    pooling_func = CategoricalHyperparameter(
        "pooling_func", ["mean", "median", "max"])

    cs = ConfigurationSpace()
    for hp in (n_clusters, affinity, linkage, pooling_func):
        cs.add_hyperparameter(hp)

    # Ward linkage only works with the euclidean affinity
    cs.add_forbidden_clause(ForbiddenAndConjunction(
        ForbiddenInClause(affinity, ["manhattan", "cosine"]),
        ForbiddenEqualsClause(linkage, "ward")))
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Return the search space of the feature-agglomeration step."""
    cs = ConfigurationSpace()
    cs.add_hyperparameter(
        UniformIntegerHyperparameter("n_clusters", 2, 400, 25))
    affinity = cs.add_hyperparameter(CategoricalHyperparameter(
        "affinity", ["euclidean", "manhattan", "cosine"], "euclidean"))
    linkage = cs.add_hyperparameter(CategoricalHyperparameter(
        "linkage", ["ward", "complete", "average"], "ward"))
    cs.add_hyperparameter(CategoricalHyperparameter(
        "pooling_func", ["mean", "median", "max"]))
    # ward linkage requires the euclidean affinity
    cs.add_forbidden_clause(ForbiddenAndConjunction(
        ForbiddenInClause(affinity, ["manhattan", "cosine"]),
        ForbiddenEqualsClause(linkage, "ward")))
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Hyperparameter space for a liblinear SVM with L1 penalty."""
    cs = ConfigurationSpace()
    penalty = cs.add_hyperparameter(Constant("penalty", "l1"))
    loss = cs.add_hyperparameter(CategoricalHyperparameter(
        "loss", ["hinge", "squared_hinge"], default="squared_hinge"))
    cs.add_hyperparameter(Constant("dual", "False"))
    # This is set ad-hoc
    cs.add_hyperparameter(UniformFloatHyperparameter(
        "tol", 1e-5, 1e-1, default=1e-4, log=True))
    cs.add_hyperparameter(UniformFloatHyperparameter(
        "C", 0.03125, 32768, log=True, default=1.0))
    cs.add_hyperparameter(Constant("multi_class", "ovr"))
    # These are set ad-hoc
    cs.add_hyperparameter(Constant("fit_intercept", "True"))
    cs.add_hyperparameter(Constant("intercept_scaling", 1))
    # liblinear does not support penalty='l1' combined with loss='hinge'
    cs.add_forbidden_clause(ForbiddenAndConjunction(
        ForbiddenEqualsClause(penalty, "l1"),
        ForbiddenEqualsClause(loss, "hinge")))
    return cs
def test_check_configuration2(self):
    # Inactive hyperparameters must not be set, and evaluating forbidden
    # clauses must not choke on hyperparameters that are absent from the
    # configuration.
    cs = ConfigurationSpace()
    classifier = CategoricalHyperparameter(
        "classifier", ["k_nearest_neighbors", "extra_trees"])
    metric = CategoricalHyperparameter("metric", ["minkowski", "other"])
    p = CategoricalHyperparameter("k_nearest_neighbors:p", [1, 2])
    for hp in (metric, p, classifier):
        cs.add_hyperparameter(hp)
    cs.add_condition(
        EqualsCondition(metric, classifier, "k_nearest_neighbors"))
    cs.add_condition(EqualsCondition(p, metric, "minkowski"))
    cs.add_forbidden_clause(ForbiddenEqualsClause(metric, "other"))
    # Must construct without raising although 'metric' (and thus 'p') is
    # inactive for this choice of classifier.
    configuration = Configuration(cs, dict(classifier="extra_trees"))
def test_check_configuration2(self):
    # Test that hyperparameters which are not active must not be set and
    # that evaluating forbidden clauses does not choke on missing
    # hyperparameters
    space = ConfigurationSpace()
    classifier = CategoricalHyperparameter(
        "classifier", ["k_nearest_neighbors", "extra_trees"])
    metric = CategoricalHyperparameter("metric", ["minkowski", "other"])
    p = CategoricalHyperparameter("k_nearest_neighbors:p", [1, 2])
    space.add_hyperparameter(metric)
    space.add_hyperparameter(p)
    space.add_hyperparameter(classifier)
    space.add_condition(
        EqualsCondition(metric, classifier, "k_nearest_neighbors"))
    space.add_condition(EqualsCondition(p, metric, "minkowski"))
    space.add_forbidden_clause(ForbiddenEqualsClause(metric, "other"))
    # Should not raise: 'metric' is inactive when classifier='extra_trees'.
    configuration = Configuration(space, dict(classifier="extra_trees"))
def test_add_configuration_space(self):
    # Adding a sub-space must re-register its hyperparameters, conditions
    # and forbidden clauses under 'prefix' + delimiter.
    inner = ConfigurationSpace()
    hp1 = inner.add_hyperparameter(CategoricalHyperparameter("input1", [0, 1]))
    inner.add_forbidden_clause(ForbiddenEqualsClause(hp1, 1))
    hp2 = inner.add_hyperparameter(UniformIntegerHyperparameter("child", 0, 10))
    inner.add_condition(EqualsCondition(hp2, hp1, 0))
    outer = ConfigurationSpace()
    outer.add_configuration_space('prefix', inner, delimiter='__')
    self.assertEqual(str(outer), '''Configuration space object:
  Hyperparameters:
    prefix__child, Type: UniformInteger, Range: [0, 10], Default: 5
    prefix__input1, Type: Categorical, Choices: {0, 1}, Default: 0
  Conditions:
    prefix__child | prefix__input1 == 0
  Forbidden Clauses:
    Forbidden: prefix__input1 == 1
''')
def test_add_configuration_space(self):
    # All sub-space contents must appear with the 'prefix__' name prefix.
    cs = ConfigurationSpace()
    hp1 = cs.add_hyperparameter(CategoricalHyperparameter("input1", [0, 1]))
    cs.add_forbidden_clause(ForbiddenEqualsClause(hp1, 1))
    hp2 = cs.add_hyperparameter(UniformIntegerHyperparameter("child", 0, 10))
    cs.add_condition(EqualsCondition(hp2, hp1, 0))
    cs2 = ConfigurationSpace()
    cs2.add_configuration_space('prefix', cs, delimiter='__')
    expected = '''Configuration space object:
  Hyperparameters:
    prefix__child, Type: UniformInteger, Range: [0, 10], Default: 5
    prefix__input1, Type: Categorical, Choices: {0, 1}, Default: 0
  Conditions:
    prefix__child | prefix__input1 == 0
  Forbidden Clauses:
    Forbidden: prefix__input1 == 1
'''
    self.assertEqual(str(cs2), expected)
def get_hyperparameter_search_space(cls, dataset_properties, default=None,
                                    include=None, exclude=None):
    """Build a ConfigurationSpace that selects one estimator via the
    '__choice__' hyperparameter and nests each estimator's own space
    under an 'estimator_name:' prefix.

    NOTE(review): mutates the hyperparameter/condition/clause objects it
    copies out of each estimator's space (renaming in place), which is
    why the sub-space is re-fetched for every use — confirm before
    refactoring.
    """
    if include is not None and exclude is not None:
        raise ValueError("The arguments include_estimators and "
                         "exclude_estimators cannot be used together.")

    cs = ConfigurationSpace()

    # Compile a list of all estimator objects for this problem
    available_estimators = cls.get_available_components(
        data_prop=dataset_properties, include=include, exclude=exclude)

    if len(available_estimators) == 0:
        raise ValueError("No classifiers found")

    if default is None:
        # Preferred defaults first, then anything else that is available
        defaults = ['random_forest', 'liblinear_svc', 'sgd', 'libsvm_svc'
                    ] + list(available_estimators.keys())
        for default_ in defaults:
            if default_ in available_estimators:
                if include is not None and default_ not in include:
                    continue
                if exclude is not None and default_ in exclude:
                    continue
                default = default_
                break

    # Top-level choice between all available estimators
    estimator = CategoricalHyperparameter('__choice__', list(
        available_estimators.keys()), default=default)
    cs.add_hyperparameter(estimator)
    for estimator_name in available_estimators.keys():
        # We have to retrieve the configuration space every time because
        # we change the objects it returns. If we reused it, we could not
        # retrieve the conditions further down
        # TODO implement copy for hyperparameters and forbidden and
        # conditions!
        estimator_configuration_space = available_estimators[
            estimator_name].\
            get_hyperparameter_search_space(dataset_properties)
        for parameter in estimator_configuration_space.get_hyperparameters():
            # Prefix the copied hyperparameter with the estimator's name
            new_parameter = copy.deepcopy(parameter)
            new_parameter.name = "%s:%s" % (estimator_name,
                                            new_parameter.name)
            cs.add_hyperparameter(new_parameter)
            # We must only add a condition if the hyperparameter is not
            # conditional on something else
            if len(estimator_configuration_space.get_parents_of(
                    parameter)) == 0:
                condition = EqualsCondition(new_parameter, estimator,
                                            estimator_name)
                cs.add_condition(condition)

        # Copy the estimator's internal conditions, renaming both ends
        # to the prefixed hyperparameter names
        for condition in available_estimators[estimator_name].\
                get_hyperparameter_search_space(
                    dataset_properties).get_conditions():
            dlcs = condition.get_descendant_literal_conditions()
            for dlc in dlcs:
                if not dlc.child.name.startswith(estimator_name):
                    dlc.child.name = "%s:%s" % (estimator_name,
                                                dlc.child.name)
                if not dlc.parent.name.startswith(estimator_name):
                    dlc.parent.name = "%s:%s" % (estimator_name,
                                                 dlc.parent.name)
            cs.add_condition(condition)

        # Copy the estimator's forbidden clauses with the same renaming
        for forbidden_clause in available_estimators[estimator_name].\
                get_hyperparameter_search_space(
                    dataset_properties).forbidden_clauses:
            dlcs = forbidden_clause.get_descendant_literal_clauses()
            for dlc in dlcs:
                if not dlc.hyperparameter.name.startswith(estimator_name):
                    dlc.hyperparameter.name = "%s:%s" % (
                        estimator_name, dlc.hyperparameter.name)
            cs.add_forbidden_clause(forbidden_clause)

    return cs
def get_hyperparameter_search_space(cls, include=None, exclude=None,
                                    dataset_properties=None):
    """Return the configuration space for the CASH problem.

    Parameters
    ----------
    include_estimators : list of str
        If include_estimators is given, only the regressors specified
        are used. Specify them by their module name; e.g., to include
        only the SVM use :python:`include_regressors=['svr']`.
        Cannot be used together with :python:`exclude_regressors`.

    exclude_estimators : list of str
        If exclude_estimators is given, only the regressors specified
        are used. Specify them by their module name; e.g., to include
        all regressors except the SVM use
        :python:`exclude_regressors=['svr']`.
        Cannot be used together with :python:`include_regressors`.

    include_preprocessors : list of str
        If include_preprocessors is given, only the preprocessors
        specified are used. Specify them by their module name; e.g., to
        include only the PCA use :python:`include_preprocessors=['pca']`.
        Cannot be used together with :python:`exclude_preprocessors`.

    exclude_preprocessors : list of str
        If include_preprocessors is given, only the preprocessors
        specified are used. Specify them by their module name; e.g., to
        include all preprocessors except the PCA use
        :python:`exclude_preprocessors=['pca']`.
        Cannot be used together with :python:`include_preprocessors`.

    Returns
    -------
    cs : HPOlibConfigSpace.configuration_space.Configuration
        The configuration space describing the SimpleRegressionClassifier.
    """
    cs = ConfigurationSpace()

    # Normalize dataset_properties: force the regression target type
    if dataset_properties is None or not isinstance(dataset_properties,
                                                    dict):
        dataset_properties = dict()
    if not 'target_type' in dataset_properties:
        dataset_properties['target_type'] = 'regression'
    if dataset_properties['target_type'] != 'regression':
        dataset_properties['target_type'] = 'regression'

    if 'sparse' not in dataset_properties:
        # This dataset is probaby dense
        dataset_properties['sparse'] = False

    pipeline = cls._get_pipeline()
    cs = cls._get_hyperparameter_search_space(cs, dataset_properties,
                                              exclude, include, pipeline)

    regressors = cs.get_hyperparameter('regressor:__choice__').choices
    preprocessors = cs.get_hyperparameter('preprocessor:__choice__').choices
    available_regressors = pipeline[-1][1].get_available_components(
        dataset_properties)
    available_preprocessors = pipeline[-2][1].get_available_components(
        dataset_properties)

    # Fallback defaults to try if a forbidden clause would make the
    # current default configuration illegal
    possible_default_regressor = copy.copy(list(
        available_regressors.keys()))
    default = cs.get_hyperparameter('regressor:__choice__').default
    del possible_default_regressor[
        possible_default_regressor.index(default)]

    # A regressor which can handle sparse data after the densifier
    for key in regressors:
        if SPARSE in available_regressors[key].get_properties(
                dataset_properties=None)['input']:
            if 'densifier' in preprocessors:
                # NOTE(review): ValueError here presumably means the new
                # clause forbids the current default configuration, so the
                # default is swapped and the insertion retried — confirm.
                while True:
                    try:
                        cs.add_forbidden_clause(
                            ForbiddenAndConjunction(
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'regressor:__choice__'), key),
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'preprocessor:__choice__'),
                                    'densifier')
                            ))
                        break
                    except ValueError:
                        # Change the default and try again
                        try:
                            default = possible_default_regressor.pop()
                        except IndexError:
                            raise ValueError(
                                "Cannot find a legal default configuration.")
                        cs.get_hyperparameter(
                            'regressor:__choice__').default = default

    # which would take too long
    # Combinations of tree-based models with feature learning:
    regressors_ = ["adaboost", "decision_tree", "extra_trees",
                   "gaussian_process", "gradient_boosting",
                   "k_nearest_neighbors", "random_forest"]
    feature_learning_ = ["kitchen_sinks", "kernel_pca", "nystroem_sampler"]

    for r, f in product(regressors_, feature_learning_):
        if r not in regressors:
            continue
        if f not in preprocessors:
            continue
        while True:
            try:
                cs.add_forbidden_clause(ForbiddenAndConjunction(
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "regressor:__choice__"), r),
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "preprocessor:__choice__"), f)))
                break
            except KeyError:
                break
            except ValueError:
                # Change the default and try again
                try:
                    default = possible_default_regressor.pop()
                except IndexError:
                    raise ValueError(
                        "Cannot find a legal default configuration.")
                cs.get_hyperparameter(
                    'regressor:__choice__').default = default

    return cs
def get_hyperparameter_search_space(cls, include=None, exclude=None,
                                    dataset_properties=None):
    """Create the hyperparameter configuration space.

    Builds the joint classifier/preprocessor space, then adds forbidden
    clauses for component combinations that are infeasible (memory) or
    undesirable (runtime, invalid input ranges), moving the default
    classifier if a clause would forbid the default configuration.

    Parameters
    ----------
    include : dict (optional, default=None)

    Returns
    -------
    """
    cs = ConfigurationSpace()

    # Normalize dataset_properties: force the classification target type
    if dataset_properties is None or not isinstance(dataset_properties,
                                                    dict):
        dataset_properties = dict()
    if not 'target_type' in dataset_properties:
        dataset_properties['target_type'] = 'classification'
    if dataset_properties['target_type'] != 'classification':
        dataset_properties['target_type'] = 'classification'

    pipeline = cls._get_pipeline()
    cs = cls._get_hyperparameter_search_space(cs, dataset_properties,
                                              exclude, include, pipeline)

    classifiers = cs.get_hyperparameter('classifier:__choice__').choices
    preprocessors = cs.get_hyperparameter('preprocessor:__choice__').choices
    available_classifiers = pipeline[-1][1].get_available_components(
        dataset_properties)
    available_preprocessors = pipeline[-2][1].get_available_components(
        dataset_properties)

    # Fallback defaults to try when a new forbidden clause would make the
    # current default configuration illegal
    possible_default_classifier = copy.copy(list(
        available_classifiers.keys()))
    default = cs.get_hyperparameter('classifier:__choice__').default
    del possible_default_classifier[
        possible_default_classifier.index(default)]

    # A classifier which can handle sparse data after the densifier is
    # forbidden for memory issues
    for key in classifiers:
        if SPARSE in available_classifiers[key].get_properties()['input']:
            if 'densifier' in preprocessors:
                while True:
                    try:
                        cs.add_forbidden_clause(
                            ForbiddenAndConjunction(
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'classifier:__choice__'), key),
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'preprocessor:__choice__'),
                                    'densifier')
                            ))
                        # Success
                        break
                    except ValueError:
                        # Change the default and try again
                        try:
                            default = possible_default_classifier.pop()
                        except IndexError:
                            raise ValueError(
                                "Cannot find a legal default configuration.")
                        cs.get_hyperparameter(
                            'classifier:__choice__').default = default

    # which would take too long
    # Combinations of non-linear models with feature learning:
    classifiers_ = ["adaboost", "decision_tree", "extra_trees",
                    "gradient_boosting", "k_nearest_neighbors",
                    "libsvm_svc", "random_forest", "gaussian_nb",
                    "decision_tree"]
    feature_learning = ["kitchen_sinks", "nystroem_sampler"]

    for c, f in product(classifiers_, feature_learning):
        if c not in classifiers:
            continue
        if f not in preprocessors:
            continue
        while True:
            try:
                cs.add_forbidden_clause(ForbiddenAndConjunction(
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "classifier:__choice__"), c),
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "preprocessor:__choice__"), f)))
                break
            except KeyError:
                break
            except ValueError as e:
                # Change the default and try again
                try:
                    default = possible_default_classifier.pop()
                except IndexError:
                    raise ValueError(
                        "Cannot find a legal default configuration.")
                cs.get_hyperparameter(
                    'classifier:__choice__').default = default

    # Won't work
    # Multinomial NB etc don't use with features learning, pca etc
    classifiers_ = ["multinomial_nb"]
    preproc_with_negative_X = ["kitchen_sinks", "pca", "truncatedSVD",
                               "fast_ica", "kernel_pca",
                               "nystroem_sampler"]

    for c, f in product(classifiers_, preproc_with_negative_X):
        if c not in classifiers:
            continue
        if f not in preprocessors:
            continue
        while True:
            try:
                cs.add_forbidden_clause(ForbiddenAndConjunction(
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "preprocessor:__choice__"), f),
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "classifier:__choice__"), c)))
                break
            except KeyError:
                break
            except ValueError:
                # Change the default and try again
                try:
                    default = possible_default_classifier.pop()
                except IndexError:
                    raise ValueError(
                        "Cannot find a legal default configuration.")
                cs.get_hyperparameter(
                    'classifier:__choice__').default = default

    return cs
def get_hyperparameter_search_space(cls, dataset_properties, default=None,
                                    include=None, exclude=None):
    """Build a ConfigurationSpace choosing one regressor via '__choice__'
    and nesting each regressor's own space under an 'estimator_name:'
    prefix.

    NOTE(review): renames deep-copied hyperparameters and the condition /
    forbidden-clause objects in place, which is why the estimator's
    sub-space is re-fetched for every use — confirm before refactoring.
    """
    if include is not None and exclude is not None:
        raise ValueError(
            "The argument include and exclude cannot be used together.")

    cs = ConfigurationSpace()

    # Compile a list of all estimator objects for this problem
    available_estimators = cls.get_available_components(
        data_prop=dataset_properties, include=include, exclude=exclude)

    if len(available_estimators) == 0:
        raise ValueError("No regressors found")

    if default is None:
        # Preferred defaults first, then anything else that is available
        defaults = ['random_forest', 'support_vector_regression'] + \
            list(available_estimators.keys())
        for default_ in defaults:
            if default_ in available_estimators:
                if include is not None and default_ not in include:
                    continue
                if exclude is not None and default_ in exclude:
                    continue
                default = default_
                break

    estimator = CategoricalHyperparameter('__choice__',
                                          list(available_estimators.keys()),
                                          default=default)
    cs.add_hyperparameter(estimator)
    for estimator_name in available_estimators.keys():
        # We have to retrieve the configuration space every time because
        # we change the objects it returns. If we reused it, we could not
        # retrieve the conditions further down
        # TODO implement copy for hyperparameters and forbidden and
        # conditions!
        estimator_configuration_space = available_estimators[
            estimator_name].\
            get_hyperparameter_search_space(dataset_properties)
        for parameter in estimator_configuration_space.get_hyperparameters():
            # Prefix each copied hyperparameter with the estimator's name
            new_parameter = copy.deepcopy(parameter)
            new_parameter.name = "%s:%s" % (
                estimator_name, new_parameter.name)
            cs.add_hyperparameter(new_parameter)
            # We must only add a condition if the hyperparameter is not
            # conditional on something else
            if len(estimator_configuration_space.
                    get_parents_of(parameter)) == 0:
                condition = EqualsCondition(new_parameter, estimator,
                                            estimator_name)
                cs.add_condition(condition)

        # Copy the estimator's internal conditions with prefixed names
        for condition in available_estimators[estimator_name].\
                get_hyperparameter_search_space(
                    dataset_properties).get_conditions():
            dlcs = condition.get_descendant_literal_conditions()
            for dlc in dlcs:
                if not dlc.child.name.startswith(estimator_name):
                    dlc.child.name = "%s:%s" % (
                        estimator_name, dlc.child.name)
                if not dlc.parent.name.startswith(estimator_name):
                    dlc.parent.name = "%s:%s" % (
                        estimator_name, dlc.parent.name)
            cs.add_condition(condition)

        # Copy the estimator's forbidden clauses with prefixed names
        for forbidden_clause in available_estimators[estimator_name].\
                get_hyperparameter_search_space(
                    dataset_properties).forbidden_clauses:
            dlcs = forbidden_clause.get_descendant_literal_clauses()
            for dlc in dlcs:
                if not dlc.hyperparameter.name.startswith(estimator_name):
                    dlc.hyperparameter.name = "%s:%s" % (
                        estimator_name, dlc.hyperparameter.name)
            cs.add_forbidden_clause(forbidden_clause)

    return cs
def get_hyperparameter_search_space(cls, include=None, exclude=None,
                                    dataset_properties=None):
    """Return the configuration space for the CASH problem.

    Parameters
    ----------
    include_estimators : list of str
        If include_estimators is given, only the regressors specified
        are used. Specify them by their module name; e.g., to include
        only the SVM use :python:`include_regressors=['svr']`.
        Cannot be used together with :python:`exclude_regressors`.

    exclude_estimators : list of str
        If exclude_estimators is given, only the regressors specified
        are used. Specify them by their module name; e.g., to include
        all regressors except the SVM use
        :python:`exclude_regressors=['svr']`.
        Cannot be used together with :python:`include_regressors`.

    include_preprocessors : list of str
        If include_preprocessors is given, only the preprocessors
        specified are used. Specify them by their module name; e.g., to
        include only the PCA use :python:`include_preprocessors=['pca']`.
        Cannot be used together with :python:`exclude_preprocessors`.

    exclude_preprocessors : list of str
        If include_preprocessors is given, only the preprocessors
        specified are used. Specify them by their module name; e.g., to
        include all preprocessors except the PCA use
        :python:`exclude_preprocessors=['pca']`.
        Cannot be used together with :python:`include_preprocessors`.

    Returns
    -------
    cs : HPOlibConfigSpace.configuration_space.Configuration
        The configuration space describing the SimpleRegressionClassifier.
    """
    cs = ConfigurationSpace()

    # Normalize dataset_properties: force the regression target type
    if dataset_properties is None or not isinstance(
            dataset_properties, dict):
        dataset_properties = dict()
    if not 'target_type' in dataset_properties:
        dataset_properties['target_type'] = 'regression'
    if dataset_properties['target_type'] != 'regression':
        dataset_properties['target_type'] = 'regression'

    if 'sparse' not in dataset_properties:
        # This dataset is probaby dense
        dataset_properties['sparse'] = False

    pipeline = cls._get_pipeline()
    cs = cls._get_hyperparameter_search_space(cs, dataset_properties,
                                              exclude, include, pipeline)

    regressors = cs.get_hyperparameter('regressor:__choice__').choices
    preprocessors = cs.get_hyperparameter(
        'preprocessor:__choice__').choices
    available_regressors = pipeline[-1][1].get_available_components(
        dataset_properties)
    available_preprocessors = pipeline[-2][1].get_available_components(
        dataset_properties)

    # Fallback defaults to try when a new forbidden clause would make the
    # current default configuration illegal
    possible_default_regressor = copy.copy(
        list(available_regressors.keys()))
    default = cs.get_hyperparameter('regressor:__choice__').default
    del possible_default_regressor[possible_default_regressor.index(
        default)]

    # A regressor which can handle sparse data after the densifier
    for key in regressors:
        if SPARSE in available_regressors[key].get_properties(
                dataset_properties=None)['input']:
            if 'densifier' in preprocessors:
                # NOTE(review): ValueError presumably means the clause
                # forbids the current default configuration; the default
                # is swapped and the insertion retried — confirm.
                while True:
                    try:
                        cs.add_forbidden_clause(
                            ForbiddenAndConjunction(
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'regressor:__choice__'), key),
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'preprocessor:__choice__'),
                                    'densifier')))
                        break
                    except ValueError:
                        # Change the default and try again
                        try:
                            default = possible_default_regressor.pop()
                        except IndexError:
                            raise ValueError(
                                "Cannot find a legal default configuration."
                            )
                        cs.get_hyperparameter(
                            'regressor:__choice__').default = default

    # which would take too long
    # Combinations of tree-based models with feature learning:
    regressors_ = [
        "adaboost", "decision_tree", "extra_trees", "gaussian_process",
        "gradient_boosting", "k_nearest_neighbors", "random_forest"
    ]
    feature_learning_ = ["kitchen_sinks", "kernel_pca", "nystroem_sampler"]

    for r, f in product(regressors_, feature_learning_):
        if r not in regressors:
            continue
        if f not in preprocessors:
            continue
        while True:
            try:
                cs.add_forbidden_clause(
                    ForbiddenAndConjunction(
                        ForbiddenEqualsClause(
                            cs.get_hyperparameter("regressor:__choice__"),
                            r),
                        ForbiddenEqualsClause(
                            cs.get_hyperparameter(
                                "preprocessor:__choice__"), f)))
                break
            except KeyError:
                break
            except ValueError:
                # Change the default and try again
                try:
                    default = possible_default_regressor.pop()
                except IndexError:
                    raise ValueError(
                        "Cannot find a legal default configuration.")
                cs.get_hyperparameter(
                    'regressor:__choice__').default = default

    return cs
def read(pcs_string, debug=False):
    """Parse SMAC pcs-format lines into a ConfigurationSpace.

    Parameters
    ----------
    pcs_string : iterable of str
        The lines of a pcs file.
    debug : bool
        If True, print parsing statistics after reading.

    Returns
    -------
    ConfigurationSpace
        The parsed space with hyperparameters, conditions and forbidden
        clauses.

    Raises
    ------
    NotImplementedError
        If a line or condition cannot be parsed, or a forbidden clause
        uses an operator other than '='.
    """
    configuration_space = ConfigurationSpace()
    conditions = []
    forbidden = []

    # some statistics
    ct = 0
    cont_ct = 0
    cat_ct = 0
    line_ct = 0

    for line in pcs_string:
        line_ct += 1

        if "#" in line:
            # It contains a comment
            pos = line.find("#")
            line = line[:pos]

        # Remove quotes and whitespaces at beginning and end
        line = line.replace('"', "").replace("'", "")
        line = line.strip()

        if "|" in line:
            # It's a condition
            try:
                c = pp_condition.parseString(line)
                conditions.append(c)
            except pyparsing.ParseException:
                raise NotImplementedError("Could not parse condition: %s" %
                                          line)
            continue
        if "}" not in line and "]" not in line:
            # BUG FIX: was a Python 2 print statement, which is a syntax
            # error under Python 3 and inconsistent with the rest of the
            # module that already uses print().
            print("Skipping: %s" % line)
            continue
        if line.startswith("{") and line.endswith("}"):
            forbidden.append(line)
            continue
        if len(line.strip()) == 0:
            continue

        ct += 1
        param = None

        create = {"int": UniformIntegerHyperparameter,
                  "float": UniformFloatHyperparameter,
                  "categorical": CategoricalHyperparameter}

        # First try to parse the line as a continuous (int/float) parameter
        try:
            param_list = pp_cont_param.parseString(line)
            il = param_list[9:]
            if len(il) > 0:
                il = il[0]
            param_list = param_list[:9]
            name = param_list[0]
            lower = float(param_list[2])
            upper = float(param_list[4])
            paramtype = "int" if "i" in il else "float"
            log = True if "l" in il else False
            default = float(param_list[7])
            param = create[paramtype](name=name, lower=lower, upper=upper,
                                      q=None, log=log, default=default)
            cont_ct += 1
        except pyparsing.ParseException:
            pass

        # Fall back to parsing the line as a categorical parameter
        try:
            param_list = pp_cat_param.parseString(line)
            name = param_list[0]
            choices = [c for c in param_list[2:-4:2]]
            default = param_list[-2]
            param = create["categorical"](name=name, choices=choices,
                                          default=default)
            cat_ct += 1
        except pyparsing.ParseException:
            pass

        if param is None:
            raise NotImplementedError("Could not parse: %s" % line)

        configuration_space.add_hyperparameter(param)

    for clause in forbidden:
        # TODO test this properly!
        # TODO Add a try/catch here!
        # noinspection PyUnusedLocal
        param_list = pp_forbidden_clause.parseString(clause)
        tmp_list = []
        clause_list = []
        for value in param_list[1:]:
            if len(tmp_list) < 3:
                tmp_list.append(value)
            else:
                # So far, only equals is supported by SMAC
                if tmp_list[1] == '=':
                    # TODO maybe add a check if the hyperparameter is
                    # actually in the configuration space
                    clause_list.append(ForbiddenEqualsClause(
                        configuration_space.get_hyperparameter(tmp_list[0]),
                        tmp_list[2]))
                else:
                    raise NotImplementedError()
                tmp_list = []
        configuration_space.add_forbidden_clause(ForbiddenAndConjunction(
            *clause_list))

    # Now handle conditions
    for condition in conditions:
        child_name = condition[0]
        child = configuration_space.get_hyperparameter(child_name)
        parent_name = condition[2]
        parent = configuration_space.get_hyperparameter(parent_name)
        restrictions = condition[5:-1:2]
        # TODO: cast the type of the restriction!
        if len(restrictions) == 1:
            cond = EqualsCondition(child, parent, restrictions[0])
        else:
            cond = InCondition(child, parent, values=restrictions)
        configuration_space.add_condition(cond)

    if debug:
        # BUG FIX: the previous debug block referenced an undefined name
        # `searchspace` (NameError) and used Python 2 print statements.
        # Report the statistics actually collected above instead.
        hyperparameters = configuration_space.get_hyperparameters()
        print()
        print("============== Reading Results")
        print("First 10 lines:")
        print("\n".join("%s: %s" % (hp.name, str(hp))
                        for hp in hyperparameters[:10]))
        print()
        print("#Invalid lines: %d ( of %d )" %
              (line_ct - len(conditions) - ct, line_ct))
        print("#Parameter: %d" % len(hyperparameters))
        print("#Conditions: %d" % len(conditions))
        print("#Categorical: %d" % cat_ct)
        print("#Continuous: %d" % cont_ct)

    return configuration_space
def read(pcs_string, debug=False):
    """Parse a SMAC pcs file (iterable of lines) into a ConfigurationSpace.

    Unlike the sibling version, multiple conditions on the same child
    hyperparameter are merged into a single AndConjunction before being
    added to the space.

    Parameters
    ----------
    pcs_string : iterable of str
        Lines of a pcs file: parameter definitions, conditions ("|"),
        and forbidden clauses ("{...}").
    debug : bool
        Currently unused in this variant.

    Returns
    -------
    ConfigurationSpace

    Raises
    ------
    NotImplementedError
        If a line cannot be parsed, or a forbidden clause uses an
        operator other than '='.
    """
    configuration_space = ConfigurationSpace()
    conditions = []
    forbidden = []

    # some statistics
    ct = 0
    cont_ct = 0
    cat_ct = 0
    line_ct = 0

    for line in pcs_string:
        line_ct += 1

        if "#" in line:
            # It contains a comment
            pos = line.find("#")
            line = line[:pos]

        # Remove quotes and whitespaces at beginning and end
        line = line.replace('"', "").replace("'", "")
        line = line.strip()

        if "|" in line:
            # It's a condition; keep the raw parse result, resolved after
            # all hyperparameters have been added.
            try:
                c = pp_condition.parseString(line)
                conditions.append(c)
            except pyparsing.ParseException:
                raise NotImplementedError("Could not parse condition: %s" %
                                          line)
            continue
        if "}" not in line and "]" not in line:
            print("Skipping: %s" % line)
            continue
        if line.startswith("{") and line.endswith("}"):
            forbidden.append(line)
            continue
        if len(line.strip()) == 0:
            continue

        ct += 1
        param = None
        # print "Parsing: " + line

        create = {"int": UniformIntegerHyperparameter,
                  "float": UniformFloatHyperparameter,
                  "categorical": CategoricalHyperparameter}

        # First attempt: numerical parameter; a ParseException means the
        # line may still be a categorical parameter (tried next).
        try:
            param_list = pp_cont_param.parseString(line)
            # Optional trailing flags: "i" = integer, "l" = log scale.
            il = param_list[9:]
            if len(il) > 0:
                il = il[0]
            param_list = param_list[:9]
            name = param_list[0]
            lower = float(param_list[2])
            upper = float(param_list[4])
            paramtype = "int" if "i" in il else "float"
            log = True if "l" in il else False
            default = float(param_list[7])
            param = create[paramtype](name=name, lower=lower, upper=upper,
                                      q=None, log=log, default=default)
            cont_ct += 1
        except pyparsing.ParseException:
            pass

        try:
            param_list = pp_cat_param.parseString(line)
            name = param_list[0]
            choices = [c for c in param_list[2:-4:2]]
            default = param_list[-2]
            param = create["categorical"](name=name, choices=choices,
                                          default=default)
            cat_ct += 1
        except pyparsing.ParseException:
            pass

        if param is None:
            raise NotImplementedError("Could not parse: %s" % line)

        configuration_space.add_hyperparameter(param)

    for clause in forbidden:
        # TODO test this properly!
        # TODO Add a try/catch here!
        # noinspection PyUnusedLocal
        param_list = pp_forbidden_clause.parseString(clause)
        tmp_list = []
        clause_list = []
        # Tokens arrive as (name, operator, value) triples after the
        # opening brace.
        for value in param_list[1:]:
            if len(tmp_list) < 3:
                tmp_list.append(value)
            else:
                # So far, only equals is supported by SMAC
                if tmp_list[1] == '=':
                    # TODO maybe add a check if the hyperparameter is
                    # actually in the configuration space
                    clause_list.append(ForbiddenEqualsClause(
                        configuration_space.get_hyperparameter(tmp_list[0]),
                        tmp_list[2]))
                else:
                    raise NotImplementedError()
                tmp_list = []
        configuration_space.add_forbidden_clause(ForbiddenAndConjunction(
            *clause_list))

    #Now handle conditions
    # If there are two conditions for one child, these two conditions are an
    # AND-conjunction of conditions, thus we have to connect them
    conditions_per_child = defaultdict(list)
    for condition in conditions:
        child_name = condition[0]
        conditions_per_child[child_name].append(condition)

    for child_name in conditions_per_child:
        condition_objects = []
        for condition in conditions_per_child[child_name]:
            child = configuration_space.get_hyperparameter(child_name)
            parent_name = condition[2]
            parent = configuration_space.get_hyperparameter(parent_name)
            # Every second token from index 5 is a restriction value.
            restrictions = condition[5:-1:2]

            # TODO: cast the type of the restriction!
            if len(restrictions) == 1:
                condition = EqualsCondition(child, parent, restrictions[0])
            else:
                condition = InCondition(child, parent, values=restrictions)
            condition_objects.append(condition)

        # Now we have all condition objects for this child, so we can build a
        # giant AND-conjunction of them (if number of conditions >= 2)!
        if len(condition_objects) > 1:
            and_conjunction = AndConjunction(*condition_objects)
            configuration_space.add_condition(and_conjunction)
        else:
            configuration_space.add_condition(condition_objects[0])

    return configuration_space
def get_hyperparameter_search_space(cls, dataset_properties, default=None,
                                    include=None, exclude=None):
    """Build the combined configuration space over all legal preprocessors.

    Adds a categorical '__choice__' hyperparameter selecting the
    preprocessor, then copies every component's hyperparameters into the
    joint space under a "<component>:<param>" prefix, conditioned on that
    component being chosen.

    Parameters
    ----------
    dataset_properties : dict
        Passed through to each component's own search-space builder.
    default : str, optional
        Preferred default choice; if None, picked from a priority list.
    include, exclude : optional
        Filters forwarded to cls.get_available_components.

    Returns
    -------
    ConfigurationSpace

    Raises
    ------
    ValueError
        If no preprocessor component is available.
    """
    cs = ConfigurationSpace()

    # Compile a list of legal preprocessors for this problem
    available_preprocessors = cls.get_available_components(
        data_prop=dataset_properties, include=include, exclude=exclude)
    if len(available_preprocessors) == 0:
        raise ValueError(
            "No preprocessors found, please add NoPreprocessing")

    if default is None:
        # Priority order for the default choice.
        defaults = ['no_preprocessing', 'select_percentile', 'pca',
                    'truncatedSVD']
        for default_ in defaults:
            if default_ in available_preprocessors:
                default = default_
                break

    preprocessor = CategoricalHyperparameter('__choice__', list(
        available_preprocessors.keys()), default=default)
    cs.add_hyperparameter(preprocessor)
    for name in available_preprocessors:
        preprocessor_configuration_space = available_preprocessors[name]. \
            get_hyperparameter_search_space(dataset_properties)
        for parameter in preprocessor_configuration_space.get_hyperparameters():
            # Prefix the copied hyperparameter with the component name to
            # avoid clashes between components.
            new_parameter = copy.deepcopy(parameter)
            new_parameter.name = "%s:%s" % (name, new_parameter.name)
            cs.add_hyperparameter(new_parameter)
            # We must only add a condition if the hyperparameter is not
            # conditional on something else
            if len(preprocessor_configuration_space.
                    get_parents_of(parameter)) == 0:
                condition = EqualsCondition(new_parameter, preprocessor,
                                            name)
                cs.add_condition(condition)

        # Re-fetch the component's space: the conditions below are mutated
        # in place (child/parent names get prefixed), so a fresh object is
        # needed for each pass.
        for condition in available_preprocessors[name]. \
                get_hyperparameter_search_space(
                dataset_properties).get_conditions():
            if not isinstance(condition, AbstractConjunction):
                dlcs = [condition]
            else:
                dlcs = condition.get_descendent_literal_conditions()
            for dlc in dlcs:
                if not dlc.child.name.startswith(name):
                    dlc.child.name = "%s:%s" % (name, dlc.child.name)
                if not dlc.parent.name.startswith(name):
                    dlc.parent.name = "%s:%s" % (name, dlc.parent.name)
            cs.add_condition(condition)

        # Same prefixing for forbidden clauses, also mutated in place.
        for forbidden_clause in available_preprocessors[name]. \
                get_hyperparameter_search_space(
                dataset_properties).forbidden_clauses:
            dlcs = forbidden_clause.get_descendant_literal_clauses()
            for dlc in dlcs:
                if not dlc.hyperparameter.name.startswith(name):
                    dlc.hyperparameter.name = "%s:%s" % (name,
                                                         dlc.hyperparameter.name)
            cs.add_forbidden_clause(forbidden_clause)

    return cs
def get_hyperparameter_search_space(cls, estimator_name,
                                    default_estimator,
                                    estimator_components,
                                    preprocessor_components,
                                    dataset_properties,
                                    always_active):
    """Return the configuration space for the CASH problem.

    This method should be called by the method
    get_hyperparameter_search_space of a subclass. After the subclass
    assembles a list of available estimators and preprocessor components,
    _get_hyperparameter_search_space can be called to do the work of
    creating the actual
    HPOlibConfigSpace.configuration_space.ConfigurationSpace object.

    Parameters
    ----------
    estimator_name : str
        Name of the estimator hyperparameter which will be used in the
        configuration space. For a classification task, this would be
        'classifier'.

    estimator_components : dict {name: component}
        Dictionary with all estimator components to be included in the
        configuration space.

    preprocessor_components : dict {name: component}
        Dictionary with all preprocessor components to be included in the
        configuration space. .

    always_active : list of str
        A list of components which will always be active in the pipeline.
        This is useful for components like imputation which have
        hyperparameters to be configured, but which do not have any parent.

    default_estimator : str
        Default value for the estimator hyperparameter.

    Returns
    -------
    cs : HPOlibConfigSpace.configuration_space.Configuration
        The configuration space describing the AutoSklearnClassifier.
    """
    cs = ConfigurationSpace()

    available_estimators = estimator_components
    available_preprocessors = preprocessor_components

    if default_estimator is None:
        # Fixed: dict.keys() is not subscriptable on Python 3; wrap in
        # list() (behavior-identical on Python 2).
        default_estimator = list(available_estimators.keys())[0]

    estimator = CategoricalHyperparameter(
        estimator_name, list(available_estimators.keys()),
        default=default_estimator)
    cs.add_hyperparameter(estimator)
    for name in available_estimators.keys():
        # We have to retrieve the configuration space every time because
        # we change the objects it returns. If we reused it, we could not
        # retrieve the conditions further down
        # TODO implement copy for hyperparameters and forbidden and
        # conditions!
        estimator_configuration_space = available_estimators[name]. \
            get_hyperparameter_search_space(dataset_properties)
        for parameter in estimator_configuration_space.get_hyperparameters():
            # Copy each component hyperparameter under a
            # "<component>:<param>" prefix so names cannot clash.
            new_parameter = copy.deepcopy(parameter)
            new_parameter.name = "%s:%s" % (name, new_parameter.name)
            cs.add_hyperparameter(new_parameter)
            # We must only add a condition if the hyperparameter is not
            # conditional on something else
            if len(estimator_configuration_space.
                    get_parents_of(parameter)) == 0:
                condition = EqualsCondition(new_parameter, estimator, name)
                cs.add_condition(condition)

        for condition in available_estimators[name]. \
                get_hyperparameter_search_space(
                dataset_properties).get_conditions():
            dlcs = condition.get_descendant_literal_conditions()
            for dlc in dlcs:
                if not dlc.child.name.startswith(name):
                    dlc.child.name = "%s:%s" % (name, dlc.child.name)
                if not dlc.parent.name.startswith(name):
                    dlc.parent.name = "%s:%s" % (name, dlc.parent.name)
            cs.add_condition(condition)

        for forbidden_clause in available_estimators[name]. \
                get_hyperparameter_search_space(
                dataset_properties).forbidden_clauses:
            dlcs = forbidden_clause.get_descendant_literal_clauses()
            for dlc in dlcs:
                if not dlc.hyperparameter.name.startswith(name):
                    dlc.hyperparameter.name = "%s:%s" % (
                        name, dlc.hyperparameter.name)
            cs.add_forbidden_clause(forbidden_clause)

    # Fixed: filter() returns an iterator on Python 3, which cannot be
    # concatenated to a list below; the comprehension is equivalent on
    # both Python versions.
    preprocessor_choices = [pp for pp in available_preprocessors.keys()
                            if pp not in always_active]
    preprocessor = CategoricalHyperparameter(
        "preprocessor", ["None"] + preprocessor_choices, default='None')
    cs.add_hyperparameter(preprocessor)
    for name in available_preprocessors.keys():
        preprocessor_configuration_space = available_preprocessors[name]. \
            get_hyperparameter_search_space(dataset_properties)
        for parameter in preprocessor_configuration_space.get_hyperparameters():
            new_parameter = copy.deepcopy(parameter)
            new_parameter.name = "%s:%s" % (name, new_parameter.name)
            cs.add_hyperparameter(new_parameter)
            # We must only add a condition if the hyperparameter is not
            # conditional on something else
            if len(preprocessor_configuration_space.get_parents_of(
                    parameter)) == 0 and name not in always_active:
                condition = EqualsCondition(new_parameter, preprocessor,
                                            name)
                cs.add_condition(condition)

        for condition in available_preprocessors[name]. \
                get_hyperparameter_search_space(
                dataset_properties).get_conditions():
            # NOTE(review): the estimator loop above calls
            # get_descendant_literal_conditions while this one calls
            # get_descendent_literal_conditions — confirm which spelling
            # the condition classes actually expose.
            dlcs = condition.get_descendent_literal_conditions()
            for dlc in dlcs:
                if not dlc.child.name.startswith(name):
                    dlc.child.name = "%s:%s" % (name, dlc.child.name)
                if not dlc.parent.name.startswith(name):
                    dlc.parent.name = "%s:%s" % (name, dlc.parent.name)
            cs.add_condition(condition)

        for forbidden_clause in available_preprocessors[name]. \
                get_hyperparameter_search_space(
                dataset_properties).forbidden_clauses:
            dlcs = forbidden_clause.get_descendant_literal_clauses()
            for dlc in dlcs:
                # Fixed: was `dlc.hyperparameter.startwith(name)` —
                # missing `.name` and misspelled `startswith`, raising
                # AttributeError whenever a preprocessor declared a
                # forbidden clause; now matches the sibling loops.
                if not dlc.hyperparameter.name.startswith(name):
                    dlc.hyperparameter.name = "%s:%s" % (
                        name, dlc.hyperparameter.name)
            cs.add_forbidden_clause(forbidden_clause)

    # Now try to add things for which we know that they don't work
    try:
        cs.add_forbidden_clause(ForbiddenAndConjunction(
            ForbiddenEqualsClause(cs.get_hyperparameter(
                "select_percentile_classification:score_func"), "chi2"),
            ForbiddenEqualsClause(cs.get_hyperparameter(
                "rescaling:strategy"), "standard")
        ))
    except Exception:
        # Best effort: the referenced hyperparameters may be excluded
        # from this space; narrowed from a bare except so that
        # KeyboardInterrupt/SystemExit still propagate.
        pass

    return cs
def get_hyperparameter_search_space(cls, dataset_properties=None,
                                    default=None, include=None,
                                    exclude=None):
    """Build the combined configuration space over all rescaling components.

    Adds a categorical '__choice__' hyperparameter for the rescaling
    algorithm, then merges every component's hyperparameters, conditions
    and forbidden clauses into one space under a "<component>:<param>"
    prefix, each conditioned on that component being chosen.

    Parameters
    ----------
    dataset_properties : dict, optional
        Passed through to each component's own search-space builder.
    default : str, optional
        Preferred default choice; if None, picked from a priority list.
    include, exclude : optional
        Filters forwarded to cls.get_available_components.

    Returns
    -------
    ConfigurationSpace

    Raises
    ------
    ValueError
        If no rescaling component is available.
    """
    cs = ConfigurationSpace()

    # Compile a list of legal preprocessors for this problem
    available_preprocessors = cls.get_available_components(
        data_prop=dataset_properties, include=include, exclude=exclude)
    if len(available_preprocessors) == 0:
        raise ValueError("No rescaling algorithm found.")

    if default is None:
        # Priority order for the default choice.
        defaults = ['min/max', 'standardize', 'none', 'normalize']
        for default_ in defaults:
            if default_ in available_preprocessors:
                default = default_
                break

    preprocessor = CategoricalHyperparameter(
        '__choice__', list(available_preprocessors.keys()), default=default)
    cs.add_hyperparameter(preprocessor)
    for name in available_preprocessors:
        preprocessor_configuration_space = available_preprocessors[name]. \
            get_hyperparameter_search_space(dataset_properties)
        for parameter in preprocessor_configuration_space.get_hyperparameters(
        ):
            # Copy under a component-name prefix to avoid name clashes.
            new_parameter = copy.deepcopy(parameter)
            new_parameter.name = "%s:%s" % (name, new_parameter.name)
            cs.add_hyperparameter(new_parameter)
            # We must only add a condition if the hyperparameter is not
            # conditional on something else
            if len(
                    preprocessor_configuration_space.get_parents_of(
                        parameter)) == 0:
                condition = EqualsCondition(new_parameter, preprocessor,
                                            name)
                cs.add_condition(condition)

        # Conditions are mutated in place (names prefixed), hence the
        # component space is fetched anew for this pass.
        for condition in available_preprocessors[name]. \
                get_hyperparameter_search_space(
                dataset_properties).get_conditions():
            if not isinstance(condition, AbstractConjunction):
                dlcs = [condition]
            else:
                dlcs = condition.get_descendent_literal_conditions()
            for dlc in dlcs:
                if not dlc.child.name.startswith(name):
                    dlc.child.name = "%s:%s" % (name, dlc.child.name)
                if not dlc.parent.name.startswith(name):
                    dlc.parent.name = "%s:%s" % (name, dlc.parent.name)
            cs.add_condition(condition)

        # Forbidden clauses get the same in-place prefixing.
        for forbidden_clause in available_preprocessors[name]. \
                get_hyperparameter_search_space(
                dataset_properties).forbidden_clauses:
            dlcs = forbidden_clause.get_descendant_literal_clauses()
            for dlc in dlcs:
                if not dlc.hyperparameter.name.startswith(name):
                    dlc.hyperparameter.name = "%s:%s" % (
                        name, dlc.hyperparameter.name)
            cs.add_forbidden_clause(forbidden_clause)

    return cs
def read(pcs_string, debug=False):
    """Parse a SMAC pcs file (iterable of lines) into a ConfigurationSpace.

    Duplicate of the variant above: conditions sharing a child
    hyperparameter are combined into one AndConjunction.

    Parameters
    ----------
    pcs_string : iterable of str
        Lines of a pcs file.
    debug : bool
        Currently unused in this variant.

    Returns
    -------
    ConfigurationSpace

    Raises
    ------
    NotImplementedError
        For unparseable lines or non-'=' forbidden-clause operators.
    """
    configuration_space = ConfigurationSpace()
    conditions = []
    forbidden = []

    # some statistics
    ct = 0
    cont_ct = 0
    cat_ct = 0
    line_ct = 0

    for line in pcs_string:
        line_ct += 1

        if "#" in line:
            # It contains a comment
            pos = line.find("#")
            line = line[:pos]

        # Remove quotes and whitespaces at beginning and end
        line = line.replace('"', "").replace("'", "")
        line = line.strip()

        if "|" in line:
            # It's a condition; stored raw and resolved once all
            # hyperparameters exist.
            try:
                c = pp_condition.parseString(line)
                conditions.append(c)
            except pyparsing.ParseException:
                raise NotImplementedError("Could not parse condition: %s" %
                                          line)
            continue
        if "}" not in line and "]" not in line:
            print("Skipping: %s" % line)
            continue
        if line.startswith("{") and line.endswith("}"):
            forbidden.append(line)
            continue
        if len(line.strip()) == 0:
            continue

        ct += 1
        param = None
        # print "Parsing: " + line

        create = {"int": UniformIntegerHyperparameter,
                  "float": UniformFloatHyperparameter,
                  "categorical": CategoricalHyperparameter}

        # Numerical grammar is attempted first; on failure the line is
        # retried as a categorical parameter.
        try:
            param_list = pp_cont_param.parseString(line)
            # Optional trailing flags: "i" integer, "l" log scale.
            il = param_list[9:]
            if len(il) > 0:
                il = il[0]
            param_list = param_list[:9]
            name = param_list[0]
            lower = float(param_list[2])
            upper = float(param_list[4])
            paramtype = "int" if "i" in il else "float"
            log = True if "l" in il else False
            default = float(param_list[7])
            param = create[paramtype](name=name, lower=lower, upper=upper,
                                      q=None, log=log, default=default)
            cont_ct += 1
        except pyparsing.ParseException:
            pass

        try:
            param_list = pp_cat_param.parseString(line)
            name = param_list[0]
            choices = [c for c in param_list[2:-4:2]]
            default = param_list[-2]
            param = create["categorical"](name=name, choices=choices,
                                          default=default)
            cat_ct += 1
        except pyparsing.ParseException:
            pass

        if param is None:
            raise NotImplementedError("Could not parse: %s" % line)

        configuration_space.add_hyperparameter(param)

    for clause in forbidden:
        # TODO test this properly!
        # TODO Add a try/catch here!
        # noinspection PyUnusedLocal
        param_list = pp_forbidden_clause.parseString(clause)
        tmp_list = []
        clause_list = []
        # (name, operator, value) triples follow the opening brace.
        for value in param_list[1:]:
            if len(tmp_list) < 3:
                tmp_list.append(value)
            else:
                # So far, only equals is supported by SMAC
                if tmp_list[1] == '=':
                    # TODO maybe add a check if the hyperparameter is
                    # actually in the configuration space
                    clause_list.append(
                        ForbiddenEqualsClause(
                            configuration_space.get_hyperparameter(
                                tmp_list[0]), tmp_list[2]))
                else:
                    raise NotImplementedError()
                tmp_list = []
        configuration_space.add_forbidden_clause(
            ForbiddenAndConjunction(*clause_list))

    #Now handle conditions
    # If there are two conditions for one child, these two conditions are an
    # AND-conjunction of conditions, thus we have to connect them
    conditions_per_child = defaultdict(list)
    for condition in conditions:
        child_name = condition[0]
        conditions_per_child[child_name].append(condition)

    for child_name in conditions_per_child:
        condition_objects = []
        for condition in conditions_per_child[child_name]:
            child = configuration_space.get_hyperparameter(child_name)
            parent_name = condition[2]
            parent = configuration_space.get_hyperparameter(parent_name)
            # Every second token from index 5 is a restriction value.
            restrictions = condition[5:-1:2]

            # TODO: cast the type of the restriction!
            if len(restrictions) == 1:
                condition = EqualsCondition(child, parent, restrictions[0])
            else:
                condition = InCondition(child, parent, values=restrictions)
            condition_objects.append(condition)

        # Now we have all condition objects for this child, so we can build a
        # giant AND-conjunction of them (if number of conditions >= 2)!
        if len(condition_objects) > 1:
            and_conjunction = AndConjunction(*condition_objects)
            configuration_space.add_condition(and_conjunction)
        else:
            configuration_space.add_condition(condition_objects[0])

    return configuration_space