def get_hyperparameter_search_space(cls, dataset_properties, default=None,
                                    include=None, exclude=None):
    """Build the configuration space for choosing one preprocessor.

    The returned space contains a categorical ``__choice__`` hyperparameter
    listing the available components, plus every component's own
    hyperparameters, conditions and forbidden clauses, each renamed to
    ``<component>:<hyperparameter>``.

    Parameters
    ----------
    dataset_properties
        Forwarded to ``cls.get_available_components`` (as ``data_prop``) and
        to each component's ``get_hyperparameter_search_space``.

    default : str or None
        Default value of ``__choice__``. When ``None``, the first entry of
        the preference list below that is available is used; if none of
        them is available, ``None`` is passed on unchanged.

    include, exclude
        Forwarded to ``cls.get_available_components``.

    Returns
    -------
    cs : ConfigurationSpace

    Raises
    ------
    ValueError
        If ``cls.get_available_components`` returns no components.
    """
    def _qualify(hyperparameter, prefix):
        # Hyperparameter objects can be shared between several conditions
        # or clauses, so one may already have been renamed. The check
        # includes the ':' separator so that a hyperparameter whose own
        # name merely begins with the component name is still prefixed.
        if not hyperparameter.name.startswith(prefix):
            hyperparameter.name = prefix + hyperparameter.name

    cs = ConfigurationSpace()

    # Compile a list of legal preprocessors for this problem
    available_preprocessors = cls.get_available_components(
        data_prop=dataset_properties, include=include, exclude=exclude)

    if not available_preprocessors:
        raise ValueError(
            "No preprocessors found, please add NoPreprocessing")

    if default is None:
        defaults = ['no_preprocessing', 'select_percentile', 'pca',
                    'truncatedSVD']
        for default_ in defaults:
            if default_ in available_preprocessors:
                default = default_
                break

    preprocessor = CategoricalHyperparameter(
        '__choice__', list(available_preprocessors.keys()), default=default)
    cs.add_hyperparameter(preprocessor)

    for name in available_preprocessors:
        component = available_preprocessors[name]
        prefix = "%s:" % name

        preprocessor_configuration_space = \
            component.get_hyperparameter_search_space(dataset_properties)
        for parameter in \
                preprocessor_configuration_space.get_hyperparameters():
            new_parameter = copy.deepcopy(parameter)
            new_parameter.name = prefix + new_parameter.name
            cs.add_hyperparameter(new_parameter)
            # We must only add a condition if the hyperparameter is not
            # conditional on something else
            if len(preprocessor_configuration_space.get_parents_of(
                    parameter)) == 0:
                condition = EqualsCondition(new_parameter, preprocessor,
                                            name)
                cs.add_condition(condition)

        # The space is requested again here (and once more below): the
        # hyperparameter objects referenced by its conditions are renamed
        # in place before the condition is added to ``cs``.
        for condition in component.get_hyperparameter_search_space(
                dataset_properties).get_conditions():
            if isinstance(condition, AbstractConjunction):
                dlcs = condition.get_descendent_literal_conditions()
            else:
                dlcs = [condition]
            for dlc in dlcs:
                _qualify(dlc.child, prefix)
                _qualify(dlc.parent, prefix)
            cs.add_condition(condition)

        for forbidden_clause in component.get_hyperparameter_search_space(
                dataset_properties).forbidden_clauses:
            for dlc in forbidden_clause.get_descendant_literal_clauses():
                _qualify(dlc.hyperparameter, prefix)
            cs.add_forbidden_clause(forbidden_clause)

    return cs
def get_hyperparameter_search_space(cls, dataset_properties=None,
                                    default=None, include=None,
                                    exclude=None):
    """Build the configuration space for choosing one rescaling component.

    The returned space contains a categorical ``__choice__`` hyperparameter
    listing the available components, plus every component's own
    hyperparameters, conditions and forbidden clauses, each renamed to
    ``<component>:<hyperparameter>``.

    Parameters
    ----------
    dataset_properties
        Forwarded to ``cls.get_available_components`` (as ``data_prop``) and
        to each component's ``get_hyperparameter_search_space``.

    default : str or None
        Default value of ``__choice__``. When ``None``, the first entry of
        the preference list below that is available is used; if none of
        them is available, ``None`` is passed on unchanged.

    include, exclude
        Forwarded to ``cls.get_available_components``.

    Returns
    -------
    cs : ConfigurationSpace

    Raises
    ------
    ValueError
        If ``cls.get_available_components`` returns no components.
    """
    def _qualify(hyperparameter, prefix):
        # Hyperparameter objects can be shared between several conditions
        # or clauses, so one may already have been renamed. The check
        # includes the ':' separator so that a hyperparameter whose own
        # name merely begins with the component name is still prefixed.
        if not hyperparameter.name.startswith(prefix):
            hyperparameter.name = prefix + hyperparameter.name

    cs = ConfigurationSpace()

    # Compile a list of legal preprocessors for this problem
    available_preprocessors = cls.get_available_components(
        data_prop=dataset_properties, include=include, exclude=exclude)

    if not available_preprocessors:
        raise ValueError("No rescaling algorithm found.")

    if default is None:
        defaults = ['min/max', 'standardize', 'none', 'normalize']
        for default_ in defaults:
            if default_ in available_preprocessors:
                default = default_
                break

    preprocessor = CategoricalHyperparameter(
        '__choice__', list(available_preprocessors.keys()), default=default)
    cs.add_hyperparameter(preprocessor)

    for name in available_preprocessors:
        component = available_preprocessors[name]
        prefix = "%s:" % name

        preprocessor_configuration_space = \
            component.get_hyperparameter_search_space(dataset_properties)
        for parameter in \
                preprocessor_configuration_space.get_hyperparameters():
            new_parameter = copy.deepcopy(parameter)
            new_parameter.name = prefix + new_parameter.name
            cs.add_hyperparameter(new_parameter)
            # We must only add a condition if the hyperparameter is not
            # conditional on something else
            if len(preprocessor_configuration_space.get_parents_of(
                    parameter)) == 0:
                condition = EqualsCondition(new_parameter, preprocessor,
                                            name)
                cs.add_condition(condition)

        # The space is requested again here (and once more below): the
        # hyperparameter objects referenced by its conditions are renamed
        # in place before the condition is added to ``cs``.
        for condition in component.get_hyperparameter_search_space(
                dataset_properties).get_conditions():
            if isinstance(condition, AbstractConjunction):
                dlcs = condition.get_descendent_literal_conditions()
            else:
                dlcs = [condition]
            for dlc in dlcs:
                _qualify(dlc.child, prefix)
                _qualify(dlc.parent, prefix)
            cs.add_condition(condition)

        for forbidden_clause in component.get_hyperparameter_search_space(
                dataset_properties).forbidden_clauses:
            for dlc in forbidden_clause.get_descendant_literal_clauses():
                _qualify(dlc.hyperparameter, prefix)
            cs.add_forbidden_clause(forbidden_clause)

    return cs
def get_hyperparameter_search_space(cls, estimator_name,
                                    default_estimator,
                                    estimator_components,
                                    preprocessor_components,
                                    dataset_properties,
                                    always_active):
    """Return the configuration space for the CASH problem.

    This method should be called by the method
    get_hyperparameter_search_space of a subclass. After the subclass
    assembles a list of available estimators and preprocessor components,
    this method can be called to do the work of creating the actual
    HPOlibConfigSpace.configuration_space.ConfigurationSpace object.

    Every component hyperparameter is added under the name
    ``<component>:<hyperparameter>``; the conditions and forbidden clauses
    of each component are renamed accordingly.

    Parameters
    ----------
    estimator_name : str
        Name of the estimator hyperparameter which will be used in the
        configuration space. For a classification task, this would be
        'classifier'.

    default_estimator : str or None
        Default value for the estimator hyperparameter. When ``None``, the
        first key of ``estimator_components`` is used.

    estimator_components : dict {name: component}
        Dictionary with all estimator components to be included in the
        configuration space.

    preprocessor_components : dict {name: component}
        Dictionary with all preprocessor components to be included in the
        configuration space.

    dataset_properties
        Passed unchanged to every component's
        ``get_hyperparameter_search_space``.

    always_active : list of str
        A list of components which will always be active in the pipeline.
        This is useful for components like imputation which have
        hyperparameters to be configured, but which do not have any parent.
        These components are left out of the "preprocessor" choice and
        their top-level hyperparameters get no activating condition.

    Returns
    -------
    cs : HPOlibConfigSpace.configuration_space.ConfigurationSpace
        The configuration space holding the estimator choice, the
        "preprocessor" choice and all prefixed component hyperparameters.
    """
    def _qualify(hyperparameter, prefix):
        # Hyperparameter objects can be shared between several conditions
        # or clauses, so one may already have been renamed. The check
        # includes the ':' separator so that a hyperparameter whose own
        # name merely begins with the component name is still prefixed.
        if not hyperparameter.name.startswith(prefix):
            hyperparameter.name = prefix + hyperparameter.name

    cs = ConfigurationSpace()

    available_estimators = estimator_components
    available_preprocessors = preprocessor_components

    # Materialise the keys: on Python 3 ``dict.keys()`` is a view that can
    # neither be indexed nor concatenated with a list.
    estimator_names = list(available_estimators.keys())
    if default_estimator is None:
        default_estimator = estimator_names[0]

    estimator = CategoricalHyperparameter(estimator_name, estimator_names,
                                          default=default_estimator)
    cs.add_hyperparameter(estimator)
    for name in estimator_names:
        component = available_estimators[name]
        prefix = "%s:" % name

        # We have to retrieve the configuration space every time because
        # we change the objects it returns. If we reused it, we could not
        # retrieve the conditions further down
        # TODO implement copy for hyperparameters and forbidden and
        # conditions!
        estimator_configuration_space = \
            component.get_hyperparameter_search_space(dataset_properties)
        for parameter in estimator_configuration_space.get_hyperparameters():
            new_parameter = copy.deepcopy(parameter)
            new_parameter.name = prefix + new_parameter.name
            cs.add_hyperparameter(new_parameter)
            # We must only add a condition if the hyperparameter is not
            # conditional on something else
            if len(estimator_configuration_space.get_parents_of(
                    parameter)) == 0:
                condition = EqualsCondition(new_parameter, estimator, name)
                cs.add_condition(condition)

        for condition in component.get_hyperparameter_search_space(
                dataset_properties).get_conditions():
            # NOTE(review): this loop calls get_descendant_... while the
            # preprocessor loop below calls get_descendent_...; confirm
            # which spelling the installed configuration-space API offers.
            dlcs = condition.get_descendant_literal_conditions()
            for dlc in dlcs:
                _qualify(dlc.child, prefix)
                _qualify(dlc.parent, prefix)
            cs.add_condition(condition)

        for forbidden_clause in component.get_hyperparameter_search_space(
                dataset_properties).forbidden_clauses:
            for dlc in forbidden_clause.get_descendant_literal_clauses():
                _qualify(dlc.hyperparameter, prefix)
            cs.add_forbidden_clause(forbidden_clause)

    # Components that are always active are not offered as a choice.
    preprocessor_choices = [app for app in available_preprocessors
                            if app not in always_active]
    preprocessor = CategoricalHyperparameter(
        "preprocessor", ["None"] + preprocessor_choices, default='None')
    cs.add_hyperparameter(preprocessor)
    for name in available_preprocessors:
        component = available_preprocessors[name]
        prefix = "%s:" % name

        preprocessor_configuration_space = \
            component.get_hyperparameter_search_space(dataset_properties)
        for parameter in \
                preprocessor_configuration_space.get_hyperparameters():
            new_parameter = copy.deepcopy(parameter)
            new_parameter.name = prefix + new_parameter.name
            cs.add_hyperparameter(new_parameter)
            # We must only add a condition if the hyperparameter is not
            # conditional on something else
            if len(preprocessor_configuration_space.get_parents_of(
                    parameter)) == 0 and name not in always_active:
                condition = EqualsCondition(new_parameter, preprocessor,
                                            name)
                cs.add_condition(condition)

        for condition in component.get_hyperparameter_search_space(
                dataset_properties).get_conditions():
            # NOTE(review): spelling differs from the estimator loop above
            # (descendent vs. descendant); left unchanged, please confirm.
            dlcs = condition.get_descendent_literal_conditions()
            for dlc in dlcs:
                _qualify(dlc.child, prefix)
                _qualify(dlc.parent, prefix)
            cs.add_condition(condition)

        for forbidden_clause in component.get_hyperparameter_search_space(
                dataset_properties).forbidden_clauses:
            for dlc in forbidden_clause.get_descendant_literal_clauses():
                # The name lives on ``hyperparameter.name``; the previous
                # ``hyperparameter.startwith`` raised AttributeError.
                _qualify(dlc.hyperparameter, prefix)
            cs.add_forbidden_clause(forbidden_clause)

    # Now try to add things for which we know that they don't work
    try:
        cs.add_forbidden_clause(ForbiddenAndConjunction(
            ForbiddenEqualsClause(cs.get_hyperparameter(
                "select_percentile_classification:score_func"), "chi2"),
            ForbiddenEqualsClause(cs.get_hyperparameter(
                "rescaling:strategy"), "standard")
        ))
    except Exception:
        # Best effort: the hyperparameters involved may not be part of
        # this space. Unlike a bare ``except``, this does not swallow
        # KeyboardInterrupt or SystemExit.
        pass

    return cs