Example #1
    def _get_hyperparameter_search_space_pipeline_step(self,
                                                       ps,
                                                       include=None,
                                                       dataset_properties=None):
        # Use None instead of a mutable default argument
        if dataset_properties is None:
            dataset_properties = {}

        if include is not None:
            nodes = include
        else:
            nodes = ps.get_nodes()

        cs = ConfigurationSpace()
        choice = cs.add_hyperparameter(
            CategoricalHyperparameter('__choice__',
                                      [node.get_name() for node in nodes]))

        for node in nodes:
            sub_cs = node.get_hyperparameter_search_space(
                dataset_properties=dataset_properties)
            parent_hyperparameter = {
                'parent': choice,
                'value': node.get_name()
            }
            cs.add_configuration_space(
                node.get_name(),
                sub_cs,
                parent_hyperparameter=parent_hyperparameter)
        return cs
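Example #2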
    def get_hyperparameter_search_space(
        self,
        dataset_properties: Optional[Dict[str, str]] = None,
        default: Optional[str] = None,
        include: Optional[List[str]] = None,
        exclude: Optional[List[str]] = None,
    ) -> ConfigurationSpace:
        """Returns the configuration space of the current chosen components

        Args:
            dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on
            default (Optional[str]): Default component to use
            include (Optional[List[str]]): which components to include. It is an exhaustive
                list, and only these components will be used.
            exclude (Optional[List[str]]): which components to skip

        Returns:
            ConfigurationSpace: the configuration space of the hyper-parameters of the
                 chosen component
        """
        cs = ConfigurationSpace()

        if dataset_properties is None:
            dataset_properties = {}

        # Compile a list of legal preprocessors for this problem
        available_initializers = self.get_available_components(
            dataset_properties=dataset_properties,
            include=include,
            exclude=exclude)

        if len(available_initializers) == 0:
            raise ValueError("No initializers found")

        if default is None:
            defaults = [
                'MLPNet',
            ]
            for default_ in defaults:
                if default_ in available_initializers:
                    default = default_
                    break

        initializer = CSH.CategoricalHyperparameter(
            '__choice__',
            list(available_initializers.keys()),
            default_value=default)
        cs.add_hyperparameter(initializer)
        for name in available_initializers:
            initializer_configuration_space = available_initializers[name]. \
                get_hyperparameter_search_space(dataset_properties)
            parent_hyperparameter = {'parent': initializer, 'value': name}
            cs.add_configuration_space(
                name,
                initializer_configuration_space,
                parent_hyperparameter=parent_hyperparameter)

        self.configuration_space_ = cs
        self.dataset_properties_ = dataset_properties
        return cs
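A note on the shared pattern: each example builds a categorical '__choice__' hyperparameter that selects a component, then attaches every component's sub-space under a name prefix with a parent_hyperparameter condition, so a sub-space is only active when its component is selected. Below is a minimal, self-contained sketch of that pattern; the component names ('svm', 'rf') and their hyperparameters are illustrative, not taken from the examples.

from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import (CategoricalHyperparameter,
                                         UniformFloatHyperparameter)

# A sub-space per component
svm_cs = ConfigurationSpace()
svm_cs.add_hyperparameter(UniformFloatHyperparameter('C', 0.01, 100.0, log=True))
rf_cs = ConfigurationSpace()
rf_cs.add_hyperparameter(UniformFloatHyperparameter('max_features', 0.1, 1.0))

cs = ConfigurationSpace()
choice = CategoricalHyperparameter('__choice__', ['svm', 'rf'], default_value='rf')
cs.add_hyperparameter(choice)

# Hyperparameters of each sub-space become '<prefix>:<name>' and are only
# active when __choice__ equals the matching value.
cs.add_configuration_space('svm', svm_cs,
                           parent_hyperparameter={'parent': choice, 'value': 'svm'})
cs.add_configuration_space('rf', rf_cs,
                           parent_hyperparameter={'parent': choice, 'value': 'rf'})

print(cs.sample_configuration())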
Example #3
    def get_hyperparameter_search_space(
            self,
            dataset_properties: Optional[Dict[str, Any]] = None,
            default: Optional[str] = None,
            include: Optional[List[str]] = None,
            exclude: Optional[List[str]] = None) -> ConfigurationSpace:
        cs = ConfigurationSpace()

        if dataset_properties is None:
            dataset_properties = dict()

        dataset_properties = {**self.dataset_properties, **dataset_properties}

        available_preprocessors = self.get_available_components(
            dataset_properties=dataset_properties,
            include=include,
            exclude=exclude)

        if len(available_preprocessors) == 0:
            raise ValueError("no encoders found, please add a encoder")

        if default is None:
            defaults = ['OneHotEncoder', 'OrdinalEncoder', 'NoEncoder']
            for default_ in defaults:
                if default_ in available_preprocessors:
                    if include is not None and default_ not in include:
                        continue
                    if exclude is not None and default_ in exclude:
                        continue
                    default = default_
                    break

        # add only NoEncoder to the choice hyperparameter if the dataset is purely numerical
        if len(dataset_properties['categorical_columns']) == 0:
            default = 'NoEncoder'
            preprocessor = CSH.CategoricalHyperparameter('__choice__',
                                                         ['NoEncoder'],
                                                         default_value=default)
        else:
            preprocessor = CSH.CategoricalHyperparameter(
                '__choice__',
                list(available_preprocessors.keys()),
                default_value=default)

        cs.add_hyperparameter(preprocessor)

        # add only child hyperparameters of the chosen encoders
        for name in preprocessor.choices:
            preprocessor_configuration_space = available_preprocessors[name].\
                get_hyperparameter_search_space(dataset_properties)
            parent_hyperparameter = {'parent': preprocessor, 'value': name}
            cs.add_configuration_space(
                name,
                preprocessor_configuration_space,
                parent_hyperparameter=parent_hyperparameter)

        self.configuration_space = cs
        self.dataset_properties = dataset_properties
        return cs
Example #4
    def get_hyperparameter_search_space(self,
                                        dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None,
                                        default: Optional[str] = None,
                                        include: Optional[List[str]] = None,
                                        exclude: Optional[List[str]] = None) -> ConfigurationSpace:
        cs = ConfigurationSpace()

        if dataset_properties is None:
            dataset_properties = dict()

        dataset_properties = {**self.dataset_properties, **dataset_properties}

        available_preprocessors = self.get_available_components(dataset_properties=dataset_properties,
                                                                include=include,
                                                                exclude=exclude)

        if len(available_preprocessors) == 0:
            raise ValueError("no image normalizers found, please add an image normalizer")

        if default is None:
            defaults = ['ImageNormalizer', 'NoNormalizer']
            for default_ in defaults:
                if default_ in available_preprocessors:
                    if include is not None and default_ not in include:
                        continue
                    if exclude is not None and default_ in exclude:
                        continue
                    default = default_
                    break

        updates = self._get_search_space_updates()
        if '__choice__' in updates.keys():
            choice_hyperparameter = updates['__choice__']
            if not set(choice_hyperparameter.value_range).issubset(available_preprocessors):
                raise ValueError("Expected given update for {} to have "
                                 "choices in {} got {}".format(self.__class__.__name__,
                                                               available_preprocessors,
                                                               choice_hyperparameter.value_range))
            preprocessor = CSH.CategoricalHyperparameter('__choice__',
                                                         choice_hyperparameter.value_range,
                                                         default_value=choice_hyperparameter.default_value)
        else:
            preprocessor = CSH.CategoricalHyperparameter('__choice__',
                                                         list(available_preprocessors.keys()),
                                                         default_value=default)
        cs.add_hyperparameter(preprocessor)

        # add only child hyperparameters of preprocessor choices
        for name in preprocessor.choices:
            preprocessor_configuration_space = available_preprocessors[name].\
                get_hyperparameter_search_space(dataset_properties)
            parent_hyperparameter = {'parent': preprocessor, 'value': name}
            cs.add_configuration_space(name, preprocessor_configuration_space,
                                       parent_hyperparameter=parent_hyperparameter)

        self.configuration_space = cs
        self.dataset_properties = dataset_properties
        return cs
Example #5
def load_default_configspace(primitive):
    default_config = ConfigurationSpace()

    # Look up the meta-learned hyperparameters once instead of calling the
    # lookup twice
    metalearning_hyperparameters = get_hyperparameters_from_metalearnig()
    if primitive in metalearning_hyperparameters:
        default_config.add_configuration_space(
            primitive,
            get_configspace_from_metalearning(
                metalearning_hyperparameters[primitive]),
            '|')

    return default_config
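For reference, the third positional argument of add_configuration_space is the delimiter, so the sub-space's hyperparameters are exposed as '<primitive>|<name>' instead of the default '<primitive>:<name>'. A quick illustration (the 'svm'/'kernel' names are made up):

from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter

sub = ConfigurationSpace()
sub.add_hyperparameter(CategoricalHyperparameter('kernel', ['rbf', 'linear']))
outer = ConfigurationSpace()
outer.add_configuration_space('svm', sub, '|')
print(outer.get_hyperparameter_names())  # ['svm|kernel']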
Example #6
    def get_hyperparameter_search_space(self,
                                        dataset_properties=None,
                                        default=None,
                                        include=None,
                                        exclude=None):
        if dataset_properties is None:
            dataset_properties = {}

        if include is not None and exclude is not None:
            raise ValueError("The arguments include and "
                             "exclude cannot be used together.")

        cs = ConfigurationSpace()

        # Compile a list of all estimator objects for this problem
        available_estimators = self.get_available_components(
            dataset_properties=dataset_properties,
            include=include,
            exclude=exclude)

        if len(available_estimators) == 0:
            raise ValueError("No classifiers found")

        if default is None:
            defaults = ['random_forest', 'liblinear_svc', 'sgd', 'libsvm_svc'
                        ] + list(available_estimators.keys())
            for default_ in defaults:
                if default_ in available_estimators:
                    if include is not None and default_ not in include:
                        continue
                    if exclude is not None and default_ in exclude:
                        continue
                    default = default_
                    break

        estimator = CategoricalHyperparameter(
            '__choice__',
            list(available_estimators.keys()),
            default_value=default)
        cs.add_hyperparameter(estimator)
        for estimator_name in available_estimators.keys():
            estimator_configuration_space = available_estimators[estimator_name].\
                get_hyperparameter_search_space(dataset_properties)
            parent_hyperparameter = {
                'parent': estimator,
                'value': estimator_name
            }
            cs.add_configuration_space(
                estimator_name,
                estimator_configuration_space,
                parent_hyperparameter=parent_hyperparameter)

        self.configuration_space = cs
        self.dataset_properties = dataset_properties
        return cs
Example #7
    def get_hyperparameter_search_space(self, dataset_properties=None,
                                        default=None,
                                        include=None,
                                        exclude=None):
        if dataset_properties is None:
            dataset_properties = {}

        if include is not None and exclude is not None:
            raise ValueError("The arguments include and "
                             "exclude cannot be used together.")

        cs = ConfigurationSpace()

        # Compile a list of all estimator objects for this problem
        available_estimators = self.get_available_components(
            dataset_properties=dataset_properties,
            include=include,
            exclude=exclude)

        if len(available_estimators) == 0:
            raise ValueError("No classifiers found")

        if default is None:
            defaults = ['random_forest', 'liblinear_svc', 'sgd',
                        'libsvm_svc'] + list(available_estimators.keys())
            for default_ in defaults:
                if default_ in available_estimators:
                    if include is not None and default_ not in include:
                        continue
                    if exclude is not None and default_ in exclude:
                        continue
                    default = default_
                    break

        estimator = CategoricalHyperparameter('__choice__',
                                              list(available_estimators.keys()),
                                              default_value=default)
        cs.add_hyperparameter(estimator)
        for estimator_name in available_estimators.keys():
            estimator_configuration_space = available_estimators[estimator_name].\
                get_hyperparameter_search_space(dataset_properties)
            parent_hyperparameter = {'parent': estimator,
                                     'value': estimator_name}
            cs.add_configuration_space(estimator_name,
                                       estimator_configuration_space,
                                       parent_hyperparameter=parent_hyperparameter)

        self.configuration_space_ = cs
        self.dataset_properties_ = dataset_properties
        return cs
Example #8
def _get_hyperparameter_search_space_recursively(
    dataset_properties: DATASET_PROPERTIES_TYPE,
    cs: ConfigurationSpace,
    transformer: BaseEstimator,
) -> ConfigurationSpace:
    for st_name, st_operation in transformer:
        if hasattr(st_operation, "get_hyperparameter_search_space"):
            cs.add_configuration_space(
                st_name,
                st_operation.get_hyperparameter_search_space(dataset_properties))
        else:
            return FeatTypeSplit._get_hyperparameter_search_space_recursively(
                dataset_properties, cs, st_operation)
    return cs
Example #9
    def get_hyperparameter_search_space(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
                                        ) -> ConfigurationSpace:
        cs = ConfigurationSpace()

        available_augmenters: Dict[str, BaseImageAugmenter] = get_components()

        if dataset_properties is None:
            dataset_properties = dict()

        # add child hyperparameters
        for name in available_augmenters.keys():
            preprocessor_configuration_space = available_augmenters[name].\
                get_hyperparameter_search_space(dataset_properties)
            cs.add_configuration_space(name, preprocessor_configuration_space)

        return cs
Example #10
    def get_hyperparameter_search_space(
        self,
        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
        default: Optional[str] = None,
        include: Optional[Dict[str, str]] = None,
        exclude: Optional[Dict[str, str]] = None,
    ) -> ConfigurationSpace:
        cs = ConfigurationSpace()

        if dataset_properties is None:
            dataset_properties = {}

        # Compile a list of legal preprocessors for this problem
        available_preprocessors = self.get_available_components(
            dataset_properties=dataset_properties,
            include=include,
            exclude=exclude)

        if len(available_preprocessors) == 0:
            raise ValueError(
                "No one-hot encoders found, please add a one-hot encoder "
                "component.")

        if default is None:
            defaults = ['one_hot_encoding', 'no_encoding']
            for default_ in defaults:
                if default_ in available_preprocessors:
                    default = default_
                    break

        preprocessor = CategoricalHyperparameter(
            '__choice__',
            list(available_preprocessors.keys()),
            default_value=default)
        cs.add_hyperparameter(preprocessor)
        for name in available_preprocessors:
            preprocessor_configuration_space = available_preprocessors[name]. \
                get_hyperparameter_search_space(dataset_properties)
            parent_hyperparameter = {'parent': preprocessor, 'value': name}
            cs.add_configuration_space(
                name,
                preprocessor_configuration_space,
                parent_hyperparameter=parent_hyperparameter)

        self.configuration_space = cs
        self.dataset_properties = dataset_properties
        return cs
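Example #11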
    def get_hyperparameter_search_space(
            dataset_properties: Optional[Dict[str, str]] = None,
            **kwargs: Any) -> ConfigurationSpace:
        cs = ConfigurationSpace()
        backbones: Dict[str, Type[BaseBackbone]] = get_available_backbones()
        heads: Dict[str, Type[BaseHead]] = get_available_heads()

        # filter backbones and heads for those who support the current task type
        if dataset_properties is not None and "task_type" in dataset_properties:
            task = dataset_properties["task_type"]
            backbones = {
                name: backbone
                for name, backbone in backbones.items()
                if task in backbone.supported_tasks
            }
            heads = {
                name: head
                for name, head in heads.items() if task in head.supported_tasks
            }

        backbone_hp = CategoricalHyperparameter("backbone",
                                                choices=backbones.keys())
        head_hp = CategoricalHyperparameter("head", choices=heads.keys())
        cs.add_hyperparameters([backbone_hp, head_hp])

        # for each backbone and head, add a conditional search space if this backbone or head is chosen
        for backbone_name in backbones.keys():
            backbone_cs = backbones[
                backbone_name].get_hyperparameter_search_space(
                    dataset_properties)
            cs.add_configuration_space(backbone_name,
                                       backbone_cs,
                                       parent_hyperparameter={
                                           "parent": backbone_hp,
                                           "value": backbone_name
                                       })

        for head_name in heads.keys():
            head_cs: ConfigurationSpace = heads[
                head_name].get_hyperparameter_search_space(dataset_properties)
            cs.add_configuration_space(head_name,
                                       head_cs,
                                       parent_hyperparameter={
                                           "parent": head_hp,
                                           "value": head_name
                                       })
        return cs
Example #12
    def get_hyperparameter_search_space(self,
                                        dataset_properties=None,
                                        default=None,
                                        include=None,
                                        exclude=None):
        cs = ConfigurationSpace()

        if dataset_properties is None:
            dataset_properties = {}

        # Compile a list of legal preprocessors for this problem
        available_preprocessors = self.get_available_components(
            dataset_properties=dataset_properties,
            include=include,
            exclude=exclude)

        if len(available_preprocessors) == 0:
            raise ValueError(
                "No preprocessors found, please add NoPreprocessing")

        if default is None:
            defaults = [
                'no_preprocessing', 'select_percentile', 'pca', 'truncatedSVD'
            ]
            for default_ in defaults:
                if default_ in available_preprocessors:
                    default = default_
                    break

        preprocessor = CategoricalHyperparameter(
            '__choice__',
            list(available_preprocessors.keys()),
            default_value=default)
        cs.add_hyperparameter(preprocessor)
        for name in available_preprocessors:
            preprocessor_configuration_space = available_preprocessors[name]. \
                get_hyperparameter_search_space(dataset_properties)
            parent_hyperparameter = {'parent': preprocessor, 'value': name}
            cs.add_configuration_space(
                name,
                preprocessor_configuration_space,
                parent_hyperparameter=parent_hyperparameter)

        self.configuration_space_ = cs
        self.dataset_properties_ = dataset_properties
        return cs
Example #13
    def get_hyperparameter_search_space(self,
                                        dataset_properties=None,
                                        default=None,
                                        include=None,
                                        exclude=None):
        cs = ConfigurationSpace()

        if dataset_properties is None:
            dataset_properties = {}

        # Compile a list of legal preprocessors for this problem
        available_preprocessors = self.get_available_components(
            dataset_properties=dataset_properties,
            include=include,
            exclude=exclude)

        if len(available_preprocessors) == 0:
            raise ValueError(
                "No rescalers found, please add any rescaling component.")

        if default is None:
            defaults = ['standardize', 'none', 'maxabs', 'minmax', 'normalize']
            for default_ in defaults:
                if default_ in available_preprocessors:
                    default = default_
                    break

        preprocessor = CategoricalHyperparameter(
            '__choice__',
            list(available_preprocessors.keys()),
            default_value=default)
        cs.add_hyperparameter(preprocessor)
        for name in available_preprocessors:
            preprocessor_configuration_space = available_preprocessors[name]. \
                get_hyperparameter_search_space(dataset_properties)
            parent_hyperparameter = {'parent': preprocessor, 'value': name}
            cs.add_configuration_space(
                name,
                preprocessor_configuration_space,
                parent_hyperparameter=parent_hyperparameter)

        self.configuration_space = cs
        self.dataset_properties = dataset_properties
        return cs
Example #14
    def get_hyperparameter_search_space(
            self,
            dataset_properties: Optional[Dict] = None,
            default: Optional[str] = None,
            include: Optional[Dict] = None,
            exclude: Optional[Dict] = None) -> ConfigurationSpace:
        cs = ConfigurationSpace()

        if dataset_properties is None:
            dataset_properties = {}

        # Compile a list of legal preprocessors for this problem
        available_preprocessors = self.get_available_components(
            dataset_properties=dataset_properties,
            include=include,
            exclude=exclude)

        if len(available_preprocessors) == 0:
            raise ValueError(
                "No preprocessors found, please add NoPreprocessing")

        if default is None:
            defaults = ["feature_type"]
            for default_ in defaults:
                if default_ in available_preprocessors:
                    default = default_
                    break

        preprocessor = CategoricalHyperparameter(
            '__choice__',
            list(available_preprocessors.keys()),
            default_value=default)
        cs.add_hyperparameter(preprocessor)
        for name in available_preprocessors:
            preprocessor_configuration_space = available_preprocessors[name](
                dataset_properties=dataset_properties). \
                get_hyperparameter_search_space(dataset_properties)
            parent_hyperparameter = {'parent': preprocessor, 'value': name}
            cs.add_configuration_space(
                name,
                preprocessor_configuration_space,
                parent_hyperparameter=parent_hyperparameter)
        return cs
Example #15
    def get_hyperparameter_search_space(self, dataset_properties=None,
                                        default=None,
                                        include=None,
                                        exclude=None):
        cs = ConfigurationSpace()

        if dataset_properties is None:
            dataset_properties = {}

        # Compile a list of legal preprocessors for this problem
        available_preprocessors = self.get_available_components(
            dataset_properties=dataset_properties,
            include=include, exclude=exclude)

        if len(available_preprocessors) == 0:
            raise ValueError(
                "No preprocessors found, please add NoPreprocessing")

        if default is None:
            defaults = ['no_preprocessing', 'select_percentile', 'pca',
                        'truncatedSVD']
            for default_ in defaults:
                if default_ in available_preprocessors:
                    default = default_
                    break

        preprocessor = CategoricalHyperparameter(
            '__choice__',
            list(available_preprocessors.keys()),
            default_value=default)
        cs.add_hyperparameter(preprocessor)
        for name in available_preprocessors:
            preprocessor_configuration_space = available_preprocessors[name]. \
                get_hyperparameter_search_space(dataset_properties)
            parent_hyperparameter = {'parent': preprocessor, 'value': name}
            cs.add_configuration_space(name, preprocessor_configuration_space,
                                       parent_hyperparameter=parent_hyperparameter)

        self.configuration_space_ = cs
        self.dataset_properties_ = dataset_properties
        return cs
Example #16
    def _get_base_search_space(
            self,
            cs: ConfigurationSpace,
            dataset_properties: Dict[str, Any],
            include: Optional[Dict[str, Any]],
            exclude: Optional[Dict[str, Any]],
            pipeline: List[Tuple[str, autoPyTorchChoice]]) -> ConfigurationSpace:
        if include is None:
            if self.include is None:
                include = {}
            else:
                include = self.include

        keys = [pair[0] for pair in pipeline]
        for key in include:
            if key not in keys:
                raise ValueError('Invalid key in include: %s; should be one '
                                 'of %s' % (key, keys))

        if exclude is None:
            if self.exclude is None:
                exclude = {}
            else:
                exclude = self.exclude

        keys = [pair[0] for pair in pipeline]
        for key in exclude:
            if key not in keys:
                raise ValueError('Invalid key in exclude: %s; should be one '
                                 'of %s' % (key, keys))

        matches = get_match_array(pipeline,
                                  dataset_properties,
                                  include=include,
                                  exclude=exclude)

        # Now we have only legal combinations at this step of the pipeline
        # Simple sanity checks
        assert np.sum(matches) != 0, "No valid pipeline found."

        assert np.sum(matches) <= np.size(matches), \
            "'matches' is not binary; %s <= %d, %s" % \
            (str(np.sum(matches)), np.size(matches), str(matches.shape))

        # Iterate each dimension of the matches array (each step of the
        # pipeline) to see if we can add a hyperparameter for that step
        for node_idx, n_ in enumerate(pipeline):
            node_name, node = n_

            is_choice = isinstance(node, autoPyTorchChoice)

            # if the node isn't a choice we can add it immediately because it
            # must be active (if it weren't, np.sum(matches) would be zero)
            if not is_choice:
                cs.add_configuration_space(
                    node_name,
                    node.get_hyperparameter_search_space(dataset_properties),
                )
            # If the node is a choice, we have to figure out which of its
            #  choices are actually legal choices
            else:
                choices_list = find_active_choices(matches, node, node_idx,
                                                   dataset_properties,
                                                   include.get(node_name),
                                                   exclude.get(node_name))
                sub_config_space = node.get_hyperparameter_search_space(
                    dataset_properties, include=choices_list)
                cs.add_configuration_space(node_name, sub_config_space)

        # And now add forbidden parameter configurations
        # According to matches
        if np.sum(matches) < np.size(matches):
            cs = add_forbidden(conf_space=cs,
                               pipeline=pipeline,
                               matches=matches,
                               dataset_properties=dataset_properties,
                               include=include,
                               exclude=exclude)

        return cs
Example #17
    def get_hyperparameter_search_space(
        self,
        dataset_properties: Optional[Dict[str,
                                          BaseDatasetPropertiesType]] = None,
        default: Optional[str] = None,
        include: Optional[List[str]] = None,
        exclude: Optional[List[str]] = None,
    ) -> ConfigurationSpace:
        """Returns the configuration space of the current chosen components

        Args:
            dataset_properties (Optional[Dict[str, BaseDatasetPropertiesType]]): Describes the dataset to work on
            default (Optional[str]): Default embedding to use
            include (Optional[List[str]]): which components to include. It is an exhaustive
                list, and only these components will be used.
            exclude (Optional[List[str]]): which components to skip

        Returns:
            ConfigurationSpace: the configuration space of the hyper-parameters of the
                 chosen component
        """
        cs = ConfigurationSpace()

        if dataset_properties is None:
            dataset_properties = {}

        # Compile a list of legal preprocessors for this problem
        available_embedding = self.get_available_components(
            dataset_properties=dataset_properties,
            include=include,
            exclude=exclude)

        if len(available_embedding) == 0 and 'tabular' in str(
                dataset_properties['task_type']):
            raise ValueError("No embedding found")

        if len(available_embedding) == 0:
            return cs

        if default is None:
            defaults = [
                'NoEmbedding',
                'LearnedEntityEmbedding',
            ]
            for default_ in defaults:
                if default_ in available_embedding:
                    default = default_
                    break

        categorical_columns = dataset_properties['categorical_columns'] \
            if isinstance(dataset_properties['categorical_columns'], List) else []

        updates = self._get_search_space_updates()
        if '__choice__' in updates.keys():
            choice_hyperparameter = updates['__choice__']
            if not set(choice_hyperparameter.value_range).issubset(
                    available_embedding):
                raise ValueError("Expected given update for {} to have "
                                 "choices in {} got {}".format(
                                     self.__class__.__name__,
                                     available_embedding,
                                     choice_hyperparameter.value_range))
            if len(categorical_columns) == 0:
                assert len(choice_hyperparameter.value_range) == 1
                if 'NoEmbedding' not in choice_hyperparameter.value_range:
                    raise ValueError(
                        "Provided {} in choices, however, the dataset "
                        "is incompatible with it".format(
                            choice_hyperparameter.value_range))
            embedding = CSH.CategoricalHyperparameter(
                '__choice__',
                choice_hyperparameter.value_range,
                default_value=choice_hyperparameter.default_value)
        else:

            if len(categorical_columns) == 0:
                default = 'NoEmbedding'
                if include is not None and default not in include:
                    raise ValueError(
                        "Provided {} in include, however, the dataset "
                        "is incompatible with it".format(include))
                embedding = CSH.CategoricalHyperparameter(
                    '__choice__', ['NoEmbedding'], default_value=default)
            else:
                embedding = CSH.CategoricalHyperparameter(
                    '__choice__',
                    list(available_embedding.keys()),
                    default_value=default)

        cs.add_hyperparameter(embedding)
        for name in embedding.choices:
            updates = self._get_search_space_updates(prefix=name)
            config_space = available_embedding[
                name].get_hyperparameter_search_space(
                    dataset_properties,  # type: ignore
                    **updates)
            parent_hyperparameter = {'parent': embedding, 'value': name}
            cs.add_configuration_space(
                name,
                config_space,
                parent_hyperparameter=parent_hyperparameter)

        self.configuration_space_ = cs
        self.dataset_properties_ = dataset_properties
        return cs
Example #18
    def get_hyperparameter_search_space(
        self,
        dataset_properties: Optional[Dict[str,
                                          BaseDatasetPropertiesType]] = None,
        default: Optional[str] = None,
        include: Optional[List[str]] = None,
        exclude: Optional[List[str]] = None,
    ) -> ConfigurationSpace:
        """Returns the configuration space of the current chosen components

        Args:
            dataset_properties (Optional[Dict[str, BaseDatasetPropertiesType]]): Describes the dataset to work on
            default (Optional[str]): Default head to use
            include (Optional[List[str]]): which components to include. It is an exhaustive
                list, and only these components will be used.
            exclude (Optional[List[str]]): which components to skip

        Returns:
            ConfigurationSpace: the configuration space of the hyper-parameters of the
                 chosen component
        """
        cs = ConfigurationSpace()

        if dataset_properties is None:
            dataset_properties = {}

        # Compile a list of legal preprocessors for this problem
        available_heads = self.get_available_components(
            dataset_properties=dataset_properties,
            include=include,
            exclude=exclude)

        if len(available_heads) == 0:
            raise ValueError("No head found")

        if default is None:
            defaults = [
                'FullyConnectedHead',
                'FullyConvolutional2DHead',
            ]
            for default_ in defaults:
                if default_ in available_heads:
                    default = default_
                    break
        updates = self._get_search_space_updates()
        if '__choice__' in updates.keys():
            choice_hyperparameter = updates['__choice__']
            if not set(choice_hyperparameter.value_range).issubset(
                    available_heads):
                raise ValueError("Expected given update for {} to have "
                                 "choices in {} got {}".format(
                                     self.__class__.__name__, available_heads,
                                     choice_hyperparameter.value_range))
            head = CSH.CategoricalHyperparameter(
                '__choice__',
                choice_hyperparameter.value_range,
                default_value=choice_hyperparameter.default_value)
        else:
            head = CSH.CategoricalHyperparameter('__choice__',
                                                 list(available_heads.keys()),
                                                 default_value=default)
        cs.add_hyperparameter(head)
        for name in head.choices:
            updates = self._get_search_space_updates(prefix=name)
            config_space = available_heads[
                name].get_hyperparameter_search_space(
                    dataset_properties,  # type: ignore
                    **updates)
            parent_hyperparameter = {'parent': head, 'value': name}
            cs.add_configuration_space(
                name,
                config_space,
                parent_hyperparameter=parent_hyperparameter)

        self.configuration_space_ = cs
        self.dataset_properties_ = dataset_properties
        return cs
Example #19
    def get_hyperparameter_search_space(
            self,
            dataset_properties: Optional[Dict[
                str, BaseDatasetPropertiesType]] = None,
            default: Optional[str] = None,
            include: Optional[List[str]] = None,
            exclude: Optional[List[str]] = None) -> ConfigurationSpace:
        cs = ConfigurationSpace()

        if dataset_properties is None:
            dataset_properties = dict()

        dataset_properties = {**self.dataset_properties, **dataset_properties}

        available_ = self.get_available_components(
            dataset_properties=dataset_properties,
            include=include,
            exclude=exclude)

        if len(available_) == 0:
            raise ValueError(
                "no feature preprocessors found, please add a feature preprocessor"
            )

        if default is None:
            defaults = [
                'NoFeaturePreprocessor',
                'FastICA',
                'KernelPCA',
                'RandomKitchenSinks',
                'Nystroem',
                'PolynomialFeatures',
                'PowerTransformer',
                'TruncatedSVD',
            ]
            for default_ in defaults:
                if default_ in available_:
                    if include is not None and default_ not in include:
                        continue
                    if exclude is not None and default_ in exclude:
                        continue
                    default = default_
                    break

        numerical_columns = dataset_properties['numerical_columns'] \
            if isinstance(dataset_properties['numerical_columns'], List) else []
        updates = self._get_search_space_updates()
        if '__choice__' in updates.keys():
            choice_hyperparameter = updates['__choice__']
            if not set(choice_hyperparameter.value_range).issubset(available_):
                raise ValueError("Expected given update for {} to have "
                                 "choices in {} got {}".format(
                                     self.__class__.__name__, available_,
                                     choice_hyperparameter.value_range))
            if len(numerical_columns) == 0:
                assert len(choice_hyperparameter.value_range) == 1
                assert 'NoFeaturePreprocessor' in choice_hyperparameter.value_range, \
                    "Provided {} in choices, however, the dataset " \
                    "is incompatible with it".format(choice_hyperparameter.value_range)
            preprocessor = CSH.CategoricalHyperparameter(
                '__choice__',
                choice_hyperparameter.value_range,
                default_value=choice_hyperparameter.default_value)
        else:
            # add only NoFeaturePreprocessor to the choice hyperparameter
            # if the dataset is purely categorical
            if len(numerical_columns) == 0:
                default = 'NoFeaturePreprocessor'
                if include is not None and default not in include:
                    raise ValueError(
                        "Provided {} in include, however, "
                        "the dataset is incompatible with it".format(include))
                preprocessor = CSH.CategoricalHyperparameter(
                    '__choice__', ['NoFeaturePreprocessor'],
                    default_value=default)
            else:
                # Truncated SVD requires n_features > n_components, so drop it
                # when only one numerical column is present (pop avoids a
                # KeyError if it was excluded)
                if len(numerical_columns) == 1:
                    available_.pop('TruncatedSVD', None)
                preprocessor = CSH.CategoricalHyperparameter(
                    '__choice__',
                    list(available_.keys()),
                    default_value=default)

        cs.add_hyperparameter(preprocessor)

        # add only child hyperparameters of preprocessor choices
        for name in preprocessor.choices:
            updates = self._get_search_space_updates(prefix=name)
            config_space = available_[name].get_hyperparameter_search_space(
                dataset_properties,  # type:ignore
                **updates)
            parent_hyperparameter = {'parent': preprocessor, 'value': name}
            cs.add_configuration_space(
                name,
                config_space,
                parent_hyperparameter=parent_hyperparameter)

        self.configuration_space = cs
        self.dataset_properties = dataset_properties
        return cs
Example #20
from copy import copy, deepcopy
from pickle import dumps, loads

from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter

# Probe whether categorical weights survive copying, pickling, and the
# prefixed copy created by add_configuration_space.
weights = [0.25, 0.5, 0.25]
hp = CategoricalHyperparameter("B", ["1", "2", "3"], weights=weights)
sub_cs = ConfigurationSpace()
sub_cs.add_hyperparameter(hp)
cs = ConfigurationSpace()
cs.add_configuration_space("A", sub_cs)
print(deepcopy(sub_cs).get_hyperparameter("B").probabilities, weights)
print(copy(sub_cs).get_hyperparameter("B").probabilities, weights)
print(loads(dumps(sub_cs)).get_hyperparameter("B").probabilities, weights)
print(cs.get_hyperparameter("A:B").probabilities, weights)
print(deepcopy(cs).get_hyperparameter("A:B").probabilities, weights)
print(copy(cs).get_hyperparameter("A:B").probabilities, weights)
print(loads(dumps(cs)).get_hyperparameter("A:B").probabilities, weights)
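If the weights survive, every line should print probabilities matching (0.25, 0.5, 0.25); a mismatch on the prefixed 'A:B' copies or after pickling would indicate that add_configuration_space or serialization drops the categorical weights.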
Example #21
    def get_hyperparameter_search_space(
            self,
            dataset_properties: Optional[Dict[
                str, BaseDatasetPropertiesType]] = None,
            default: Optional[str] = None,
            include: Optional[List[str]] = None,
            exclude: Optional[List[str]] = None) -> ConfigurationSpace:
        cs = ConfigurationSpace()

        if dataset_properties is None:
            dataset_properties = dict()

        dataset_properties = {**self.dataset_properties, **dataset_properties}

        available_scalers = self.get_available_components(
            dataset_properties=dataset_properties,
            include=include,
            exclude=exclude)

        if len(available_scalers) == 0:
            raise ValueError("no scalers found, please add a scaler")

        if default is None:
            defaults = [
                'StandardScaler', 'Normalizer', 'MinMaxScaler', 'NoScaler'
            ]
            for default_ in defaults:
                if default_ in available_scalers:
                    default = default_
                    break

        numerical_columns = dataset_properties['numerical_columns']\
            if isinstance(dataset_properties['numerical_columns'], List) else []
        updates = self._get_search_space_updates()
        if '__choice__' in updates.keys():
            choice_hyperparameter = updates['__choice__']
            if not set(choice_hyperparameter.value_range).issubset(
                    available_scalers):
                raise ValueError("Expected given update for {} to have "
                                 "choices in {} got {}".format(
                                     self.__class__.__name__,
                                     available_scalers,
                                     choice_hyperparameter.value_range))
            if len(numerical_columns) == 0:
                assert len(choice_hyperparameter.value_range) == 1
                if 'NoScaler' not in choice_hyperparameter.value_range:
                    raise ValueError(
                        "Provided {} in choices, however, the dataset "
                        "is incompatible with it".format(
                            choice_hyperparameter.value_range))

            preprocessor = CSH.CategoricalHyperparameter(
                '__choice__',
                choice_hyperparameter.value_range,
                default_value=choice_hyperparameter.default_value)
        else:
            # add only NoScaler to the choice hyperparameter if the dataset is purely categorical
            if len(numerical_columns) == 0:
                default = 'NoScaler'
                if include is not None and default not in include:
                    raise ValueError(
                        "Provided {} in include, however, "
                        "the dataset is incompatible with it".format(include))
                preprocessor = CSH.CategoricalHyperparameter(
                    '__choice__', ['NoScaler'], default_value=default)
            else:
                preprocessor = CSH.CategoricalHyperparameter(
                    '__choice__',
                    list(available_scalers.keys()),
                    default_value=default)
        cs.add_hyperparameter(preprocessor)

        # add only child hyperparameters of preprocessor choices
        for name in preprocessor.choices:
            updates = self._get_search_space_updates(prefix=name)
            config_space = available_scalers[
                name].get_hyperparameter_search_space(
                    dataset_properties,  # type:ignore
                    **updates)
            parent_hyperparameter = {'parent': preprocessor, 'value': name}
            cs.add_configuration_space(
                name,
                config_space,
                parent_hyperparameter=parent_hyperparameter)

        self.configuration_space = cs
        self.dataset_properties = dataset_properties
        return cs
Example #22
    def recursion(self, hdl: Dict, path=()) -> ConfigurationSpace:
        cs = ConfigurationSpace()
        # Check whether this dict directly describes hyperparameters
        key_list = list(hdl.keys())
        if len(key_list) == 0:
            cs.add_hyperparameter(Constant("placeholder", "placeholder"))
            return cs
        else:
            sample_key = key_list[0]
            sample_value = hdl[sample_key]
            if is_hdl_bottom(sample_key, sample_value):
                store = {}
                conditions_dict = {}
                for key, value in hdl.items():
                    if key.startswith("__"):
                        conditions_dict[key] = value
                    else:
                        # assert isinstance(value, dict)  # fixme: constants could also be encoded
                        hp = self.__parse_dict_to_config(key, value)
                        # hp.name = key
                        cs.add_hyperparameter(hp)
                        store[key] = hp
                for key, value in conditions_dict.items():
                    if key == "__condition":
                        assert isinstance(value, list)
                        for item in value:
                            cond = self.__condition(item, store)
                            cs.add_condition(cond)
                    elif key == "__activate":
                        self.__activate(value, store, cs)
                    elif key == "__forbidden":
                        self.__forbidden(value, store, cs)
                    elif key == "__rely_model":
                        RelyModels.info.append([value, deepcopy(path)])

                return cs
        pattern = re.compile(r"(.*)\((.*)\)")
        for key, value in hdl.items():
            mat = pattern.match(key)
            if mat:
                groups = mat.groups()
                assert len(groups) == 2
                prefix_name, method = groups
                value_list = list(value.keys())
                assert len(value_list) >= 1
                if method == "choice":
                    pass
                else:
                    raise NotImplementedError()
                cur_cs = ConfigurationSpace()
                assert isinstance(value, dict)
                # A Constant cannot be used here; it raises an error
                choice2proba = {}
                not_specific_proba_choices = []
                sum_proba = 0
                for k in value_list:
                    v = value[k]
                    if isinstance(v, dict) and "__proba" in v:
                        proba = v.pop("__proba")
                        choice2proba[k] = proba
                        sum_proba += proba
                    else:
                        not_specific_proba_choices.append(k)
                if sum_proba <= 1:
                    if len(not_specific_proba_choices) > 0:
                        p_rest = (1 -
                                  sum_proba) / len(not_specific_proba_choices)
                        for not_specific_proba_choice in not_specific_proba_choices:
                            choice2proba[not_specific_proba_choice] = p_rest
                else:
                    choice2proba = {k: 1 / len(value_list) for k in value_list}
                proba_list = [choice2proba[k] for k in value_list]
                value_list = list(map(smac_hdl._encode,
                                      value_list))  # choices must be str

                option_param = CategoricalHyperparameter(
                    '__choice__', value_list,
                    weights=proba_list)  # todo : default
                cur_cs.add_hyperparameter(option_param)
                for sub_key, sub_value in value.items():
                    assert isinstance(sub_value, dict)
                    sub_cs = self.recursion(sub_value,
                                            path=list(path) +
                                            [prefix_name, sub_key])
                    parent_hyperparameter = {
                        'parent': option_param,
                        'value': sub_key
                    }
                    cur_cs.add_configuration_space(
                        sub_key,
                        sub_cs,
                        parent_hyperparameter=parent_hyperparameter)
                cs.add_configuration_space(prefix_name, cur_cs)
            elif isinstance(value, dict):
                sub_cs = self.recursion(value, path=list(path) + [key])
                cs.add_configuration_space(key, sub_cs)
            else:
                raise NotImplementedError()

        return cs
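The '__proba' handling above can be read in isolation: each choice may carry an explicit probability, the remaining mass is split evenly over the unspecified choices, and if the explicit values already exceed 1 the weights fall back to uniform. A minimal sketch of that normalization; the function name and plain-dict input are assumptions for illustration, not part of the code above.

def normalize_choice_probabilities(choices):
    # choices: dict mapping choice name -> explicit probability or None
    # (hypothetical helper mirroring the __proba logic above)
    explicit = {k: p for k, p in choices.items() if p is not None}
    unspecified = [k for k, p in choices.items() if p is None]
    total = sum(explicit.values())
    if total <= 1:
        if unspecified:
            p_rest = (1 - total) / len(unspecified)
            explicit.update({k: p_rest for k in unspecified})
    else:
        # Explicit probabilities over-allocate; fall back to uniform
        explicit = {k: 1 / len(choices) for k in choices}
    return [explicit[k] for k in choices]

print(normalize_choice_probabilities({'a': 0.5, 'b': None, 'c': None}))  # [0.5, 0.25, 0.25]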
Example #23
def get_hyperspace(data_info,
                   include_estimators=None, include_preprocessors=None):

    if data_info is None or not isinstance(data_info, dict):
        data_info = dict()

    if 'is_sparse' not in data_info:
        # This dataset is probably dense
        data_info['is_sparse'] = False

    sparse = data_info['is_sparse']
    task_type = data_info['task']
    multilabel = (task_type == MULTILABEL_CLASSIFICATION)
    multiclass = (task_type == MULTICLASS_CLASSIFICATION)

    if task_type in CLASSIFICATION_TASKS:
        data_info['multilabel'] = multilabel
        data_info['multiclass'] = multiclass
        data_info['target_type'] = 'classification'
        pipe_type = 'classifier'

        # Components match to be forbidden
        components_ = ["adaboost", "decision_tree", "extra_trees",
                    "gradient_boosting", "k_nearest_neighbors",
                    "libsvm_svc", "random_forest", "gaussian_nb",
                    "decision_tree"]
        feature_learning_ = ["kitchen_sinks", "nystroem_sampler"]
    elif task_type in REGRESSION_TASKS:
        data_info['target_type'] = 'regression'
        pipe_type = 'regressor'

        # Components match to be forbidden
        components_ = ["adaboost", "decision_tree", "extra_trees",
                       "gaussian_process", "gradient_boosting",
                       "k_nearest_neighbors", "random_forest"]
        feature_learning_ = ["kitchen_sinks", "kernel_pca", "nystroem_sampler"]
    else:
        raise NotImplementedError()

    include, exclude = dict(), dict()
    if include_preprocessors is not None:
        include["preprocessor"] = include_preprocessors
    if include_estimators is not None:
        include[pipe_type] = include_estimators

    cs = ConfigurationSpace()

    # Construct pipeline
    # FIXME OrderedDIct?
    pipeline = get_pipeline(data_info['task'])

    # TODO include, exclude, pipeline
    keys = [pair[0] for pair in pipeline]
    for key in include:
        if key not in keys:
            raise ValueError('Invalid key in include: %s; should be one '
                             'of %s' % (key, keys))

    for key in exclude:
        if key not in keys:
            raise ValueError('Invalid key in exclude: %s; should be one '
                             'of %s' % (key, keys))

    # Construct hyperspace
    # TODO What does 'signed' stand for?
    if 'signed' not in data_info:
        # This dataset probably contains unsigned data
        data_info['signed'] = False

    match = check_pipeline(pipeline, data_info,
                           include=include, exclude=exclude)

    # Now we have only legal combinations at this step of the pipeline
    # Simple sanity checks
    assert np.sum(match) != 0, "No valid pipeline found."

    assert np.sum(match) <= np.size(match), \
        "'matches' is not binary; %s <= %d, %s" % \
        (str(np.sum(match)), np.size(match), str(match.shape))

    # Iterate each dimension of the matches array (each step of the
    # pipeline) to see if we can add a hyperparameter for that step
    for node_idx, n_ in enumerate(pipeline):
        node_name, node = n_
        is_choice = hasattr(node, "get_available_components")

        # if the node isn't a choice we can add it immediately because it
        #  must be active (if it weren't, np.sum(match) would be zero)
        if not is_choice:
            cs.add_configuration_space(node_name,
                node.get_hyperparameter_search_space(data_info))
        # If the node is a choice, we have to figure out which of its
        #  choices are actually legal choices
        else:
            choices_list = find_active_choices(match, node, node_idx, data_info,
                                               include=include.get(node_name),
                                               exclude=exclude.get(node_name))
            cs.add_configuration_space(node_name,
                node.get_hyperparameter_search_space(data_info,
                                                     include=choices_list))
    # And now add forbidden parameter configurations
    # According to matches
    if np.sum(match) < np.size(match):
        cs = add_forbidden(conf_space=cs, pipeline=pipeline, matches=match,
                           dataset_properties=data_info, include=include, exclude=exclude)

    components = cs.get_hyperparameter('%s:__choice__' % pipe_type).choices
    availables = pipeline[-1][1].get_available_components(data_info)

    preprocessors = cs.get_hyperparameter('preprocessor:__choice__').choices
    # available_preprocessors = pipeline[-2][1].get_available_components(data_info)

    possible_default = copy.copy(list(availables.keys()))
    default = cs.get_hyperparameter('%s:__choice__' % pipe_type).default
    del possible_default[possible_default.index(default)]

    # A classifier which can handle sparse data after the densifier is
    # forbidden for memory issues
    for key in components:
        # TODO regression dataset_properties=None
        if SPARSE in availables[key].get_properties()['input']:
            if 'densifier' in preprocessors:
                while True:
                    try:
                        cs.add_forbidden_clause(
                            ForbiddenAndConjunction(
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        '%s:__choice__' % pipe_type), key),
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'preprocessor:__choice__'), 'densifier')
                            ))
                        # Success
                        break
                    except ValueError:
                        # Change the default and try again
                        try:
                            default = possible_default.pop()
                        except IndexError:
                            raise ValueError("Cannot find a legal default configuration.")
                        cs.get_hyperparameter('%s:__choice__' % pipe_type).default = default

    # Forbid combinations of non-linear models with feature learning,
    # which would take too long:
    for c, f in itertools.product(components_, feature_learning_):
        if c not in components:
            continue
        if f not in preprocessors:
            continue
        while True:
            try:
                cs.add_forbidden_clause(ForbiddenAndConjunction(
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "%s:__choice__" % pipe_type), c),
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "preprocessor:__choice__"), f)))
                break
            except KeyError:
                break
            except ValueError as e:
                # Change the default and try again
                try:
                    default = possible_default.pop()
                except IndexError:
                    raise ValueError(
                        "Cannot find a legal default configuration.")
                cs.get_hyperparameter('%s:__choice__' % pipe_type).default = default


    if task_type in CLASSIFICATION_TASKS:
        # Multinomial NB etc. won't work with feature learning preprocessors
        # (pca etc.) that can produce negative values
        components_ = ["multinomial_nb"]
        preproc_with_negative_X = ["kitchen_sinks", "pca", "truncatedSVD",
                                   "fast_ica", "kernel_pca", "nystroem_sampler"]

        for c, f in itertools.product(components_, preproc_with_negative_X):
            if c not in components:
                continue
            if f not in preprocessors:
                continue
            while True:
                try:
                    cs.add_forbidden_clause(ForbiddenAndConjunction(
                        ForbiddenEqualsClause(cs.get_hyperparameter(
                            "preprocessor:__choice__"), f),
                        ForbiddenEqualsClause(cs.get_hyperparameter(
                            "classifier:__choice__"), c)))
                    break
                except KeyError:
                    break
                except ValueError:
                    # Change the default and try again
                    try:
                        default = possible_default.pop()
                    except IndexError:
                        raise ValueError(
                            "Cannot find a legal default configuration.")
                    cs.get_hyperparameter('classifier:__choice__').default = default

    return cs