예제 #1
0
 def set_probabilities_in_cs(self, cs: ConfigurationSpace,
                             relied2models: Dict[str, List[str]],
                             relied2AllModels: Dict[str, List[str]],
                             all_models: List[str], **kwargs):
     """Set sampling probabilities on the estimator choice and on relied-on choices.

     Parameters
     ----------
     cs : ConfigurationSpace
         Space that is modified in place (probabilities, defaults, choices and
         forbidden clauses).
     relied2models : Dict[str, List[str]]
         Maps a relied-on model name to the estimator names that share the
         probability given for it in ``kwargs``.
     relied2AllModels : Dict[str, List[str]]
         Maps a relied-on model name to the estimators allowed together with
         it; every other entry of ``all_models`` is forbidden in combination
         with that relied-on choice.
     all_models : List[str]
         All estimator names under consideration.
     **kwargs
         One probability per relied-on model name (looked up as
         ``kwargs[rely_model]``).

     Raises
     ------
     KeyError
         If ``kwargs`` lacks the probability of a relied-on model that is used.
     """
     estimator_key = "estimator:__choice__"
     estimator = cs.get_hyperparameter(estimator_key)

     # Split the probability of each relied-on model evenly over its estimators.
     model2prob = {}
     n_relied = 0
     for rely_model, cur_models in relied2models.items():
         n_relied += len(cur_models)
         for model in cur_models:
             model2prob[model] = kwargs[rely_model] / len(cur_models)

     # The remaining mass is shared by the estimators without an explicit
     # probability. If there are none, nothing is left to share out (this
     # used to raise ZeroDivisionError).
     n_rest = len(all_models) - n_relied
     p_rest = (1 - sum(model2prob.values())) / n_rest if n_rest else 0.0
     estimator.probabilities = [
         model2prob.get(model, p_rest) for model in estimator.choices
     ]

     # The first model of the last non-empty group becomes the default.
     # NOTE(review): stays None when no group has models -- confirm that the
     # hyperparameter accepts None as default in that case.
     default_estimator_choice = None
     for models in relied2models.values():
         if models:
             default_estimator_choice = models[0]
     estimator.default_value = default_estimator_choice

     for rely_model, path in RelyModels.info:
         forbid_eq_value = path[-1]
         forbid_eq_key = ":".join(path[:-1] + ["__choice__"])
         forbid_eq_key_hp = cs.get_hyperparameter(forbid_eq_key)
         hit = relied2AllModels.get(rely_model)
         if not hit:
             # No estimator uses this relied-on choice: remove it and spread
             # the probability uniformly over the remaining choices.
             choices = list(forbid_eq_key_hp.choices)
             choices.remove(forbid_eq_value)
             forbid_eq_key_hp.choices = tuple(choices)
             forbid_eq_key_hp.default_value = choices[0]
             forbid_eq_key_hp.probabilities = [1 / len(choices)] * len(choices)
             # FIXME: the author eventually gave up changing this here; the
             # preprocessing is done in the hdl part instead.
             continue
         forbid_in_value = list(set(all_models) - set(hit))
         # Only boost models were selected: nothing has to be forbidden.
         if not forbid_in_value:
             continue
         choices = forbid_eq_key_hp.choices
         p: float = kwargs[rely_model]
         # The relied-on choice gets p; the rest of the mass is divided (not
         # multiplied, as before) among the other choices.
         n_other = len(choices) - 1
         p_rest = (1 - p) / n_other if n_other > 0 else 0.0
         forbid_eq_key_hp.probabilities = [
             p if choice == forbid_eq_value else p_rest for choice in choices
         ]
         cs.add_forbidden_clause(
             ForbiddenAndConjunction(
                 ForbiddenEqualsClause(forbid_eq_key_hp, forbid_eq_value),
                 ForbiddenInClause(cs.get_hyperparameter(estimator_key),
                                   forbid_in_value),
             ))
예제 #2
0
def _construct_in_condition(
    condition: Dict,
    cs: ConfigurationSpace,
) -> InCondition:
    """Create an ``InCondition`` from its dictionary description.

    The ``'child'`` and ``'parent'`` entries of ``condition`` are names that
    are resolved to hyperparameters through ``cs``; the ``'values'`` entry is
    passed on unchanged.
    """
    child_hp = cs.get_hyperparameter(condition['child'])
    parent_hp = cs.get_hyperparameter(condition['parent'])
    return InCondition(child=child_hp, parent=parent_hp,
                       values=condition['values'])
예제 #3
0
def _construct_lt_condition(
    condition: Dict,
    cs: ConfigurationSpace,
) -> LessThanCondition:
    """Create a ``LessThanCondition`` from its dictionary description.

    The ``'child'`` and ``'parent'`` entries of ``condition`` are names that
    are resolved to hyperparameters through ``cs``; the ``'value'`` entry is
    passed on unchanged.
    """
    child_hp = cs.get_hyperparameter(condition['child'])
    parent_hp = cs.get_hyperparameter(condition['parent'])
    return LessThanCondition(child=child_hp, parent=parent_hp,
                             value=condition['value'])
예제 #4
0
def _construct_neq_condition(
    condition: Dict,
    cs: ConfigurationSpace,
) -> NotEqualsCondition:
    """Create a ``NotEqualsCondition`` from its dictionary description.

    The ``'child'`` and ``'parent'`` entries of ``condition`` are names that
    are resolved to hyperparameters through ``cs``; the ``'value'`` entry is
    passed on unchanged.
    """
    child_hp = cs.get_hyperparameter(condition['child'])
    parent_hp = cs.get_hyperparameter(condition['parent'])
    return NotEqualsCondition(child=child_hp, parent=parent_hp,
                              value=condition['value'])
예제 #5
0
파일: json.py 프로젝트: automl/ConfigSpace
def _construct_lt_condition(
        condition: Dict,
        cs: ConfigurationSpace,
) -> LessThanCondition:
    """Build the ``LessThanCondition`` described by ``condition``.

    ``condition['child']`` and ``condition['parent']`` are looked up in
    ``cs`` by name; ``condition['value']`` is forwarded as the threshold
    argument ``value``.
    """
    child = cs.get_hyperparameter(condition['child'])
    parent = cs.get_hyperparameter(condition['parent'])
    threshold = condition['value']
    return LessThanCondition(child=child, parent=parent, value=threshold)
예제 #6
0
파일: json.py 프로젝트: automl/ConfigSpace
def _construct_neq_condition(
        condition: Dict,
        cs: ConfigurationSpace,
) -> NotEqualsCondition:
    """Build the ``NotEqualsCondition`` described by ``condition``.

    ``condition['child']`` and ``condition['parent']`` are looked up in
    ``cs`` by name; ``condition['value']`` is forwarded as ``value``.
    """
    child = cs.get_hyperparameter(condition['child'])
    parent = cs.get_hyperparameter(condition['parent'])
    excluded = condition['value']
    return NotEqualsCondition(child=child, parent=parent, value=excluded)
예제 #7
0
파일: json.py 프로젝트: automl/ConfigSpace
def _construct_in_condition(
        condition: Dict,
        cs: ConfigurationSpace,
) -> InCondition:
    """Build the ``InCondition`` described by ``condition``.

    ``condition['child']`` and ``condition['parent']`` are looked up in
    ``cs`` by name; ``condition['values']`` is forwarded as ``values``.
    """
    child = cs.get_hyperparameter(condition['child'])
    parent = cs.get_hyperparameter(condition['parent'])
    allowed = condition['values']
    return InCondition(child=child, parent=parent, values=allowed)
예제 #8
0
    def add_params(cs: ConfigurationSpace):
        '''
            Make "regressor" conditional on the selector being
            "PairwiseRegressor", provided the selector offers that choice.
        '''
        # Both hyperparameters are looked up first, so a missing one raises
        # regardless of the selector's choices.
        selector = cs.get_hyperparameter("selector")
        regressor = cs.get_hyperparameter("regressor")
        if "PairwiseRegressor" not in selector.choices:
            return
        condition = InCondition(
            child=regressor, parent=selector, values=["PairwiseRegressor"])
        cs.add_condition(condition)
    def add_params(cs: ConfigurationSpace):
        '''
            Make "classifier" conditional on the selector being
            "MultiClassifier", provided the selector offers that choice.
        '''
        # Both hyperparameters are looked up first, so a missing one raises
        # regardless of the selector's choices.
        selector = cs.get_hyperparameter("selector")
        classifier = cs.get_hyperparameter("classifier")
        if "MultiClassifier" not in selector.choices:
            return
        condition = InCondition(
            child=classifier, parent=selector, values=["MultiClassifier"])
        cs.add_condition(condition)
예제 #10
0
def _construct_forbidden_in(
    clause: Dict,
    cs: ConfigurationSpace,
) -> ForbiddenInClause:
    """Create a ``ForbiddenInClause`` from its dictionary description.

    ``clause['name']`` is resolved to a hyperparameter through ``cs`` and
    ``clause['values']`` is passed on as the forbidden values. The return
    annotation now names the class that is actually constructed (it used to
    say ``ForbiddenEqualsClause``).
    """
    hyperparameter = cs.get_hyperparameter(clause['name'])
    return ForbiddenInClause(hyperparameter=hyperparameter,
                             values=clause['values'])
예제 #11
0
 def alternative_configuration_recovery(config_list: typing.List[str], cs: ConfigurationSpace):
     """Recover a Configuration from trajectory strings of the form ``name='value'``.

     Float and integer hyperparameters are cast directly. For categorical and
     constant hyperparameters the string is cast to the type of the
     hyperparameter's default value (bool, int or float); otherwise it stays
     a string.

     Parameters
     ----------
     config_list : typing.List[str]
         One ``"name='value'"`` entry per hyperparameter.
     cs : ConfigurationSpace
         Space used to look up each hyperparameter and to build the result.

     Returns
     -------
     Configuration
         The configuration, with ``origin`` set to "External Trajectory".

     Raises
     ------
     ValueError
         If an entry contains no "=" or a numeric cast fails.
     """
     config_dict = {}
     for param in config_list:
         # Split on the first "=" only, so values containing "=" do not break
         # the unpacking.
         k, v = param.split("=", 1)
         v = v.strip("'")
         hp = cs.get_hyperparameter(k)
         if isinstance(hp, FloatHyperparameter):
             v = float(v)
         elif isinstance(hp, IntegerHyperparameter):
             v = int(v)
         elif isinstance(hp, (CategoricalHyperparameter, Constant)):
             default = hp.default_value
             # bool is tested before int because bool is a subclass of int.
             if isinstance(default, bool):
                 v = v == 'True'
             elif isinstance(default, int):
                 v = int(v)
             elif isinstance(default, float):
                 v = float(v)
         config_dict[k] = v
     config = Configuration(configuration_space=cs, values=config_dict)
     config.origin = "External Trajectory"
     return config
예제 #12
0
    def _convert_dict_to_config(config_list: typing.List[str], cs: ConfigurationSpace):
        # CAN BE DONE IN CONFIGSPACE
        """Since we save a configuration as a mapping str->str we have to
        try to figure out the type (int, float, str) of each parameter value

        Parameters
        ----------
        config_list: typing.List[str]
            Configuration as a list of "str='str'"
        cs: ConfigurationSpace
            Configuration Space to translate dict object into Configuration object

        Returns
        -------
        Configuration
            The configuration, with ``origin`` set to "External Trajectory".
        """
        config_dict = {}
        for param in config_list:
            # Split on the first "=" only, so values containing "=" do not
            # break the unpacking.
            k, v = param.split("=", 1)
            v = v.strip("'")
            hp = cs.get_hyperparameter(k)
            if isinstance(hp, FloatHyperparameter):
                v = float(v)
            elif isinstance(hp, IntegerHyperparameter):
                v = int(v)
            # Any other hyperparameter kind keeps the stripped string.
            config_dict[k] = v

        config = Configuration(configuration_space=cs, values=config_dict)
        config.origin = "External Trajectory"

        return config
예제 #13
0
파일: json.py 프로젝트: automl/ConfigSpace
def _construct_forbidden_in(
        clause: Dict,
        cs: ConfigurationSpace,
) -> ForbiddenInClause:
    """Build the ``ForbiddenInClause`` described by ``clause``.

    ``clause['name']`` is looked up in ``cs`` and ``clause['values']`` is
    forwarded as the forbidden values. The return annotation was corrected
    from ``ForbiddenEqualsClause`` to the class that is really returned.
    """
    target = cs.get_hyperparameter(clause['name'])
    return ForbiddenInClause(
        hyperparameter=target,
        values=clause['values']
    )
예제 #14
0
    def add_params(cs: ConfigurationSpace):
        '''
            Register the "RandomForest" classifier choice together with its
            rf:* hyperparameters, each active only for that choice.
        '''
        rf_label = "RandomForest"
        try:
            classifier = cs.get_hyperparameter("classifier")
            classifier.choices.append(rf_label)
        except KeyError:
            # No "classifier" hyperparameter yet: create it with this choice.
            classifier = CategoricalHyperparameter(
                "classifier", choices=[rf_label], default=rf_label)
            cs.add_hyperparameter(classifier)

        rf_hyperparameters = [
            UniformIntegerHyperparameter(
                name="rf:n_estimators", lower=10, upper=100, default=10,
                log=True),
            CategoricalHyperparameter(
                name="rf:criterion", choices=["gini", "entropy"],
                default="gini"),
            CategoricalHyperparameter(
                name="rf:max_features", choices=["sqrt", "log2", None],
                default="sqrt"),
            UniformIntegerHyperparameter(
                name="rf:max_depth", lower=10, upper=2**31, default=2**31,
                log=True),
            UniformIntegerHyperparameter(
                name="rf:min_samples_split", lower=2, upper=100, default=2,
                log=True),
            UniformIntegerHyperparameter(
                name="rf:min_samples_leaf", lower=2, upper=100, default=10,
                log=True),
            CategoricalHyperparameter(
                name="rf:bootstrap", choices=[True, False], default=True),
        ]

        # All hyperparameters are added before any condition is attached.
        for hyperparameter in rf_hyperparameters:
            cs.add_hyperparameter(hyperparameter)
        for hyperparameter in rf_hyperparameters:
            cs.add_condition(InCondition(
                child=hyperparameter, parent=classifier, values=[rf_label]))
예제 #15
0
    def add_params(cs: ConfigurationSpace):
        '''
            Register the "PairwiseClassifier" selector choice and make
            "classifier" active only for that choice.
        '''
        choice = "PairwiseClassifier"
        try:
            selector = cs.get_hyperparameter("selector")
            selector.choices.append(choice)
        except KeyError:
            # No "selector" hyperparameter yet: create it with this choice.
            selector = CategoricalHyperparameter(
                "selector", choices=[choice], default=choice)
            cs.add_hyperparameter(selector)

        classifier = cs.get_hyperparameter("classifier")
        condition = InCondition(
            child=classifier, parent=selector, values=[choice])
        cs.add_condition(condition)
예제 #16
0
    def _convert_dict_to_config(config_list: typing.List[str], cs: ConfigurationSpace) -> Configuration:
        """Since we save a configuration as a mapping str->str we have to
        try to figure out the type (int, float, str, bool) of each parameter value

        Parameters
        ----------
        config_list: typing.List[str]
            Configuration as a list of "str='str'"
        cs: ConfigurationSpace
            Configuration Space to translate dict object into Configuration object

        Returns
        -------
        Configuration
            The configuration, with ``origin`` set to "External Trajectory".
        """
        config_dict = {}
        v = ''  # type: typing.Union[str, float, int, bool]
        for param in config_list:
            # Split on the first "=" only, so values containing "=" do not
            # break the unpacking.
            k, v = param.split("=", 1)
            v = v.strip("'")
            hp = cs.get_hyperparameter(k)
            if isinstance(hp, FloatHyperparameter):
                v = float(v)
            elif isinstance(hp, IntegerHyperparameter):
                v = int(v)
            elif isinstance(hp, (CategoricalHyperparameter, Constant)):
                # Checking for the correct type requires jumping through some
                # hoops. First, gather possible interpretations of the string.
                interpretations = [v]  # type: typing.List[typing.Union[str, bool, int, float]]
                if v in ["True", "False"]:
                    # Special case for booleans (assuming we support them).
                    # This avoids false positive warnings triggered by
                    # 1 == True or "False" == True.
                    interpretations.append(v == 'True')
                else:
                    for cast in (int, float):
                        try:
                            interpretations.append(cast(v))
                        except ValueError:
                            continue

                # Second, keep the interpretations the hyperparameter accepts.
                # A set is used, so equal values such as 1 and 1.0 collapse.
                legal = {candidate for candidate in interpretations if hp.is_legal(candidate)}

                # Third, warn if the interpretation is ambiguous or missing;
                # the raw string is kept in that case.
                if len(legal) != 1:
                    logging.getLogger("smac.trajlogger").warning(
                        "Ambigous or no interpretation of value {} for hp {} found ({} possible interpretations). "
                        "Passing string, but this will likely result in an error".format(v, hp.name, len(legal)))
                else:
                    v = legal.pop()

            config_dict[k] = v

        config = Configuration(configuration_space=cs, values=config_dict)
        config.origin = "External Trajectory"

        return config
    def add_params(cs: ConfigurationSpace):
        '''
            Register "PairwiseClassifier" as a choice of the "selector"
            hyperparameter, creating that hyperparameter if it is missing.
        '''
        choice = "PairwiseClassifier"
        try:
            existing = cs.get_hyperparameter("selector")
            existing.choices.append(choice)
        except KeyError:
            # No "selector" hyperparameter yet: create it with this choice.
            created = CategoricalHyperparameter(
                "selector", choices=[choice], default=choice)
            cs.add_hyperparameter(created)
예제 #18
0
    def add_params(cs: ConfigurationSpace):
        '''
            Register the "RandomForestRegressor" regressor choice together
            with its rfreg:* hyperparameters, each active only for that choice.
        '''
        rf_label = "RandomForestRegressor"
        try:
            regressor = cs.get_hyperparameter("regressor")
            regressor.choices.append(rf_label)
            # The cached choice counter is bumped by hand alongside the list.
            regressor._num_choices += 1
        except KeyError:
            # No "regressor" hyperparameter yet: create it with this choice.
            regressor = CategoricalHyperparameter(
                "regressor", choices=[rf_label], default=rf_label)
            cs.add_hyperparameter(regressor)

        rf_hyperparameters = [
            UniformIntegerHyperparameter(
                name="rfreg:n_estimators", lower=10, upper=100, default=10,
                log=True),
            CategoricalHyperparameter(
                name="rfreg:max_features", choices=["sqrt", "log2", None],
                default="sqrt"),
            UniformIntegerHyperparameter(
                name="rfreg:max_depth", lower=10, upper=2 ** 31,
                default=2 ** 31, log=True),
            UniformIntegerHyperparameter(
                name="rfreg:min_samples_split", lower=2, upper=100, default=2,
                log=True),
            UniformIntegerHyperparameter(
                name="rfreg:min_samples_leaf", lower=2, upper=100, default=10,
                log=True),
            CategoricalHyperparameter(
                name="rfreg:bootstrap", choices=[True, False], default=True),
        ]

        # All hyperparameters are added before any condition is attached.
        for hyperparameter in rf_hyperparameters:
            cs.add_hyperparameter(hyperparameter)
        for hyperparameter in rf_hyperparameters:
            cs.add_condition(InCondition(
                child=hyperparameter, parent=regressor, values=[rf_label]))
예제 #19
0
    def add_params(cs: ConfigurationSpace):
        '''
            Ensure the "selector" hyperparameter offers "PairwiseClassifier",
            creating the hyperparameter when the space does not have it.
        '''
        label = "PairwiseClassifier"
        try:
            current = cs.get_hyperparameter("selector")
            current.choices.append(label)
        except KeyError:
            # The lookup failed, so the selector is created with this single
            # choice as its default.
            cs.add_hyperparameter(
                CategoricalHyperparameter(
                    "selector",
                    choices=[label],
                    default=label))
예제 #20
0
def get_default_initial_configs(phps: ConfigurationSpace,
                                n_configs) -> List[Configuration]:
    """Return one default configuration per estimator choice, padded with samples.

    Works on a deep copy of ``phps``, so the caller's space is not modified.
    Every hyperparameter whose name starts with "preprocessing" and ends with
    "__choice__" gets "None:NoneType" as default when that is one of its
    choices. For each choice of "estimator:__choice__" the default
    configuration with that estimator as default is collected.

    Parameters
    ----------
    phps : ConfigurationSpace
        The space to derive the configurations from.
    n_configs : int
        Minimum number of configurations wanted. If there are fewer estimator
        choices, the remainder is drawn with ``sample_configuration``. If
        there are more, all defaults are still returned.

    Returns
    -------
    List[Configuration]
        The default configurations followed by any sampled ones.
    """
    none_name = "None:NoneType"
    estimator_key = "estimator:__choice__"
    phps = deepcopy(phps)
    for hp in phps.get_hyperparameters():
        name: str = hp.name
        if (name.startswith("preprocessing") and name.endswith("__choice__")
                and none_name in hp.choices):
            hp.default_value = none_name

    ans = []
    for choice in phps.get_hyperparameter(estimator_key).choices:
        # Each choice gets its own copy so the defaults do not interfere.
        cur_phps = deepcopy(phps)
        cur_phps.get_hyperparameter(estimator_key).default_value = choice
        ans.append(cur_phps.get_default_configuration())

    n_missing = n_configs - len(ans)
    if n_missing > 0:
        sampled = phps.sample_configuration(n_missing)
        # sample_configuration may return a single configuration instead of a
        # list when only one is requested; extend() would then iterate over
        # that configuration's contents instead of adding it.
        if isinstance(sampled, (list, tuple)):
            ans.extend(sampled)
        else:
            ans.append(sampled)
    return ans
예제 #21
0
def add_forbidden(
    conf_space: ConfigurationSpace,
    pipeline: List[Tuple[str, autoPyTorchChoice]],
    matches: np.ndarray,
    dataset_properties: Dict[str, Any],
    include: Optional[Dict[str, Any]] = None,
    exclude: Optional[Dict[str, Any]] = None
) -> ConfigurationSpace:
    """Add forbidden clauses for component combinations that ``matches`` rules out.

    For every run of consecutive choice nodes in ``pipeline`` (nodes exposing
    ``get_available_components``), each contiguous sub-chain of at least two
    nodes is examined. A combination of component choices whose slice of
    ``matches`` sums to zero is added to ``conf_space`` as a
    ``ForbiddenAndConjunction`` of ``ForbiddenEqualsClause`` objects on the
    nodes' ``"<node name>:__choice__"`` hyperparameters. Combinations are
    skipped when one of their individual choices already has an all-zero
    slice, or when a shorter contiguous part of them was recorded before.

    Parameters
    ----------
    conf_space : ConfigurationSpace
        Space that receives the forbidden clauses (modified in place).
    pipeline : List[Tuple[str, autoPyTorchChoice]]
        ``(node_name, node)`` pairs in pipeline order.
    matches : np.ndarray
        Array indexed with one axis per pipeline node and one entry per
        component of that node (presumably non-zero where a combination is
        valid -- confirm against the caller).
    dataset_properties : Dict[str, Any]
        Passed through to ``get_available_components``.
    include, exclude : Optional[Dict[str, Any]]
        Per-node-name component filters passed to
        ``get_available_components``.

    Returns
    -------
    ConfigurationSpace
        The same ``conf_space`` object that was passed in.
    """
    # Not sure if this works for 3D
    node_i_is_choice = []
    node_i_choices_names: List[List[str]] = []
    # NOTE: node_i_choices is filled below but not read again in this function.
    node_i_choices: List[List[Union[autoPyTorchComponent, autoPyTorchChoice]]] = []
    all_nodes = []
    for node_name, node in pipeline:
        all_nodes.append(node)
        # A node counts as a choice when it can list available components.
        is_choice = hasattr(node, "get_available_components")
        node_i_is_choice.append(is_choice)

        node_include = include.get(
            node_name) if include is not None else None
        node_exclude = exclude.get(
            node_name) if exclude is not None else None

        if is_choice:
            node_i_choices_names.append(
                [str(element) for element in
                    node.get_available_components(
                    dataset_properties, include=node_include,
                    exclude=node_exclude).keys()]

            )
            node_i_choices.append(
                list(node.get_available_components(
                    dataset_properties, include=node_include,
                    exclude=node_exclude
                ).values()))

        else:
            # Non-choice nodes are represented by themselves.
            node_i_choices_names.append([node_name])
            node_i_choices.append([node])

    # Find out all chains of choices. Only in such a chain its possible to
    # have several forbidden constraints
    choices_chains = []
    idx = 0
    while idx < len(pipeline):
        if node_i_is_choice[idx]:
            chain_start = idx
            idx += 1
            while idx < len(pipeline) and node_i_is_choice[idx]:
                idx += 1
            # chain_stop is exclusive: the index just past the last choice node.
            chain_stop = idx
            choices_chains.append((chain_start, chain_stop))
        idx += 1

    for choices_chain in choices_chains:
        # Constraints recorded for this chain, each a tuple of
        # (node name, component name) pairs.
        constraints: Set[Tuple] = set()

        chain_start = choices_chain[0]
        chain_stop = choices_chain[1]
        chain_length = chain_stop - chain_start

        # Add one to have also have chain_length in the range
        for sub_chain_length in range(2, chain_length + 1):
            for start_idx in range(chain_start, chain_stop - sub_chain_length + 1):
                indices = range(start_idx, start_idx + sub_chain_length)
                node_names = [pipeline[idx][0] for idx in indices]

                num_node_choices = []
                node_choice_names = []
                skip_array_shape = []

                for idx in indices:
                    node = all_nodes[idx]
                    # Restrict to the component names collected above.
                    available_components = node.get_available_components(
                        dataset_properties,
                        include=node_i_choices_names[idx])
                    assert len(available_components) > 0, len(available_components)
                    skip_array_shape.append(len(available_components))
                    num_node_choices.append(range(len(available_components)))
                    node_choice_names.append([name for name in available_components])

                # Figure out which choices were already abandoned
                skip_array = np.zeros(skip_array_shape)
                for product in itertools.product(*num_node_choices):
                    for node_idx, choice_idx in enumerate(product):
                        # Translate the position within the sub-chain into
                        # the node's index in the pipeline.
                        node_idx += start_idx
                        slices_ = tuple(
                            slice(None) if idx != node_idx else
                            slice(choice_idx, choice_idx + 1) for idx in
                            range(len(matches.shape)))

                        # This single choice has an all-zero slice, so the
                        # whole combination is marked to be skipped.
                        if np.sum(matches[slices_]) == 0:
                            skip_array[product] = 1

                for product in itertools.product(*num_node_choices):
                    if skip_array[product]:
                        continue

                    # Fix the chosen component on each axis of the sub-chain
                    # and keep every other axis whole.
                    slices = tuple(
                        slice(None) if idx not in indices else
                        slice(product[idx - start_idx],
                              product[idx - start_idx] + 1) for idx in
                        range(len(matches.shape)))

                    if np.sum(matches[slices]) == 0:
                        constraint = tuple([(node_names[i],
                                             node_choice_names[i][product[i]])
                                            for i in range(len(product))])

                        # Check if a more general constraint/forbidden clause
                        #  was already added
                        continue_ = False
                        for constraint_length in range(2, len(constraint)):
                            constr_starts = len(constraint) - constraint_length + 1
                            for constraint_start_idx in range(constr_starts):
                                constraint_end_idx = constraint_start_idx + constraint_length
                                sub_constraint = constraint[constraint_start_idx:constraint_end_idx]
                                if sub_constraint in constraints:
                                    continue_ = True
                                    break
                            if continue_:
                                break
                        if continue_:
                            continue

                        constraints.add(constraint)

                        # One equals-clause per node, combined with AND.
                        forbiddens = []
                        for i in range(len(product)):
                            forbiddens.append(
                                ForbiddenEqualsClause(conf_space.get_hyperparameter(
                                    node_names[i] + ":__choice__"),
                                    node_choice_names[i][product[i]]))
                        forbidden = ForbiddenAndConjunction(*forbiddens)
                        conf_space.add_forbidden_clause(forbidden)

    return conf_space
예제 #22
0
def read(pcs_string, debug=False):
    """
    Reads in a :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace`
    definition from a pcs file.

    Example
    -------

    >>> from ConfigSpace.read_and_write import pcs_new
    >>> with open('configspace.pcs', 'r') as fh:
    >>>     restored_conf = pcs_new.read(fh)

    Parameters
    ----------
    pcs_string : iterable of str
        ConfigSpace definition in pcs format, iterated line by line (e.g. an
        open file handle, as in the example above)
    debug : bool
        Provides debug information. Defaults to False. Not read by this
        function at present.

    Returns
    -------
    :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace`
        The restored ConfigurationSpace object

    Raises
    ------
    NotImplementedError
        If a condition or parameter line cannot be parsed, or a forbidden
        clause uses an operator other than ``=``.

    """
    configuration_space = ConfigurationSpace()
    conditions = []
    forbidden = []

    # Constructors per parameter kind; loop-invariant, so built once.
    create = {"int": UniformIntegerHyperparameter,
              "float": UniformFloatHyperparameter,
              "categorical": CategoricalHyperparameter,
              "ordinal": OrdinalHyperparameter
              }

    for line in pcs_string:
        if "#" in line:
            # It contains a comment
            line = line[:line.find("#")]

        # Remove quotes and whitespaces at beginning and end
        line = line.replace('"', "").replace("'", "")
        line = line.strip()
        if "|" in line:
            # It's a condition; conditions are resolved after all
            # hyperparameters have been added.
            try:
                conditions.append(pp_condition.parseString(line))
            except pyparsing.ParseException:
                raise NotImplementedError("Could not parse condition: %s" % line)
            continue
        if "}" not in line and "]" not in line:
            # Neither a parameter definition nor a forbidden clause (this
            # also covers empty lines).
            continue
        if line.startswith("{") and line.endswith("}"):
            forbidden.append(line)
            continue

        param = None

        # First attempt: a continuous (integer / real) parameter.
        try:
            param_list = pp_cont_param.parseString(line)
            name = param_list[0]
            if param_list[1] == 'integer':
                paramtype = 'int'
            elif param_list[1] == 'real':
                paramtype = 'float'
            else:
                paramtype = None

            if paramtype in ['int', 'float']:
                # Anything after the tenth token is the optional log marker.
                log = param_list[10:]
                param_list = param_list[:10]
                if len(log) > 0:
                    log = log[0]
                lower = float(param_list[3])
                upper = float(param_list[5])
                log_on = "log" in log
                default_value = float(param_list[8])
                param = create[paramtype](name=name, lower=lower, upper=upper,
                                          q=None, log=log_on, default_value=default_value)

        except pyparsing.ParseException:
            pass

        # Second attempt: a categorical or ordinal parameter.
        try:
            if "categorical" in line:
                param_list = pp_cat_param.parseString(line)
                name = param_list[0]
                choices = list(param_list[3:-4:2])
                default_value = param_list[-2]
                param = create["categorical"](name=name, choices=choices, default_value=default_value)

            elif "ordinal" in line:
                param_list = pp_ord_param.parseString(line)
                name = param_list[0]
                sequence = list(param_list[3:-4:2])
                default_value = param_list[-2]
                param = create["ordinal"](name=name, sequence=sequence, default_value=default_value)

        except pyparsing.ParseException:
            pass

        if param is None:
            raise NotImplementedError("Could not parse: %s" % line)

        configuration_space.add_hyperparameter(param)

    for clause in forbidden:
        param_list = pp_forbidden_clause.parseString(clause)
        tmp_list = []
        clause_list = []
        for value in param_list[1:]:
            if len(tmp_list) < 3:
                tmp_list.append(value)
            else:
                # Three tokens (name, operator, value) are complete; the
                # current token is not stored (presumably a separator or the
                # closing brace -- confirm against pp_forbidden_clause).
                # So far, only equals is supported by SMAC
                if tmp_list[1] == '=':
                    # TODO maybe add a check if the hyperparameter is
                    # actually in the configuration space
                    clause_list.append(ForbiddenEqualsClause(
                        configuration_space.get_hyperparameter(tmp_list[0]),
                        tmp_list[2]))
                else:
                    raise NotImplementedError()
                tmp_list = []
        configuration_space.add_forbidden_clause(ForbiddenAndConjunction(
            *clause_list))

    # Group the parsed conditions by child, keeping first-seen order.
    conditions_per_child = OrderedDict()
    for condition in conditions:
        conditions_per_child.setdefault(condition[0], []).append(condition)

    for child_name, child_conditions in conditions_per_child.items():
        for condition in child_conditions:
            # Drop the child name and the following token, then work on the
            # rest as one space-separated expression.
            expression = ' '.join(condition[2:])
            has_or = '||' in expression
            has_and = '&&' in expression

            if has_or and has_and:
                # 1st case: a mixture of || and &&
                ors_combis = []
                for or_part in expression.split('||'):
                    and_parts = or_part.split('&&')
                    if len(and_parts) == 1:
                        # A single part is a plain operand of the OR.
                        ors_combis.append(condition_specification(
                            child_name, and_parts[0].split(), configuration_space))
                    else:
                        # Each AND part is tokenized on its own. The tokens
                        # used to be repeated once per AND part, unlike the
                        # pure-AND branch below.
                        ands = [
                            condition_specification(
                                child_name, and_part.split(), configuration_space)
                            for and_part in and_parts
                        ]
                        ors_combis.append(AndConjunction(*ands))
                configuration_space.add_condition(OrConjunction(*ors_combis))
            elif has_or:
                # 2nd case: we only have ors
                ors = [
                    condition_specification(
                        child_name, or_part.split(), configuration_space)
                    for or_part in expression.split('||')
                ]
                configuration_space.add_condition(OrConjunction(*ors))
            elif has_and:
                # 3rd case: we only have ands
                ands = [
                    condition_specification(
                        child_name, and_part.split(), configuration_space)
                    for and_part in expression.split('&&')
                ]
                configuration_space.add_condition(AndConjunction(*ands))
            else:
                # 4th case: we have a normal condition
                configuration_space.add_condition(condition_specification(
                    child_name, expression.split(), configuration_space))

    return configuration_space
예제 #23
0
    def _get_hyperparameter_search_space(self,
                                         include=None,
                                         exclude=None,
                                         dataset_properties=None):
        """Create the hyperparameter configuration space.

        The base search space is obtained from
        ``self._get_base_search_space`` and then restricted with forbidden
        clauses for classifier/preprocessor combinations that must not be
        sampled together. Whenever adding a clause is rejected with a
        ``ValueError``, another available classifier is installed as the
        default of ``classifier:__choice__`` and the clause is retried.

        Parameters
        ----------
        include : dict (optional, default=None)
            Forwarded unchanged to ``self._get_base_search_space``.

        exclude : optional, default=None
            Forwarded unchanged to ``self._get_base_search_space``
            (presumably the same shape as ``include`` -- confirm there).

        dataset_properties : dict (optional, default=None)
            Anything that is not a dict is replaced by an empty dict. The
            dict is modified in place: ``'target_type'`` is always set to
            ``'classification'``.

        Returns
        -------
        cs : ConfigurationSpace
            The restricted configuration space. It is also stored in
            ``self.configuration_space_``; the dataset properties that were
            used are stored in ``self.dataset_properties_``.

        Raises
        ------
        ValueError
            If no remaining classifier can serve as a legal default.
        """
        cs = ConfigurationSpace()

        # ``None`` is not a dict either, so one isinstance check is enough.
        if not isinstance(dataset_properties, dict):
            dataset_properties = dict()
        # Missing or different target types are both overwritten.
        dataset_properties['target_type'] = 'classification'

        pipeline = self.steps
        cs = self._get_base_search_space(cs=cs,
                                         dataset_properties=dataset_properties,
                                         exclude=exclude,
                                         include=include,
                                         pipeline=pipeline)

        classifier_key = 'classifier:__choice__'
        preprocessor_key = 'preprocessor:__choice__'

        classifiers = cs.get_hyperparameter(classifier_key).choices
        preprocessors = cs.get_hyperparameter(preprocessor_key).choices
        available_classifiers = pipeline[-1][1].get_available_components(
            dataset_properties)

        # Classifiers that may replace the current default when a forbidden
        # clause would otherwise rule the default configuration out.
        possible_default_classifier = list(available_classifiers.keys())
        possible_default_classifier.remove(
            cs.get_hyperparameter(classifier_key).default)

        def forbid(pairs, skip_on_key_error):
            """Forbid the conjunction of ``(hyperparameter name, value)``
            pairs, switching the default classifier until it is accepted."""
            while True:
                try:
                    cs.add_forbidden_clause(
                        ForbiddenAndConjunction(*[
                            ForbiddenEqualsClause(
                                cs.get_hyperparameter(hp_name), value)
                            for hp_name, value in pairs
                        ]))
                except KeyError:
                    if skip_on_key_error:
                        return
                    raise
                except ValueError:
                    # Change the default and try again
                    try:
                        default = possible_default_classifier.pop()
                    except IndexError:
                        raise ValueError(
                            "Cannot find a legal default configuration.")
                    cs.get_hyperparameter(classifier_key).default = default
                else:
                    # Success
                    return

        # A classifier which can handle sparse data after the densifier is
        # forbidden for memory issues
        if 'densifier' in preprocessors:
            for key in classifiers:
                properties = available_classifiers[key].get_properties()
                if SPARSE in properties['input']:
                    forbid(((classifier_key, key),
                            (preprocessor_key, 'densifier')),
                           skip_on_key_error=False)

        # which would take too long
        # Combinations of non-linear models with feature learning:
        classifiers_ = (
            "adaboost", "decision_tree", "extra_trees", "gradient_boosting",
            "k_nearest_neighbors", "libsvm_svc", "random_forest",
            "gaussian_nb", "xgradient_boosting",
        )
        feature_learning = ("kitchen_sinks", "nystroem_sampler")

        for c, f in product(classifiers_, feature_learning):
            if c in classifiers and f in preprocessors:
                forbid(((classifier_key, c), (preprocessor_key, f)),
                       skip_on_key_error=True)

        # Won't work
        # Multinomial NB etc don't use with features learning, pca etc
        classifiers_ = ("multinomial_nb",)
        preproc_with_negative_X = (
            "kitchen_sinks", "pca", "truncatedSVD", "fast_ica", "kernel_pca",
            "nystroem_sampler",
        )

        for c, f in product(classifiers_, preproc_with_negative_X):
            if c in classifiers and f in preprocessors:
                forbid(((preprocessor_key, f), (classifier_key, c)),
                       skip_on_key_error=True)

        self.configuration_space_ = cs
        self.dataset_properties_ = dataset_properties
        return cs
예제 #24
0
    def add_params(cs: ConfigurationSpace):
        """Add the XGBoost hyperparameters to ``cs`` (best effort).

        Nothing is added unless ``cs`` has a hyperparameter named
        ``"classifier"`` whose choices contain ``"XGBoost"``. Every added
        hyperparameter is made conditional on ``classifier`` being
        ``"XGBoost"``.

        The operation is best effort: any ordinary exception (for example a
        missing ``"classifier"`` hyperparameter) aborts it silently, and
        hyperparameters added before the failure stay in ``cs``.
        ``KeyboardInterrupt`` and ``SystemExit`` are not suppressed.

        Parameters
        ----------
        cs : ConfigurationSpace
            Configuration space that is extended in place.

        Returns
        -------
        None
        """
        try:
            classifier = cs.get_hyperparameter("classifier")
            if "XGBoost" not in classifier.choices:
                return

            # (hyperparameter class, name, remaining keyword arguments),
            # listed in the order in which they are added to ``cs``.
            xgb_specs = (
                (UniformIntegerHyperparameter, "xgb:num_round",
                 dict(lower=10, upper=100, default_value=50, log=True)),
                (UniformFloatHyperparameter, "xgb:alpha",
                 dict(lower=0, upper=10, default_value=1)),
                (UniformFloatHyperparameter, "xgb:lambda",
                 dict(lower=1, upper=10, default_value=1)),
                (UniformFloatHyperparameter, "xgb:colsample_bylevel",
                 dict(lower=0.5, upper=1, default_value=1)),
                (UniformFloatHyperparameter, "xgb:colsample_bytree",
                 dict(lower=0.5, upper=1, default_value=1)),
                (UniformFloatHyperparameter, "xgb:subsample",
                 dict(lower=0.01, upper=1, default_value=1)),
                (UniformFloatHyperparameter, "xgb:max_delta_step",
                 dict(lower=0, upper=10, default_value=0)),
                (UniformFloatHyperparameter, "xgb:min_child_weight",
                 dict(lower=0, upper=20, default_value=1)),
                (UniformIntegerHyperparameter, "xgb:max_depth",
                 dict(lower=1, upper=10, default_value=6)),
                (UniformFloatHyperparameter, "xgb:gamma",
                 dict(lower=0, upper=10, default_value=0)),
                (UniformFloatHyperparameter, "xgb:eta",
                 dict(lower=0, upper=1, default_value=0.3)),
            )

            # All hyperparameters are registered first, the conditions
            # afterwards.
            added = []
            for hp_class, hp_name, hp_kwargs in xgb_specs:
                hyperparameter = hp_class(name=hp_name, **hp_kwargs)
                cs.add_hyperparameter(hyperparameter)
                added.append(hyperparameter)

            for hyperparameter in added:
                cs.add_condition(
                    InCondition(child=hyperparameter,
                                parent=classifier,
                                values=["XGBoost"]))
        except Exception:
            # Deliberately best effort; a bare ``except`` would also swallow
            # KeyboardInterrupt / SystemExit, which must propagate.
            return
예제 #25
0
def read(pcs_string, debug=False):
    """
    Read in a :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace`
    definition from a pcs file.

    Example
    -------

    .. testsetup:: pcs_new_test

        from ConfigSpace import ConfigurationSpace
        import ConfigSpace.hyperparameters as CSH
        from ConfigSpace.read_and_write import pcs_new
        cs = ConfigurationSpace()
        cs.add_hyperparameter(CSH.CategoricalHyperparameter('a', choices=[1, 2, 3]))
        with open('configspace.pcs_new', 'w') as f:
             f.write(pcs_new.write(cs))

    .. doctest:: pcs_new_test

        >>> from ConfigSpace.read_and_write import pcs_new
        >>> with open('configspace.pcs_new', 'r') as fh:
        ...     deserialized_conf = pcs_new.read(fh)

    Parameters
    ----------
    pcs_string : iterable of str
        ConfigSpace definition in pcs format. It is iterated line by line,
        so pass an open file handle or a list of lines rather than a single
        string.
    debug : bool
        Provides debug information. Defaults to False. Currently not used by
        this function.

    Returns
    -------
    :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace`
        The deserialized ConfigurationSpace object

    Raises
    ------
    NotImplementedError
        If a condition or parameter line cannot be parsed, or a forbidden
        clause uses an operator other than ``=``.
    ValueError
        If a forbidden value is outside the bounds / allowed values of its
        hyperparameter, or the hyperparameter type is unsupported.

    """
    configuration_space = ConfigurationSpace()
    conditions = []
    forbidden = []

    create = {
        "int": UniformIntegerHyperparameter,
        "float": UniformFloatHyperparameter,
        "categorical": CategoricalHyperparameter,
        "ordinal": OrdinalHyperparameter
    }
    # pcs type keyword -> key into ``create``
    continuous_types = {'integer': 'int', 'real': 'float'}

    for line in pcs_string:
        if "#" in line:
            # It contains a comment
            line = line[:line.find("#")]

        # Remove quotes and whitespaces at beginning and end
        line = line.replace('"', "").replace("'", "").strip()

        if "|" in line:
            # It's a condition
            try:
                conditions.append(pp_condition.parseString(line))
            except pyparsing.ParseException as err:
                raise NotImplementedError("Could not parse condition: %s" %
                                          line) from err
            continue
        # Lines without a closing brace/bracket (including empty lines)
        # carry no definition.
        if "}" not in line and "]" not in line:
            continue
        if line.startswith("{") and line.endswith("}"):
            # Forbidden clauses are resolved once all parameters exist.
            forbidden.append(line)
            continue

        param = None

        try:
            param_list = pp_cont_param.parseString(line)
            name = param_list[0]
            paramtype = continuous_types.get(param_list[1])

            if paramtype is not None:
                # Everything after the first ten tokens is the optional
                # "log" marker.
                log = param_list[10:]
                param_list = param_list[:10]
                if len(log) > 0:
                    log = log[0]
                lower = float(param_list[3])
                upper = float(param_list[5])
                log_on = "log" in log
                default_value = float(param_list[8])
                param = create[paramtype](name=name,
                                          lower=lower,
                                          upper=upper,
                                          q=None,
                                          log=log_on,
                                          default_value=default_value)

        except pyparsing.ParseException:
            pass

        try:
            if "categorical" in line:
                param_list = pp_cat_param.parseString(line)
                name = param_list[0]
                choices = list(param_list[3:-4:2])
                default_value = param_list[-2]
                param = create["categorical"](
                    name=name,
                    choices=choices,
                    default_value=default_value,
                )

            elif "ordinal" in line:
                param_list = pp_ord_param.parseString(line)
                name = param_list[0]
                sequence = list(param_list[3:-4:2])
                default_value = param_list[-2]
                param = create["ordinal"](
                    name=name,
                    sequence=sequence,
                    default_value=default_value,
                )

        except pyparsing.ParseException:
            pass

        if param is None:
            raise NotImplementedError("Could not parse: %s" % line)

        configuration_space.add_hyperparameter(param)

    for clause in forbidden:
        param_list = pp_forbidden_clause.parseString(clause)
        tmp_list = []
        clause_list = []
        for value in param_list[1:]:
            if len(tmp_list) < 3:
                tmp_list.append(value)
                continue

            # ``tmp_list`` now holds [name, operator, value]; the current
            # token (presumably the separator or the closing brace) only
            # triggers the evaluation and is dropped.
            hp_name, operator, raw_value = tmp_list

            # So far, only equals is supported by SMAC
            if operator != '=':
                raise NotImplementedError()

            hp = configuration_space.get_hyperparameter(hp_name)
            if isinstance(hp, NumericalHyperparameter):
                if isinstance(hp, IntegerHyperparameter):
                    forbidden_value = int(raw_value)
                elif isinstance(hp, FloatHyperparameter):
                    forbidden_value = float(raw_value)
                else:
                    raise NotImplementedError
                if forbidden_value < hp.lower or forbidden_value > hp.upper:
                    raise ValueError(
                        f'forbidden_value is set out of the bound, it needs to'
                        f' be set between [{hp.lower}, {hp.upper}]'
                        f' but its value is {forbidden_value}')
            elif isinstance(
                    hp, (CategoricalHyperparameter, OrdinalHyperparameter)):
                hp_values = hp.choices if isinstance(hp, CategoricalHyperparameter)\
                    else hp.sequence
                if raw_value not in hp_values:
                    # Report the offending raw token; ``forbidden_value`` is
                    # not assigned on this path.
                    raise ValueError(
                        f'forbidden_value is set out of the allowed value '
                        f'sets, it needs to be one member from {hp_values} '
                        f'but its value is {raw_value}')
                forbidden_value = raw_value
            else:
                raise ValueError('Unsupported Hyperparamter sorts')

            clause_list.append(ForbiddenEqualsClause(hp, forbidden_value))
            tmp_list = []
        configuration_space.add_forbidden_clause(
            ForbiddenAndConjunction(*clause_list))

    conditions_per_child = OrderedDict()
    for condition in conditions:
        conditions_per_child.setdefault(condition[0], []).append(condition)

    def specify(child_name, text):
        """Turn one whitespace-separated condition into a condition object."""
        return condition_specification(child_name, text.split(),
                                       configuration_space)

    for child_name, child_conditions in conditions_per_child.items():
        for parsed in child_conditions:
            condition = ' '.join(parsed[2:])
            if '||' in condition:
                # Pure OR, or a mixture of || and &&: every OR operand is
                # either a single condition or an AND group.
                or_terms = []
                for or_part in condition.split('||'):
                    and_parts = or_part.split('&&')
                    if len(and_parts) == 1:
                        or_terms.append(specify(child_name, and_parts[0]))
                    else:
                        # Each AND operand is tokenised on its own; feeding
                        # repeated tokens would corrupt ``in {..}`` value
                        # lists.
                        or_terms.append(AndConjunction(
                            *[specify(child_name, and_part)
                              for and_part in and_parts]))
                configuration_space.add_condition(OrConjunction(*or_terms))
            elif '&&' in condition:
                # We only have ands
                configuration_space.add_condition(AndConjunction(
                    *[specify(child_name, and_part)
                      for and_part in condition.split('&&')]))
            else:
                # We have a normal condition
                configuration_space.add_condition(
                    specify(child_name, condition))

    return configuration_space
예제 #26
0
    def get_hyperparameter_search_space(cls, include=None, exclude=None,
                                        dataset_properties=None):
        """Return the configuration space for the CASH problem.

        The base search space is obtained from
        ``cls._get_hyperparameter_search_space`` and then restricted with
        forbidden clauses for regressor/preprocessor combinations that must
        not be sampled together. Whenever adding a clause is rejected with a
        ``ValueError``, another available regressor is installed as the
        default of ``regressor:__choice__`` and the clause is retried.

        Parameters
        ----------
        include : optional, default=None
            Forwarded unchanged to ``cls._get_hyperparameter_search_space``
            (presumably a mapping from pipeline step name to the component
            names to use, e.g. ``{'regressor': ['svr']}`` -- confirm there).

        exclude : optional, default=None
            Forwarded unchanged to ``cls._get_hyperparameter_search_space``
            (presumably the counterpart of ``include`` listing components to
            leave out -- confirm there).

        dataset_properties : dict (optional, default=None)
            Anything that is not a dict is replaced by an empty dict. The
            dict is modified in place: ``'target_type'`` is always set to
            ``'regression'`` and ``'sparse'`` defaults to ``False``.

        Returns
        -------
        cs : ConfigurationSpace
            The configuration space describing the regression pipeline.

        Raises
        ------
        ValueError
            If no remaining regressor can serve as a legal default.
        """
        cs = ConfigurationSpace()

        # ``None`` is not a dict either, so one isinstance check is enough.
        if not isinstance(dataset_properties, dict):
            dataset_properties = dict()
        # Missing or different target types are both overwritten.
        dataset_properties['target_type'] = 'regression'

        if 'sparse' not in dataset_properties:
            # This dataset is probably dense
            dataset_properties['sparse'] = False

        pipeline = cls._get_pipeline()
        cs = cls._get_hyperparameter_search_space(cs, dataset_properties,
                                                  exclude, include, pipeline)

        regressor_key = 'regressor:__choice__'
        preprocessor_key = 'preprocessor:__choice__'

        regressors = cs.get_hyperparameter(regressor_key).choices
        preprocessors = cs.get_hyperparameter(preprocessor_key).choices
        available_regressors = pipeline[-1][1].get_available_components(
            dataset_properties)

        # Regressors that may replace the current default when a forbidden
        # clause would otherwise rule the default configuration out.
        possible_default_regressor = list(available_regressors.keys())
        possible_default_regressor.remove(
            cs.get_hyperparameter(regressor_key).default)

        def forbid(value, preprocessor, skip_on_key_error):
            """Forbid ``regressor == value`` together with the given
            preprocessor, switching the default until it is accepted."""
            while True:
                try:
                    cs.add_forbidden_clause(ForbiddenAndConjunction(
                        ForbiddenEqualsClause(
                            cs.get_hyperparameter(regressor_key), value),
                        ForbiddenEqualsClause(
                            cs.get_hyperparameter(preprocessor_key),
                            preprocessor)))
                except KeyError:
                    if skip_on_key_error:
                        return
                    raise
                except ValueError:
                    # Change the default and try again
                    try:
                        default = possible_default_regressor.pop()
                    except IndexError:
                        raise ValueError(
                            "Cannot find a legal default configuration.")
                    cs.get_hyperparameter(regressor_key).default = default
                else:
                    return

        # A regressor which can handle sparse data after the densifier
        if 'densifier' in preprocessors:
            for key in regressors:
                properties = available_regressors[key].get_properties(
                    dataset_properties=None)
                if SPARSE in properties['input']:
                    forbid(key, 'densifier', skip_on_key_error=False)

        # which would take too long
        # Combinations of tree-based models with feature learning:
        regressors_ = ("adaboost", "decision_tree", "extra_trees",
                       "gaussian_process", "gradient_boosting",
                       "k_nearest_neighbors", "random_forest",
                       "xgradient_boosting")
        feature_learning_ = ("kitchen_sinks", "kernel_pca",
                             "nystroem_sampler")

        for r, f in product(regressors_, feature_learning_):
            if r in regressors and f in preprocessors:
                forbid(r, f, skip_on_key_error=True)

        return cs
예제 #27
0
def get_hyperspace(data_info,
                   include_estimators=None, include_preprocessors=None):
    """Build the configuration space of the pipeline for ``data_info['task']``.

    The space is assembled step by step from the pipeline returned by
    ``get_pipeline``. Afterwards forbidden clauses are added for
    estimator/preprocessor pairs that must not be combined.

    Parameters
    ----------
    data_info : dict
        Dataset description. The ``'task'`` entry is required;
        ``'is_sparse'`` and ``'signed'`` are set to ``False`` when missing.
        The dict is modified in place: ``'target_type'`` and, for
        classification tasks, ``'multilabel'`` / ``'multiclass'`` are
        written to it. Any non-dict value is replaced by an empty dict
        (which then fails on the missing ``'task'`` entry).
    include_estimators : optional
        If not None, used as the ``include`` entry of the final estimator
        step (``'classifier'`` or ``'regressor'``).
    include_preprocessors : optional
        If not None, used as the ``include`` entry of the
        ``'preprocessor'`` step.

    Returns
    -------
    cs : ConfigurationSpace
        The configuration space including all forbidden clauses.

    Raises
    ------
    KeyError
        If ``data_info`` has no ``'task'`` entry.
    NotImplementedError
        If the task is neither a classification nor a regression task.
    ValueError
        If ``include`` names a step that is not part of the pipeline, or
        if no default estimator is left that is compatible with the
        forbidden clauses.
    """
    if data_info is None or not isinstance(data_info, dict):
        data_info = dict()

    if 'is_sparse' not in data_info:
        # This dataset is probably dense
        data_info['is_sparse'] = False

    task_type = data_info['task']
    multilabel = (task_type == MULTILABEL_CLASSIFICATION)
    multiclass = (task_type == MULTICLASS_CLASSIFICATION)

    if task_type in CLASSIFICATION_TASKS:
        data_info['multilabel'] = multilabel
        data_info['multiclass'] = multiclass
        data_info['target_type'] = 'classification'
        pipe_type = 'classifier'

        # Estimators that must not be combined with feature learning.
        # Every name appears exactly once so that no forbidden clause is
        # registered twice.
        components_ = ["adaboost", "decision_tree", "extra_trees",
                       "gradient_boosting", "k_nearest_neighbors",
                       "libsvm_svc", "random_forest", "gaussian_nb"]
        feature_learning_ = ["kitchen_sinks", "nystroem_sampler"]
    elif task_type in REGRESSION_TASKS:
        data_info['target_type'] = 'regression'
        pipe_type = 'regressor'

        # Estimators that must not be combined with feature learning
        components_ = ["adaboost", "decision_tree", "extra_trees",
                       "gaussian_process", "gradient_boosting",
                       "k_nearest_neighbors", "random_forest"]
        feature_learning_ = ["kitchen_sinks", "kernel_pca", "nystroem_sampler"]
    else:
        raise NotImplementedError()

    include, exclude = dict(), dict()
    if include_preprocessors is not None:
        include["preprocessor"] = include_preprocessors
    if include_estimators is not None:
        include[pipe_type] = include_estimators

    cs = ConfigurationSpace()

    # Construct pipeline
    # FIXME OrderedDict?
    pipeline = get_pipeline(data_info['task'])

    # TODO include, exclude, pipeline
    keys = [pair[0] for pair in pipeline]
    for key in include:
        if key not in keys:
            raise ValueError('Invalid key in include: %s; should be one '
                             'of %s' % (key, keys))

    # NOTE: ``exclude`` is always empty at this point; the check is kept
    # for the TODO above.
    for key in exclude:
        if key not in keys:
            raise ValueError('Invalid key in exclude: %s; should be one '
                             'of %s' % (key, keys))

    # Construct hyperspace
    # TODO What does 'signed' stand for?
    if 'signed' not in data_info:
        # This dataset probably contains unsigned data
        data_info['signed'] = False

    match = check_pipeline(pipeline, data_info,
                           include=include, exclude=exclude)

    # Now we have only legal combinations at this step of the pipeline
    # Simple sanity checks
    assert np.sum(match) != 0, "No valid pipeline found."

    assert np.sum(match) <= np.size(match), \
        "'matches' is not binary; %s <= %d, %s" % \
        (str(np.sum(match)), np.size(match), str(match.shape))

    # Iterate each dimension of the matches array (each step of the
    # pipeline) to see if we can add a hyperparameter for that step
    for node_idx, (node_name, node) in enumerate(pipeline):
        is_choice = hasattr(node, "get_available_components")

        # If the node isn't a choice we can add it immediately because it
        # must be active (if it weren't, np.sum(match) would be zero)
        if not is_choice:
            cs.add_configuration_space(
                node_name,
                node.get_hyperparameter_search_space(data_info))
        # If the node is a choice, we have to figure out which of its
        # choices are actually legal choices
        else:
            choices_list = find_active_choices(
                match, node, node_idx, data_info,
                include=include.get(node_name),
                exclude=exclude.get(node_name))
            cs.add_configuration_space(
                node_name,
                node.get_hyperparameter_search_space(data_info,
                                                     include=choices_list))

    # And now add forbidden parameter configurations according to matches
    if np.sum(match) < np.size(match):
        cs = add_forbidden(conf_space=cs, pipeline=pipeline, matches=match,
                           dataset_properties=data_info, include=include,
                           exclude=exclude)

    estimator_key = '%s:__choice__' % pipe_type
    preprocessor_key = 'preprocessor:__choice__'

    components = cs.get_hyperparameter(estimator_key).choices
    availables = pipeline[-1][1].get_available_components(data_info)
    preprocessors = cs.get_hyperparameter(preprocessor_key).choices

    # Fallback defaults: every available estimator except the current
    # default. One of them is popped whenever adding a clause fails with
    # ValueError (presumably because the clause forbids the current
    # default configuration -- confirm against ConfigSpace).
    possible_default = copy.copy(list(availables.keys()))
    default = cs.get_hyperparameter(estimator_key).default
    del possible_default[possible_default.index(default)]

    # A classifier which can handle sparse data after the densifier is
    # forbidden for memory issues
    for key in components:
        # TODO regression dataset_properties=None
        if SPARSE in availables[key].get_properties()['input']:
            if 'densifier' in preprocessors:
                while True:
                    try:
                        cs.add_forbidden_clause(
                            ForbiddenAndConjunction(
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(estimator_key),
                                    key),
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(preprocessor_key),
                                    'densifier')
                            ))
                        # Success
                        break
                    except ValueError:
                        # Change the default and try again
                        try:
                            default = possible_default.pop()
                        except IndexError:
                            raise ValueError("Cannot find a legal default configuration.")
                        cs.get_hyperparameter(estimator_key).default = default

    # Combinations of non-linear models with feature learning are
    # forbidden because they would take too long
    for c, f in itertools.product(components_, feature_learning_):
        if c not in components:
            continue
        if f not in preprocessors:
            continue
        while True:
            try:
                cs.add_forbidden_clause(ForbiddenAndConjunction(
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        estimator_key), c),
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        preprocessor_key), f)))
                break
            except KeyError:
                break
            except ValueError:
                # Change the default and try again
                try:
                    default = possible_default.pop()
                except IndexError:
                    raise ValueError(
                        "Cannot find a legal default configuration.")
                cs.get_hyperparameter(estimator_key).default = default

    if task_type in CLASSIFICATION_TASKS:
        # Won't work
        # Multinomial NB etc. must not be used with feature learning, pca etc.
        nb_components_ = ["multinomial_nb"]
        preproc_with_negative_X = ["kitchen_sinks", "pca", "truncatedSVD",
                                   "fast_ica", "kernel_pca", "nystroem_sampler"]

        for c, f in itertools.product(nb_components_, preproc_with_negative_X):
            if c not in components:
                continue
            if f not in preprocessors:
                continue
            while True:
                try:
                    # estimator_key is "classifier:__choice__" in this branch
                    cs.add_forbidden_clause(ForbiddenAndConjunction(
                        ForbiddenEqualsClause(cs.get_hyperparameter(
                            preprocessor_key), f),
                        ForbiddenEqualsClause(cs.get_hyperparameter(
                            estimator_key), c)))
                    break
                except KeyError:
                    break
                except ValueError:
                    # Change the default and try again
                    try:
                        default = possible_default.pop()
                    except IndexError:
                        raise ValueError(
                            "Cannot find a legal default configuration.")
                    cs.get_hyperparameter(estimator_key).default = default

    return cs
예제 #28
0
    def get_hyperparameter_search_space(cls, include=None, exclude=None,
                                        dataset_properties=None):
        """Create the hyperparameter configuration space for classification.

        The base space is built by ``cls._get_hyperparameter_search_space``
        from the pipeline returned by ``cls._get_pipeline``. Afterwards
        forbidden clauses are added for classifier/preprocessor pairs that
        must not be combined.

        Parameters
        ----------
        include : optional, default=None
            Forwarded unchanged to ``cls._get_hyperparameter_search_space``.
        exclude : optional, default=None
            Forwarded unchanged to ``cls._get_hyperparameter_search_space``.
        dataset_properties : dict (optional, default=None)
            Any non-dict value is replaced by an empty dict. The entry
            ``'target_type'`` is forced to ``'classification'`` in place.

        Returns
        -------
        cs : ConfigurationSpace
            The configuration space including all forbidden clauses.

        Raises
        ------
        ValueError
            If no default classifier is left that is compatible with the
            forbidden clauses.
        """
        cs = ConfigurationSpace()

        if dataset_properties is None or not isinstance(dataset_properties, dict):
            dataset_properties = dict()
        # This space is classification-only: a missing or different target
        # type is overwritten.
        if dataset_properties.get('target_type') != 'classification':
            dataset_properties['target_type'] = 'classification'

        pipeline = cls._get_pipeline()
        cs = cls._get_hyperparameter_search_space(cs, dataset_properties,
                                                  exclude, include, pipeline)

        classifiers = cs.get_hyperparameter('classifier:__choice__').choices
        preprocessors = cs.get_hyperparameter('preprocessor:__choice__').choices
        available_classifiers = pipeline[-1][1].get_available_components(
            dataset_properties)

        # Fallback defaults: every available classifier except the current
        # default. One of them is popped whenever adding a clause fails
        # with ValueError (presumably because the clause forbids the
        # current default configuration -- confirm against ConfigSpace).
        possible_default_classifier = copy.copy(list(
            available_classifiers.keys()))
        default = cs.get_hyperparameter('classifier:__choice__').default
        del possible_default_classifier[possible_default_classifier.index(default)]

        # A classifier which can handle sparse data after the densifier is
        # forbidden for memory issues
        for key in classifiers:
            if SPARSE in available_classifiers[key].get_properties()['input']:
                if 'densifier' in preprocessors:
                    while True:
                        try:
                            cs.add_forbidden_clause(
                                ForbiddenAndConjunction(
                                    ForbiddenEqualsClause(
                                        cs.get_hyperparameter(
                                            'classifier:__choice__'), key),
                                    ForbiddenEqualsClause(
                                        cs.get_hyperparameter(
                                            'preprocessor:__choice__'), 'densifier')
                                ))
                            # Success
                            break
                        except ValueError:
                            # Change the default and try again
                            try:
                                default = possible_default_classifier.pop()
                            except IndexError:
                                raise ValueError("Cannot find a legal default configuration.")
                            cs.get_hyperparameter(
                                'classifier:__choice__').default = default

        # Combinations of non-linear models with feature learning are
        # forbidden because they would take too long. Every name appears
        # exactly once so that no forbidden clause is registered twice.
        classifiers_ = ["adaboost", "decision_tree", "extra_trees",
                        "gradient_boosting", "k_nearest_neighbors",
                        "libsvm_svc", "random_forest", "gaussian_nb",
                        "xgradient_boosting"]
        feature_learning = ["kitchen_sinks", "nystroem_sampler"]

        for c, f in product(classifiers_, feature_learning):
            if c not in classifiers:
                continue
            if f not in preprocessors:
                continue
            while True:
                try:
                    cs.add_forbidden_clause(ForbiddenAndConjunction(
                        ForbiddenEqualsClause(cs.get_hyperparameter(
                            "classifier:__choice__"), c),
                        ForbiddenEqualsClause(cs.get_hyperparameter(
                            "preprocessor:__choice__"), f)))
                    break
                except KeyError:
                    break
                except ValueError:
                    # Change the default and try again
                    try:
                        default = possible_default_classifier.pop()
                    except IndexError:
                        raise ValueError(
                            "Cannot find a legal default configuration.")
                    cs.get_hyperparameter(
                        'classifier:__choice__').default = default

        # Won't work
        # Multinomial NB etc. must not be used with feature learning, pca etc.
        classifiers_ = ["multinomial_nb"]
        preproc_with_negative_X = ["kitchen_sinks", "pca", "truncatedSVD",
                                   "fast_ica", "kernel_pca", "nystroem_sampler"]

        for c, f in product(classifiers_, preproc_with_negative_X):
            if c not in classifiers:
                continue
            if f not in preprocessors:
                continue
            while True:
                try:
                    cs.add_forbidden_clause(ForbiddenAndConjunction(
                        ForbiddenEqualsClause(cs.get_hyperparameter(
                            "preprocessor:__choice__"), f),
                        ForbiddenEqualsClause(cs.get_hyperparameter(
                            "classifier:__choice__"), c)))
                    break
                except KeyError:
                    break
                except ValueError:
                    # Change the default and try again
                    try:
                        default = possible_default_classifier.pop()
                    except IndexError:
                        raise ValueError(
                            "Cannot find a legal default configuration.")
                    cs.get_hyperparameter(
                        'classifier:__choice__').default = default

        return cs
예제 #29
0
파일: pcs.py 프로젝트: automl/ConfigSpace
def read(pcs_string, debug=False):
    """
    Read a :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace`
    definition in pcs format.

    Example
    -------

    >>> from ConfigSpace.read_and_write import pcs
    >>> with open('configspace.pcs', 'r') as fh:
    >>>     restored_conf = pcs.read(fh)

    Parameters
    ----------
    pcs_string : str or iterable of str
        ConfigSpace definition in pcs format. Either the complete text as
        one string (it is split into lines) or an iterable that yields one
        line at a time, e.g. an open file handle.
    debug : bool
        Currently unused; kept for backwards compatibility.
        Defaults to False.

    Returns
    -------
    :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace`
        The restored ConfigurationSpace object

    Raises
    ------
    NotImplementedError
        If a condition or parameter line cannot be parsed, or if a
        forbidden clause uses an operator other than ``=``.
    """
    if isinstance(pcs_string, str):
        # Iterating a plain string would yield single characters instead
        # of lines.
        pcs_string = pcs_string.splitlines()

    configuration_space = ConfigurationSpace()
    conditions = []
    forbidden = []

    # Constructors for the supported parameter types
    create = {"int": UniformIntegerHyperparameter,
              "float": UniformFloatHyperparameter,
              "categorical": CategoricalHyperparameter}

    for line in pcs_string:
        if "#" in line:
            # It contains a comment
            pos = line.find("#")
            line = line[:pos]

        # Remove quotes and whitespaces at beginning and end
        line = line.replace('"', "").replace("'", "")
        line = line.strip()

        if "|" in line:
            # It's a condition
            try:
                c = pp_condition.parseString(line)
                conditions.append(c)
            except pyparsing.ParseException:
                raise NotImplementedError("Could not parse condition: %s" % line)
            continue
        if "}" not in line and "]" not in line:
            # Neither a parameter definition nor a forbidden clause; this
            # also skips empty lines
            continue
        if line.startswith("{") and line.endswith("}"):
            # Forbidden clauses are parsed once all parameters are known
            forbidden.append(line)
            continue

        param = None

        # Try to parse the line as a continuous (int/float) parameter
        try:
            param_list = pp_cont_param.parseString(line)
            # Optional trailing flags: "i" for integer, "l" for log scale
            il = param_list[9:]
            if len(il) > 0:
                il = il[0]
            param_list = param_list[:9]
            name = param_list[0]
            lower = float(param_list[2])
            upper = float(param_list[4])
            paramtype = "int" if "i" in il else "float"
            log = "l" in il
            default_value = float(param_list[7])
            param = create[paramtype](name=name, lower=lower, upper=upper,
                                      q=None, log=log, default_value=default_value)
        except pyparsing.ParseException:
            pass

        # Try to parse the line as a categorical parameter
        try:
            param_list = pp_cat_param.parseString(line)
            name = param_list[0]
            choices = [c for c in param_list[2:-4:2]]
            default_value = param_list[-2]
            param = create["categorical"](name=name, choices=choices,
                                          default_value=default_value)
        except pyparsing.ParseException:
            pass

        if param is None:
            raise NotImplementedError("Could not parse: %s" % line)

        configuration_space.add_hyperparameter(param)

    for clause in forbidden:
        # TODO test this properly!
        # TODO Add a try/catch here!
        param_list = pp_forbidden_clause.parseString(clause)
        tmp_list = []
        clause_list = []
        # Collect tokens in groups of three (name, operator, value). The
        # token following each complete group closes it and is discarded.
        for value in param_list[1:]:
            if len(tmp_list) < 3:
                tmp_list.append(value)
            else:
                # So far, only equals is supported by SMAC
                if tmp_list[1] == '=':
                    # TODO maybe add a check if the hyperparameter is
                    # actually in the configuration space
                    clause_list.append(ForbiddenEqualsClause(
                        configuration_space.get_hyperparameter(tmp_list[0]),
                        tmp_list[2]))
                else:
                    raise NotImplementedError()
                tmp_list = []
        configuration_space.add_forbidden_clause(ForbiddenAndConjunction(
            *clause_list))

    # Now handle conditions
    # If there are two conditions for one child, these two conditions are an
    # AND-conjunction of conditions, thus we have to connect them
    conditions_per_child = OrderedDict()
    for condition in conditions:
        child_name = condition[0]
        if child_name not in conditions_per_child:
            conditions_per_child[child_name] = list()
        conditions_per_child[child_name].append(condition)

    for child_name in conditions_per_child:
        condition_objects = []
        for condition in conditions_per_child[child_name]:
            child = configuration_space.get_hyperparameter(child_name)
            parent_name = condition[2]
            parent = configuration_space.get_hyperparameter(parent_name)
            restrictions = condition[5:-1:2]

            # TODO: cast the type of the restriction!
            if len(restrictions) == 1:
                condition = EqualsCondition(child, parent, restrictions[0])
            else:
                condition = InCondition(child, parent, values=restrictions)
            condition_objects.append(condition)

        # Now we have all condition objects for this child, so we can build a
        #  giant AND-conjunction of them (if number of conditions >= 2)!

        if len(condition_objects) > 1:
            and_conjunction = AndConjunction(*condition_objects)
            configuration_space.add_condition(and_conjunction)
        else:
            configuration_space.add_condition(condition_objects[0])

    return configuration_space
예제 #30
0
    def _get_hyperparameter_search_space(self, include=None, exclude=None,
                                         dataset_properties=None):
        """Return the configuration space for the CASH problem (regression).

        The base space is built by ``self._get_base_search_space`` from
        ``self.steps``. Afterwards forbidden clauses are added for
        regressor/feature-preprocessor pairs that must not be combined.

        Parameters
        ----------
        include : optional, default=None
            Forwarded unchanged to ``self._get_base_search_space``.
        exclude : optional, default=None
            Forwarded unchanged to ``self._get_base_search_space``.
        dataset_properties : dict (optional, default=None)
            Any non-dict value is replaced by an empty dict. The entry
            ``'target_type'`` is forced to ``'regression'`` and a missing
            ``'sparse'`` entry is set to ``False``, both in place.

        Returns
        -------
        cs : ConfigurationSpace
            The configuration space; it is also stored in
            ``self.configuration_space_`` and the dataset properties in
            ``self.dataset_properties_``.

        Raises
        ------
        ValueError
            If no default regressor is left that is compatible with the
            forbidden clauses.
        """
        cs = ConfigurationSpace()

        if dataset_properties is None or not isinstance(dataset_properties, dict):
            dataset_properties = dict()
        # Regression-only space: a missing or different target type is
        # overwritten.
        if dataset_properties.get('target_type') != 'regression':
            dataset_properties['target_type'] = 'regression'
        # Without further information the dataset is treated as dense
        dataset_properties.setdefault('sparse', False)

        cs = self._get_base_search_space(
            cs=cs, dataset_properties=dataset_properties,
            exclude=exclude, include=include, pipeline=self.steps)

        regressor_key = 'regressor:__choice__'
        preprocessor_key = 'feature_preprocessor:__choice__'

        regressors = cs.get_hyperparameter(regressor_key).choices
        preprocessors = cs.get_hyperparameter(preprocessor_key).choices
        available_regressors = self._final_estimator.get_available_components(
            dataset_properties)

        # Fallback defaults: every available regressor except the current
        # default
        fallback_defaults = list(available_regressors.keys())
        default = cs.get_hyperparameter(regressor_key).default_value
        fallback_defaults.pop(fallback_defaults.index(default))

        # A regressor which can handle sparse data after the densifier is
        # forbidden for memory issues
        for name in regressors:
            properties = available_regressors[name].get_properties(
                dataset_properties=None)
            if SPARSE not in properties['input']:
                continue
            if 'densifier' not in preprocessors:
                continue
            while True:
                try:
                    cs.add_forbidden_clause(
                        ForbiddenAndConjunction(
                            ForbiddenEqualsClause(
                                cs.get_hyperparameter(regressor_key), name),
                            ForbiddenEqualsClause(
                                cs.get_hyperparameter(preprocessor_key),
                                'densifier'),
                        ))
                except ValueError:
                    # Switch to another default and retry
                    try:
                        default = fallback_defaults.pop()
                    except IndexError:
                        raise ValueError(
                            "Cannot find a legal default configuration.")
                    cs.get_hyperparameter(regressor_key).default_value = default
                else:
                    # Clause accepted
                    break

        # Tree-based (and similar) models combined with feature learning
        # would take too long, so these pairs are forbidden
        slow_regressors = ["adaboost", "decision_tree", "extra_trees",
                           "gaussian_process", "gradient_boosting",
                           "k_nearest_neighbors", "random_forest",
                           "xgradient_boosting"]
        feature_learners = ["kitchen_sinks", "kernel_pca", "nystroem_sampler"]

        for regressor, learner in product(slow_regressors, feature_learners):
            if regressor not in regressors or learner not in preprocessors:
                continue
            while True:
                try:
                    cs.add_forbidden_clause(ForbiddenAndConjunction(
                        ForbiddenEqualsClause(
                            cs.get_hyperparameter(regressor_key), regressor),
                        ForbiddenEqualsClause(
                            cs.get_hyperparameter(preprocessor_key), learner)))
                except KeyError:
                    # Give up on this pair
                    break
                except ValueError:
                    # Switch to another default and retry
                    try:
                        default = fallback_defaults.pop()
                    except IndexError:
                        raise ValueError(
                            "Cannot find a legal default configuration.")
                    cs.get_hyperparameter(regressor_key).default_value = default
                else:
                    break

        self.configuration_space_ = cs
        self.dataset_properties_ = dataset_properties
        return cs
예제 #31
0
class AutoFolio(object):

    def __init__(self, random_seed: int=12345):
        ''' Constructor 

            Arguments
            ---------
            random_seed: int
                random seed for numpy and random packages
        '''

        np.random.seed(random_seed)  # fix seed
        random.seed(random_seed)

        # I don't know the reason, but without an initial print with
        # logging.info we don't get any output
        logging.info("Init AutoFolio")
        self._root_logger = logging.getLogger()
        self.logger = logging.getLogger("AutoFolio")
        self.cs = None

        self.overwrite_args = None

    def run_cli(self):
        '''
            command line entry point of AutoFolio: either predicts with a
            saved model, or reads a scenario, picks a configuration and then
            saves a fitted model or runs a cross validation
        '''

        args_, self.overwrite_args = CMDParser().parse()
        self._root_logger.setLevel(args_.verbose)

        # prediction mode: load a stored model, print its schedule and stop
        if args_.load:
            features = [float(value) for value in args_.feature_vec]
            self.read_model_and_predict(
                model_fn=args_.load, feature_vec=features)
            return

        # training mode: read the scenario from ASlib files or from csv files
        scenario = ASlibScenario()
        if args_.scenario:
            scenario.read_scenario(args_.scenario)
        elif args_.performance_csv and args_.feature_csv:
            scenario.read_from_csv(perf_fn=args_.performance_csv,
                                   feat_fn=args_.feature_csv,
                                   objective=args_.objective,
                                   runtime_cutoff=args_.runtime_cutoff,
                                   maximize=args_.maximize)

        self.cs = self.get_cs(scenario)

        # tuned configuration if requested, otherwise the default one
        if args_.tune:
            config = self.get_tuned_config(scenario)
        else:
            config = self.cs.get_default_configuration()
        self.logger.debug(config)

        if not args_.save:
            # nothing to save: evaluate the configuration with a 10-fold cv
            self.run_cv(config=config, scenario=scenario, folds=10)
            return

        feature_pre_pipeline, pre_solver, selector = self.fit(
            scenario=scenario, config=config)
        self._save_model(
            args_.save, scenario, feature_pre_pipeline, pre_solver, selector, config)

    def _save_model(self, out_fn: str, scenario: ASlibScenario, feature_pre_pipeline: list, pre_solver: Aspeed, selector, config: Configuration):
        '''
            save all pipeline objects for predictions

            Arguments
            ---------
            out_fn: str
                filename of output file
            scenario: AslibScenario
                ASlib scenario with all the data
            feature_pre_pipeline: list
                list of preprocessing objects
            pre_solver: Aspeed
                aspeed object with pre-solving schedule
            selector: autofolio.selector.*
                fitted selector object
            config: Configuration
                parameter setting configuration
        '''
        scenario.logger = None
        for fpp in feature_pre_pipeline:
            fpp.logger = None
        if pre_solver:
            pre_solver.logger = None
        selector.logger = None
        model = [scenario, feature_pre_pipeline, pre_solver, selector, config]
        with open(out_fn, "bw") as fp:
            pickle.dump(model, fp)

    def read_model_and_predict(self, model_fn: str, feature_vec: list):
        '''
            reads saved model from disk and predicts the selected algorithm schedule for a given feature vector

            Arguments
            --------
            model_fn: str
                file name of saved model
            feature_vec: list
                instance feature vector as a list of floats 
        '''
        with open(model_fn, "br") as fp:
            scenario, feature_pre_pipeline, pre_solver, selector, config = pickle.load(
                fp)

        for fpp in feature_pre_pipeline:
            fpp.logger = logging.getLogger("Feature Preprocessing")
        if pre_solver:
            pre_solver.logger = logging.getLogger("Aspeed PreSolving")
        selector.logger = logging.getLogger("Selector")

        # saved scenario is adapted to given feature vector
        feature_vec = np.array([feature_vec])
        scenario.feature_data = pd.DataFrame(
            feature_vec, index=["pseudo_instance"], columns=scenario.feature_names)
        scenario.instances = ["pseudo_instance"]
        pred = self.predict(scenario=scenario, config=config,
                            feature_pre_pipeline=feature_pre_pipeline, pre_solver=pre_solver, selector=selector)

        print("Selected Schedule [(algorithm, budget)]: %s" % (
            pred["pseudo_instance"]))

    def get_cs(self, scenario: ASlibScenario):
        '''
            builds the parameter configuration space of AutoFolio, stores it
            in self.cs and returns it
            (based on the automl config space: https://github.com/automl/ConfigSpace)

            Arguments
            ---------
            scenario: autofolio.data.aslib_scenario.ASlibScenario
                aslib scenario at hand
        '''

        cs = ConfigurationSpace()
        self.cs = cs

        # one binary parameter per feature step; it defaults to True
        # exactly when the step is among the scenario's default steps
        for fs in scenario.feature_steps:
            cs.add_hyperparameter(CategoricalHyperparameter(
                name="fgroup_%s" % (fs),
                choices=[True, False],
                default=fs in scenario.feature_steps_default))

        # preprocessing
        for wrapper in (PCAWrapper, ImputerWrapper, StandardScalerWrapper):
            wrapper.add_params(cs)

        # pre-solving parameters are only added for runtime scenarios
        if scenario.performance_type[0] == "runtime":
            Aspeed.add_params(cs=cs, cutoff=scenario.algorithm_cutoff_time)

        # classifiers, then selectors
        RandomForest.add_params(cs)
        PairwiseClassifier.add_params(cs)

        return cs

    def get_tuned_config(self, scenario: ASlibScenario):
        '''
            tunes a configuration from self.cs with SMAC3 on the given scenario

            Arguments
            ---------
            scenario: ASlibScenario
                ASlib Scenario at hand

            Returns
            -------
            Configuration
                incumbent held by the SMBO object after its run
        '''

        # target function for SMAC: run_cv with the scenario already bound
        cv_on_scenario = functools.partial(self.run_cv, scenario=scenario)
        taf = ExecuteTAFunc(cv_on_scenario)

        smac_options = {
            "run_obj": "quality",      # we optimize quality
            "runcount-limit": 10,      # at most 10 function evaluations
            "cs": self.cs,             # configuration space
            "deterministic": "true",
        }
        ac_scenario = Scenario(smac_options)

        # necessary to use stats options related to scenario information
        AC_Stats.scenario = ac_scenario

        # announce the start of the optimization in the log
        banner = ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
        self.logger.info(banner)
        self.logger.info("Start Configuration")
        self.logger.info(banner)

        smbo = SMBO(scenario=ac_scenario,
                    tae_runner=taf,
                    rng=np.random.RandomState(42))
        smbo.run(max_iters=999)

        AC_Stats.print_stats()
        best = smbo.incumbent
        self.logger.info("Final Incumbent: %s" % (best))

        return smbo.incumbent

    def run_cv(self, config: Configuration, scenario: ASlibScenario, folds=10):
        '''
            cross validation over the splits provided by scenario.get_split

            Arguments
            ---------
            scenario: autofolio.data.aslib_scenario.ASlibScenario
                aslib scenario at hand
            config: Configuration
                parameter configuration to use for preprocessing
            folds: int
                number of cv-splits

            Returns
            -------
                value of the merged cv statistics' show(), negated if
                scenario.maximize[0] is set; if a ValueError occurs,
                10 times the algorithm cutoff time
        '''
        try:
            # runtime scenarios use the real cutoff, all others a cutoff of 0
            is_runtime = scenario.performance_type[0] == "runtime"
            cv_stat = Stats(
                runtime_cutoff=scenario.algorithm_cutoff_time if is_runtime else 0)

            for fold in range(1, folds + 1):
                self.logger.info("CV-Iteration: %d" % (fold))
                test_scenario, training_scenario = scenario.get_split(indx=fold)

                feature_pre_pipeline, pre_solver, selector = self.fit(
                    scenario=training_scenario, config=config)
                schedules = self.predict(
                    test_scenario, config, feature_pre_pipeline, pre_solver, selector)

                # validate the schedules according to the performance type
                val = Validator()
                perf_type = scenario.performance_type[0]
                if perf_type not in ("runtime", "solution_quality"):
                    raise ValueError("Unknown performance_type[0]")
                if perf_type == "runtime":
                    validate = val.validate_runtime
                else:
                    validate = val.validate_quality
                cv_stat.merge(stat=validate(
                    schedules=schedules, test_scenario=test_scenario))

            self.logger.info(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
            self.logger.info("CV Stats")
            par10 = cv_stat.show()
        except ValueError:
            traceback.print_exc()
            # penalty; its sign is chosen so that the flip below always
            # leaves +10 * cutoff
            factor = -10 if scenario.maximize[0] else 10
            par10 = scenario.algorithm_cutoff_time * factor

        # flip the sign for maximization scenarios
        if scenario.maximize[0]:
            par10 *= -1

        return par10

    def fit(self, scenario: ASlibScenario, config: Configuration):
        '''
            fits the whole AutoFolio pipeline on the given ASlib scenario:
            feature preprocessing, pre-solving and the selector

            Arguments
            ---------
            scenario: autofolio.data.aslib_scenario.ASlibScenario
                aslib scenario at hand
            config: Configuration
                parameter configuration to use for preprocessing

            Returns
            -------
                list of fitted feature preproccessing objects
                pre-solving object
                fitted selector
        '''
        self.logger.info("Given Configuration: %s" % (config))

        # settings given on the command line take precedence over config
        overrides = self.overwrite_args
        if overrides:
            config = self._overwrite_configuration(
                config=config, overwrite_args=overrides)
            self.logger.info("Overwritten Configuration: %s" % (config))

        # pre-solver and selector are fitted on the preprocessed scenario
        scenario, feature_pre_pipeline = \
            self.fit_transform_feature_preprocessing(scenario, config)

        return (feature_pre_pipeline,
                self.fit_pre_solving(scenario, config),
                self.fit_selector(scenario, config))

    def _overwrite_configuration(self, config: Configuration, overwrite_args: list):
        '''
            overwrites a given configuration with some new settings

            A parameter is overwritten if it is part of the configuration's
            dictionary, whatever its current value is (including False or 0).
            Values for integer / float hyperparameters are cast to int /
            float, the strings "True" / "False" become booleans, everything
            else is kept as a string.

            Arguments
            ---------
            config: Configuration
                initial configuration to be adapted
            overwrite_args: list
                new parameter settings as a list of strings

            Returns
            -------
            Configuration
        '''

        def pairwise(iterable):
            # overlapping pairs: (s0, s1), (s1, s2), ...
            a, b = tee(iterable)
            next(b, None)
            return zip(a, b)

        dict_conf = config.get_dictionary()
        # NOTE(review): pairs overlap, so a (value, next parameter) pair is
        # also visited and reported as unknown below; confirm the layout of
        # overwrite_args against the command line parser
        for param, value in pairwise(overwrite_args):
            # membership test, not truthiness: a parameter that is currently
            # False or 0 is still a known parameter
            if param not in dict_conf:
                self.logger.warning(
                    "Unknown given parameter: %s %s" % (param, value))
                continue

            hyperparameter = self.cs.get_hyperparameter(param)
            if isinstance(hyperparameter, UniformIntegerHyperparameter):
                dict_conf[param] = int(value)
            elif isinstance(hyperparameter, UniformFloatHyperparameter):
                dict_conf[param] = float(value)
            elif value == "True":
                dict_conf[param] = True
            elif value == "False":
                dict_conf[param] = False
            else:
                dict_conf[param] = value

        return Configuration(self.cs, values=dict_conf)

    def fit_transform_feature_preprocessing(self, scenario: ASlibScenario, config: Configuration):
        '''
            runs the feature preprocessing steps on a given ASlib scenario
            wrt to a given configuration

            Arguments
            ---------
            scenario: autofolio.data.aslib_scenario.ASlibScenario
                aslib scenario at hand
            config: Configuration
                parameter configuration to use for preprocessing

            Returns
            -------
                transformed scenario
                list of fitted feature preproccessing objects
        '''

        # order of application: feature group filter, imputer, scaler, pca
        step_classes = (FeatureGroupFiltering, ImputerWrapper,
                        StandardScalerWrapper, PCAWrapper)

        fitted_steps = []
        for step_cls in step_classes:
            step = step_cls()
            # each step works on the output of the previous one
            scenario = step.fit_transform(scenario, config)
            fitted_steps.append(step)

        return scenario, fitted_steps

    def fit_pre_solving(self, scenario: ASlibScenario, config: Configuration):
        '''
            fits a pre-solving schedule using Aspeed (Hoos et al, 2015 TPLP)

            Arguments
            ---------
            scenario: autofolio.data.aslib_scenario.ASlibScenario
                aslib scenario at hand
            config: Configuration
                parameter configuration to use for preprocessing

            Returns
            -------
            instance of Aspeed() with a fitted pre-solving schedule if performance_type of scenario is runtime; else None
        '''
        # pre-solving is only done for runtime scenarios
        if scenario.performance_type[0] != "runtime":
            return None

        pre_solver = Aspeed()
        pre_solver.fit(scenario=scenario, config=config)
        return pre_solver

    def fit_selector(self, scenario: ASlibScenario, config: Configuration):
        '''
            fits an algorithm selector for a given scenario wrt a given configuration

            Arguments
            ---------
            scenario: autofolio.data.aslib_scenario.ASlibScenario
                aslib scenario at hand
            config: Configuration
                parameter configuration
        '''

        if config.get("selector") == "PairwiseClassifier":

            clf_class = None
            if config.get("classifier") == "RandomForest":
                clf_class = RandomForest

            selector = PairwiseClassifier(classifier_class=clf_class)
            selector.fit(scenario=scenario, config=config)

        return selector

    def predict(self, scenario: ASlibScenario, config: Configuration, feature_pre_pipeline: list, pre_solver: Aspeed, selector):
        '''
            predicts algorithm schedules wrt a given config
            and given pipelines

            Arguments
            ---------
            scenario: autofolio.data.aslib_scenario.ASlibScenario
                aslib scenario at hand
            config: Configuration
                parameter configuration
            feature_pre_pipeline: list
                list of fitted feature preprocessors
            pre_solver: Aspeed
                pre solver object with a saved static schedule
            selector: autofolio.selector.*
                fitted selector object
        '''

        self.logger.info("Predict on Test")
        for f_pre in feature_pre_pipeline:
            scenario = f_pre.transform(scenario)

        if pre_solver:
            pre_solving_schedule = pre_solver.predict(scenario=scenario)
        else:
            pre_solving_schedule = {}

        pred_schedules = selector.predict(scenario=scenario)

        # combine schedules
        if pre_solving_schedule:
            return dict((inst, pre_solving_schedule.get(inst, []) + schedule) for inst, schedule in pred_schedules.items())
        else:
            return pred_schedules
예제 #32
0
class AutoFolio(object):

    def __init__(self, random_seed: int=12345):
        ''' Constructor 

            Arguments
            ---------
            random_seed: int
                random seed for numpy and random packages
        '''

        np.random.seed(random_seed)  # fix seed
        random.seed(random_seed)

        # I don't know the reason, but without an initial print with
        # logging.info we don't get any output
        logging.info("Init AutoFolio")
        self._root_logger = logging.getLogger()
        self.logger = logging.getLogger("AutoFolio")
        self.cs = None

        self.overwrite_args = None

    def run_cli(self):
        '''
            main method of AutoFolio based on command line interface

            Either predicts with a saved model (args.load), or reads a
            scenario, builds the configuration space and then runs an outer
            cross validation, or tunes / takes the default configuration and
            saves a fitted model or runs a cross validation. If test csv
            files are given, the configuration is also run on the test
            scenario.

            Returns
            -------
                0 after an outer cross validation, otherwise None

            Raises
            ------
            ValueError
                if neither a scenario directory nor performance and feature
                csv files are given
        '''

        cmd_parser = CMDParser()
        args_, self.overwrite_args = cmd_parser.parse()

        self._root_logger.setLevel(args_.verbose)

        # prediction mode: load a stored model, print its schedule and stop
        if args_.load:
            pred = self.read_model_and_predict(
                model_fn=args_.load, feature_vec=list(map(float, args_.feature_vec.split(" "))))
            print("Selected Schedule [(algorithm, budget)]: %s" % (pred))
            return

        scenario = ASlibScenario()
        if args_.scenario:
            scenario.read_scenario(args_.scenario)
        elif args_.performance_csv and args_.feature_csv:
            scenario.read_from_csv(perf_fn=args_.performance_csv,
                                   feat_fn=args_.feature_csv,
                                   objective=args_.objective,
                                   runtime_cutoff=args_.runtime_cutoff,
                                   maximize=args_.maximize,
                                   cv_fn=args_.cv_csv)
        else:
            raise ValueError("Missing inputs to read scenario data.")

        test_scenario = None
        if args_.performance_test_csv and args_.feature_test_csv:
            test_scenario = ASlibScenario()
            test_scenario.read_from_csv(perf_fn=args_.performance_test_csv,
                                        feat_fn=args_.feature_test_csv,
                                        objective=args_.objective,
                                        runtime_cutoff=args_.runtime_cutoff,
                                        maximize=args_.maximize,
                                        cv_fn=None)

        config = {}
        if args_.config is not None:
            self.logger.info("Reading yaml config file")
            # safe_load only builds plain data and needs no Loader argument
            # (newer PyYAML releases reject yaml.load without one); the
            # with-block closes the file, and an empty file yields {}
            with open(args_.config) as config_fp:
                config = yaml.safe_load(config_fp) or {}
        # command line values fill in whatever the yaml file leaves unset
        if not config.get("wallclock_limit"):
            config["wallclock_limit"] = args_.wallclock_limit
        if not config.get("runcount_limit"):
            config["runcount_limit"] = args_.runcount_limit
        if not config.get("output-dir"):
            config["output-dir"] = args_.output_dir

        self.cs = self.get_cs(scenario, config)

        if args_.outer_cv:
            self._outer_cv(scenario, config, args_.outer_cv_fold,
                           args_.out_template, smac_seed=args_.smac_seed)
            return 0

        # from here on, config is a Configuration, not the options dict
        if args_.tune:
            config = self.get_tuned_config(scenario,
                                           wallclock_limit=args_.wallclock_limit,
                                           runcount_limit=args_.runcount_limit,
                                           autofolio_config=config,
                                           seed=args_.smac_seed)
        else:
            config = self.cs.get_default_configuration()
        self.logger.debug(config)

        if args_.save:
            feature_pre_pipeline, pre_solver, selector = self.fit(
                scenario=scenario, config=config)
            self._save_model(
                args_.save, scenario, feature_pre_pipeline, pre_solver, selector, config)
        else:
            self.run_cv(config=config, scenario=scenario, folds=int(scenario.cv_data.max().max()))

        if test_scenario is not None:
            self.run_fold(config=config,
                          fold=0,
                          return_fit=False,
                          scenario=scenario,
                          test_scenario=test_scenario)

    def _outer_cv(self, scenario: ASlibScenario, autofolio_config:dict=None, 
            outer_cv_fold:int=None, out_template:str=None,
            smac_seed:int=42):
        '''
            Evaluate on a scenario using an "outer" cross-fold validation
            scheme. In particular, this ensures that SMAC does not use the test
            set during hyperparameter optimization.

            Arguments
            ---------
            scenario: ASlibScenario
                ASlib Scenario at hand
            
            autofolio_config: dict, or None
                An optional dictionary of configuration options

            outer_cv_fold: int, or None
                If given, then only the single outer-cv fold is processed

            out_template: str, or None
                If given, the learned model and the chosen solvers are written
                to the specified locations. The string is used as a
                string.Template: "$fold" (or "${fold}") is replaced with the
                fold and "$type" (or "${type}") with "pkl" for the model and
                "csv" for the solver choices.

            smac_seed:int 
                random seed for SMAC

            Returns
            -------
            stats: validate.Stats
                Performance over all outer-cv folds

        '''
        import string

        # None means "no options"; the tuning step calls .get on this object
        if autofolio_config is None:
            autofolio_config = {}

        outer_stats = None

        # For each outer split: folds 1..10, or only the requested one
        outer_cv_folds = range(1, 11)
        if outer_cv_fold is not None:
            outer_cv_folds = range(outer_cv_fold, outer_cv_fold+1)

        for cv_fold in outer_cv_folds:

            # Use ASlibScenario.get_split() to get the outer split
            outer_testing, outer_training = scenario.get_split(cv_fold)

            msg = ">>>>> Outer CV fold: {} <<<<<".format(cv_fold)
            self.logger.info(msg)

            # Use ASlibScenario.create_cv_splits() to get an inner-cv
            outer_training.create_cv_splits(n_folds=10)

            # Use AutoFolio.get_tuned_config() to tune on inner-cv
            config = self.get_tuned_config(
                outer_training,
                autofolio_config=autofolio_config,
                seed=smac_seed
            )

            # Use AutoFolio.run_fold() to get the performance on the outer split
            stats, fit, schedule = self.run_fold(
                config,
                scenario,
                cv_fold,
                return_fit=True
            )

            feature_pre_pipeline, pre_solver, selector = fit

            if outer_stats is None:
                outer_stats = stats
            else:
                outer_stats.merge(stats)

            # save the model, if given an output location
            if out_template is not None:
                out_template_ = string.Template(out_template)
                model_fn = out_template_.substitute(fold=cv_fold, type="pkl")

                msg = "Writing model to: {}".format(model_fn)
                self.logger.info(msg)

                self._save_model(
                    model_fn,
                    scenario,
                    feature_pre_pipeline,
                    pre_solver,
                    selector,
                    config
                )

                # convert the schedule to a data frame
                schedule_df = pd.Series(schedule, name="solver")
                schedule_df.index.name = "instance"
                schedule_df = schedule_df.reset_index()

                # just keep the solver name; we don't care about the time

                # x[0] gets the first pair in the schedule list
                # and x[0][0] gets the name of the solver from that pair
                schedule_df['solver'] = schedule_df['solver'].apply(lambda x: x[0][0])

                selections_fn = out_template_.substitute(fold=cv_fold, type="csv")

                msg = "Writing solver choices to: {}".format(selections_fn)
                self.logger.info(msg)

                schedule_df.to_csv(selections_fn, index=False)

        self.logger.info(">>>>> Final Stats <<<<<")
        outer_stats.show()

        # hand the merged statistics back, as the docstring promises
        return outer_stats

    def _save_model(self, out_fn: str, scenario: ASlibScenario, feature_pre_pipeline: list, pre_solver: Aspeed, selector, config: Configuration):
        '''
            save all pipeline objects for predictions

            Arguments
            ---------
            out_fn: str
                filename of output file
            scenario: AslibScenario
                ASlib scenario with all the data
            feature_pre_pipeline: list
                list of preprocessing objects
            pre_solver: Aspeed
                aspeed object with pre-solving schedule
            selector: autofolio.selector.*
                fitted selector object
            config: Configuration
                parameter setting configuration
        '''
        scenario.logger = None
        for fpp in feature_pre_pipeline:
            fpp.logger = None
        if pre_solver:
            pre_solver.logger = None
        selector.logger = None
        model = [scenario, feature_pre_pipeline, pre_solver, selector, config]
        with open(out_fn, "bw") as fp:
            pickle.dump(model, fp)

    def read_model_and_predict(self, model_fn: str, feature_vec: list):
        '''
            loads a pickled model and predicts the algorithm schedule
            selected for a single feature vector

            Arguments
            --------
            model_fn: str
                file name of saved model
            feature_vec: list
                instance feature vector as a list of floats

            Returns
            -------
            list of tuple
                Selected schedule [(algorithm, budget)]
        '''
        # NOTE(review): pickle.load can execute arbitrary code contained in
        # the file; only load model files from trusted sources
        with open(model_fn, "br") as fp:
            model = pickle.load(fp)
        scenario, feature_pre_pipeline, pre_solver, selector, config = model

        # attach fresh loggers to the unpickled pipeline objects
        for step in feature_pre_pipeline:
            step.logger = logging.getLogger("Feature Preprocessing")
        if pre_solver:
            pre_solver.logger = logging.getLogger("Aspeed PreSolving")
        selector.logger = logging.getLogger("Selector")

        # the saved scenario is reduced to one artificial instance that
        # carries the given feature vector
        instance_id = "pseudo_instance"
        scenario.feature_data = pd.DataFrame(
            np.array([feature_vec]),
            index=[instance_id],
            columns=scenario.features)
        scenario.instances = [instance_id]

        schedules = self.predict(scenario=scenario,
                                 config=config,
                                 feature_pre_pipeline=feature_pre_pipeline,
                                 pre_solver=pre_solver,
                                 selector=selector)

        return schedules[instance_id]

    def get_cs(self, scenario: ASlibScenario, autofolio_config:dict=None):
        '''
            returns the parameter configuration space of AutoFolio
            (based on the automl config space: https://github.com/automl/ConfigSpace)

            The space is also stored in self.cs.

            Arguments
            ---------
            scenario: aslib_scenario.aslib_scenario.ASlibScenario
                aslib scenario at hand

            autofolio_config: dict, or None
                An optional dictionary of configuration options. Keys read
                here: "allowed_feature_groups", "pca", "impute", "scale",
                "presolve", "classifier", "regressor", "selector".

            Raises
            ------
            ValueError
                if the list of allowed feature groups is empty
        '''

        # the default (None) means "no options given"
        if autofolio_config is None:
            autofolio_config = {}

        self.cs = ConfigurationSpace()

        # only allow the feature groups specified in the config file
        # by default, though, all of the feature groups are allowed.
        allowed_feature_groups = autofolio_config.get("allowed_feature_groups",
            scenario.feature_steps)

        if len(allowed_feature_groups) == 0:
            msg = "Please ensure at least one feature group is allowed"
            raise ValueError(msg)

        if len(allowed_feature_groups) == 1:
            choices = [True] # if we only have one feature group, it has to be active
        else:
            choices = [True, False]
        default = True

        for fs in allowed_feature_groups:
            fs_param = CategoricalHyperparameter(name="fgroup_%s" % (fs),
                choices=choices, default_value=default)
            self.cs.add_hyperparameter(fs_param)

        # preprocessing; every step can be switched off in the config
        if autofolio_config.get("pca", True):
            PCAWrapper.add_params(self.cs)

        if autofolio_config.get("impute", True):
            ImputerWrapper.add_params(self.cs)

        if autofolio_config.get("scale", True):
            StandardScalerWrapper.add_params(self.cs)

        # Pre-Solving (runtime scenarios only)
        if scenario.performance_type[0] == "runtime":
            if autofolio_config.get("presolve", True):
                Aspeed.add_params(
                    cs=self.cs, cutoff=scenario.algorithm_cutoff_time)

        if autofolio_config.get("classifier"):
            # fix parameter
            cls_choices = [autofolio_config["classifier"]]
            cls_def = autofolio_config["classifier"]
        else:
            cls_choices = ["RandomForest","XGBoost"]
            cls_def = "RandomForest"
        classifier = CategoricalHyperparameter(
                "classifier", choices=cls_choices,
                default_value=cls_def)

        self.cs.add_hyperparameter(classifier)

        RandomForest.add_params(self.cs)
        XGBoost.add_params(self.cs)

        if autofolio_config.get("regressor"):
            # fix parameter
            reg_choices = [autofolio_config["regressor"]]
            reg_def = autofolio_config["regressor"]
        else:
            reg_choices = ["RandomForestRegressor"]
            reg_def = "RandomForestRegressor"

        regressor = CategoricalHyperparameter(
                "regressor", choices=reg_choices, default_value=reg_def)
        self.cs.add_hyperparameter(regressor)
        RandomForestRegressor.add_params(self.cs)

        # selectors
        if autofolio_config.get("selector"):
            # fix parameter
            sel_choices = [autofolio_config["selector"]]
            sel_def = autofolio_config["selector"]
        else:
            sel_choices = ["PairwiseClassifier","PairwiseRegressor"]
            sel_def = "PairwiseClassifier"

        selector = CategoricalHyperparameter(
                "selector", choices=sel_choices, default_value=sel_def)
        self.cs.add_hyperparameter(selector)
        PairwiseClassifier.add_params(self.cs)
        PairwiseRegression.add_params(self.cs)

        self.logger.debug(self.cs)

        return self.cs

    def get_tuned_config(self, scenario: ASlibScenario, 
                         runcount_limit:int=42,
                         wallclock_limit:int=300,
                         autofolio_config:dict=None,
                         seed:int=42):
        '''
            uses SMAC3 to determine a well-performing configuration in the configuration space self.cs on the given scenario

            Arguments
            ---------
            scenario: ASlibScenario
                ASlib Scenario at hand
            runcount_limit: int
                runcount_limit for SMAC scenario
                (overwritten by autofolio_config)
            wallclock_limit: int
                wallclock limit in sec for SMAC scenario
                (overwritten by autofolio_config)
            autofolio_config: dict, or None
                An optional dictionary of configuration options. Keys read
                here: "wallclock_limit", "runcount_limit", "output-dir".
            seed: int
                random seed for SMAC

            Returns
            -------
            Configuration
                best incumbent configuration found by SMAC
        '''

        # None (the default) means "no options given"; this also avoids a
        # dict instance shared between calls as the default argument
        if autofolio_config is None:
            autofolio_config = {}

        wallclock_limit = autofolio_config.get("wallclock_limit", wallclock_limit)
        runcount_limit = autofolio_config.get("runcount_limit", runcount_limit)

        taf = functools.partial(self.called_by_smac, scenario=scenario)
        # highest fold index in the scenario's cv data; one SMAC instance
        # is created per fold
        max_fold = int(scenario.cv_data.max().max())

        ac_scenario = Scenario({"run_obj": "quality",  # we optimize quality
                                "runcount-limit": runcount_limit,
                                "cs": self.cs,  # configuration space
                                "deterministic": "true",
                                "instances": [[str(i)] for i in range(1, max_fold+1)],
                                "wallclock-limit": wallclock_limit,
                                # a missing or empty output dir becomes ""
                                "output-dir": autofolio_config.get("output-dir") or ""
                                })

        # necessary to use stats options related to scenario information
        AC_Stats.scenario = ac_scenario

        # Optimize
        self.logger.info(
            ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
        self.logger.info("Start Configuration")
        self.logger.info(
            ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
        smac = SMAC(scenario=ac_scenario, tae_runner=taf,
                    rng=np.random.RandomState(seed))
        incumbent = smac.optimize()

        self.logger.info("Final Incumbent: %s" % (incumbent))

        return incumbent

    def called_by_smac(self, config: Configuration, scenario: ASlibScenario, instance:str=None, seed:int=1):
        '''
            target function for SMAC: evaluates a configuration on all cv
            folds (no instance given) or on a single cv fold and returns a
            cost that is to be minimized

            Arguments
            ---------
            config: Configuration
                parameter configuration to use for preprocessing
            scenario: aslib_scenario.aslib_scenario.ASlibScenario
                aslib scenario at hand
            instance: str
                cv-fold index; if None, run_cv() is used on all folds
            seed: int
                random seed (not used)

            Returns
            -------
            float: average performance
                (sign flipped if scenario.maximize[0] is set)
        '''

        if instance is None:
            # run_cv() already flips the sign for maximization scenarios;
            # flipping it a second time here would return the raw score
            return self.run_cv(config=config, scenario=scenario)

        try:
            stats = self.run_fold(config=config, scenario=scenario, fold=int(instance))
            perf = stats.show()
        except ValueError:
            if scenario.performance_type[0] == "runtime":
                perf = scenario.algorithm_cutoff_time * 20
            else:
                # try to impute a worst case perf
                perf = scenario.performance_data.max().max()

        if scenario.maximize[0]:
            perf *= -1

        return perf

    def run_cv(self, config: Configuration, scenario: ASlibScenario, folds:int=10):
        '''
            evaluates a configuration on the cv folds 1..folds of a scenario
            and merges the statistics of all folds

            Arguments
            ---------
            scenario: aslib_scenario.aslib_scenario.ASlibScenario
                aslib scenario at hand
            config: Configuration
                parameter configuration to use for preprocessing
            folds: int
                number of cv-splits

            Returns
            -------
            result of show() on the merged statistics; if a ValueError
            occurs on the way, 10 times the algorithm cutoff time.
            The sign is flipped if scenario.maximize[0] is set.
        '''
        #TODO: use seed and instance in an appropriate way
        try:
            # the cutoff is only handed to the statistics of runtime scenarios
            runtime_scenario = scenario.performance_type[0] == "runtime"
            merged = Stats(
                runtime_cutoff=scenario.algorithm_cutoff_time if runtime_scenario else 0)

            for fold_id in range(1, folds + 1):
                self.logger.info("CV-Iteration: %d" % (fold_id))
                fold_stats = self.run_fold(config=config,
                                           scenario=scenario,
                                           fold=fold_id)
                merged.merge(stat=fold_stats)

            self.logger.info(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
            self.logger.info("CV Stats")
            par10 = merged.show()
        except ValueError:
            # report the failure and fall back to a penalty value
            traceback.print_exc()
            par10 = scenario.algorithm_cutoff_time * 10

        if scenario.maximize[0]:
            par10 *= -1
        return par10

    def run_fold(self, config: Configuration, scenario:ASlibScenario, fold:int, test_scenario=None, return_fit:bool=False):
        '''
            fits on the training data, predicts schedules for the test data
            and validates them

            Arguments
            ---------
            scenario: aslib_scenario.aslib_scenario.ASlibScenario
                aslib scenario at hand; split into test and training part
                by <fold> unless <test_scenario> is given
            config: Configuration
                parameter configuration to use for preprocessing
            fold: int
                fold id
            test_scenario:aslib_scenario.aslib_scenario.ASlibScenario
                aslib scenario with test data for validation
                generated from <scenario> if None
            return_fit: bool
                optionally, the learned preprocessing options, presolver and
                selector can be returned

            Returns
            -------
            Stats()

            (pre_pipeline, pre_solver, selector):
                only present if return_fit is True
                the pipeline components fit with the configuration options

            schedule: dict of string -> list of (solver, cutoff) pairs
                only present if return_fit is True
                the solver choices for each instance

            Raises
            ------
            ValueError
                if scenario.performance_type[0] is neither "runtime" nor
                "solution_quality"
        '''

        if test_scenario is not None:
            # explicit test data: train on the whole given scenario
            self.logger.info("Validation on test data")
            training_scenario = scenario
        else:
            self.logger.info("CV-Iteration: %d" % (fold))
            test_scenario, training_scenario = scenario.get_split(indx=fold)

        feature_pre_pipeline, pre_solver, selector = self.fit(
            scenario=training_scenario, config=config)
        schedules = self.predict(
            test_scenario, config, feature_pre_pipeline, pre_solver, selector)

        val = Validator()
        perf_type = scenario.performance_type[0]
        if perf_type == "runtime":
            validate = val.validate_runtime
        elif perf_type == "solution_quality":
            validate = val.validate_quality
        else:
            raise ValueError("Unknown: %s" %(perf_type))
        stats = validate(
            schedules=schedules, test_scenario=test_scenario, train_scenario=training_scenario)

        if not return_fit:
            return stats
        return stats, (feature_pre_pipeline, pre_solver, selector), schedules

    def fit(self, scenario: ASlibScenario, config: Configuration):
        '''
            trains all pipeline parts (feature preprocessing, pre-solving,
            selector) on the given ASlib Scenario

            Arguments
            ---------
            scenario: aslib_scenario.aslib_scenario.ASlibScenario
                aslib scenario at hand
            config: Configuration
                parameter configuration to use for preprocessing;
                adapted first if self.overwrite_args is set

            Returns
            -------
                list of fitted feature preproccessing objects
                pre-solving object
                fitted selector
        '''
        self.logger.info("Given Configuration: %s" % (config))

        overrides = self.overwrite_args
        if overrides:
            config = self._overwrite_configuration(
                config=config, overwrite_args=overrides)
            self.logger.info("Overwritten Configuration: %s" % (config))

        # pre-solver and selector are trained on the preprocessed scenario
        preprocessed = self.fit_transform_feature_preprocessing(scenario, config)
        scenario, feature_pre_pipeline = preprocessed

        return (feature_pre_pipeline,
                self.fit_pre_solving(scenario, config),
                self.fit_selector(scenario, config))

    def _overwrite_configuration(self, config: Configuration, overwrite_args: list):
        '''
            overwrites a given configuration with some new settings

            Arguments
            ---------
            config: Configuration
                initial configuration to be adapted
            overwrite_args: list
                new parameter settings as a list of strings; every pair of
                neighbouring entries is tried as (parameter name, value)

            Returns
            -------
            Configuration
                new configuration on self.cs built from the adapted values
        '''

        def pairwise(iterable):
            # sliding window: s -> (s0, s1), (s1, s2), (s2, s3), ...
            a, b = tee(iterable)
            next(b, None)
            return zip(a, b)

        dict_conf = config.get_dictionary()
        for param, value in pairwise(overwrite_args):
            # look the hyperparameter up once; depending on the lookup it is
            # reported as missing via KeyError or via a None result
            try:
                hyperparameter = self.cs.get_hyperparameter(param)
            except KeyError:
                hyperparameter = None

            if hyperparameter is None:
                # NOTE: the window slides by one entry, so values are probed
                # as parameter names as well and are reported here unless
                # they happen to match a hyperparameter name
                self.logger.warning(
                    "Unknown given parameter: %s %s" % (param, value))
                continue

            # numeric hyperparameters get their value converted; the strings
            # "True"/"False" become booleans; everything else stays a string
            hp_type = type(hyperparameter)
            if hp_type is UniformIntegerHyperparameter:
                dict_conf[param] = int(value)
            elif hp_type is UniformFloatHyperparameter:
                dict_conf[param] = float(value)
            elif value == "True":
                dict_conf[param] = True
            elif value == "False":
                dict_conf[param] = False
            else:
                dict_conf[param] = value

        config = Configuration(self.cs, values=dict_conf, allow_inactive_with_values=True)

        return config

    def fit_transform_feature_preprocessing(self, scenario: ASlibScenario, config: Configuration):
        '''
            runs the feature preprocessing steps one after another on a
            given ASlib scenario wrt to a given configuration; each step
            is fit on the output of the previous one

            Arguments
            ---------
            scenario: aslib_scenario.aslib_scenario.ASlibScenario
                aslib scenario at hand
            config: Configuration
                parameter configuration to use for preprocessing

            Returns
            -------
                scenario as returned by the last preprocessing step
                list of fitted feature preproccessing objects
        '''

        # order of application: feature group filtering, imputation,
        # scaling, PCA
        step_classes = (FeatureGroupFiltering, ImputerWrapper,
                        StandardScalerWrapper, PCAWrapper)

        fitted_steps = []
        for step_class in step_classes:
            step = step_class()
            scenario = step.fit_transform(scenario, config)
            fitted_steps.append(step)

        return scenario, fitted_steps

    def fit_pre_solving(self, scenario: ASlibScenario, config: Configuration):
        '''
            fits a pre-solving schedule with Aspeed; only done for
            scenarios whose performance_type is runtime

            Arguments
            ---------
            scenario: aslib_scenario.aslib_scenario.ASlibScenario
                aslib scenario at hand
            config: Configuration
                parameter configuration to use for preprocessing

            Returns
            -------
            fitted instance of Aspeed() if performance_type of scenario is runtime; else None
        '''
        # nothing to pre-solve unless runtime is the performance measure
        if scenario.performance_type[0] != "runtime":
            return None

        presolver = Aspeed()
        presolver.fit(scenario=scenario, config=config)
        return presolver

    def fit_selector(self, scenario: ASlibScenario, config: Configuration):
        '''
            fits an algorithm selector for a given scenario wrt a given configuration

            Arguments
            ---------
            scenario: aslib_scenario.aslib_scenario.ASlibScenario
                aslib scenario at hand
            config: Configuration
                parameter configuration; "selector" picks the selector and
                "classifier" / "regressor" the model class handed to it

            Returns
            -------
            fitted selector object

            Raises
            ------
            ValueError
                if config names a selector that is not handled here
        '''

        def classifier_class():
            # model class for the classification-based selectors;
            # None if the configured classifier is not known
            choice = config.get("classifier")
            if choice == "RandomForest":
                return RandomForest
            if choice == "XGBoost":
                return XGBoost
            return None

        def regressor_class():
            # model class for the regression-based selectors;
            # None if the configured regressor is not known
            if config.get("regressor") == "RandomForestRegressor":
                return RandomForestRegressor
            return None

        # the branches keep every class name lazy: a name is only looked up
        # when its selector is actually requested
        selector_name = config.get("selector")
        if selector_name == "PairwiseClassifier":
            selector = PairwiseClassifier(classifier_class=classifier_class())
        elif selector_name == "MultiClassifier":
            selector = MultiClassifier(classifier_class=classifier_class())
        elif selector_name == "IndRegressor":
            selector = IndRegression(regressor_class=regressor_class())
        elif selector_name == "JointRegressor":
            selector = JointRegression(regressor_class=regressor_class())
        elif selector_name == "PairwiseRegressor":
            selector = PairwiseRegression(regressor_class=regressor_class())
        else:
            # previously this case ended in an UnboundLocalError on return
            raise ValueError("Unknown selector: %s" % (selector_name,))

        selector.fit(scenario=scenario, config=config)

        return selector

    def predict(self, scenario: ASlibScenario, config: Configuration, feature_pre_pipeline: list, pre_solver: Aspeed, selector):
        '''
            applies the fitted pipeline to a scenario and returns the
            predicted algorithm schedules

            Arguments
            ---------
            scenario: aslib_scenario.aslib_scenario.ASlibScenario
                aslib scenario at hand
            config: Configuration
                parameter configuration
            feature_pre_pipeline: list
                list of fitted feature preprocessors
            pre_solver: Aspeed
                pre solver object with a saved static schedule
                (skipped if it evaluates to False, e.g. None)
            selector: autofolio.selector.*
                fitted selector object

            Returns
            -------
            schedules predicted by the selector; if the pre-solver returned
            a non-empty result, each instance's pre-solving schedule is
            put in front of its predicted schedule
        '''

        self.logger.info("Predict on Test")
        for preprocessor in feature_pre_pipeline:
            scenario = preprocessor.transform(scenario)

        pre_solving_schedule = pre_solver.predict(scenario=scenario) if pre_solver else {}
        pred_schedules = selector.predict(scenario=scenario)

        if not pre_solving_schedule:
            return pred_schedules

        # combine schedules: pre-solving part first, selector part after it
        return {inst: pre_solving_schedule.get(inst, []) + schedule
                for inst, schedule in pred_schedules.items()}
예제 #33
0
from copy import copy, deepcopy
from pickle import dumps, loads

from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter

# Demo: print the probabilities of a weighted categorical hyperparameter next
# to the weights it was created with, for a configuration space, a space that
# embeds it under the prefix "A", and copies of both.
weights = [0.25, 0.5, 0.25]
hp = CategoricalHyperparameter("B", ["1", "2", "3"], weights=weights)

sub_cs = ConfigurationSpace()
sub_cs.add_hyperparameter(hp)

cs = ConfigurationSpace()
cs.add_configuration_space("A", sub_cs)

# ways of duplicating a space: deep copy, shallow copy, pickle round trip
duplicators = (deepcopy, copy, lambda space: loads(dumps(space)))

for duplicate in duplicators:
    print(duplicate(sub_cs).get_hyperparameter("B").probabilities, weights)

# the embedded hyperparameter is addressed as "A:B"; first the space itself,
# then its duplicates
print(cs.get_hyperparameter("A:B").probabilities, weights)
for duplicate in duplicators:
    print(duplicate(cs).get_hyperparameter("A:B").probabilities, weights)
def generate_csv_data(NUM_EVALUATIONS, NUM_BUDGETS, ALLINONE, SEPARATE):
    """Write randomly generated run data as CSV files into two directories.

    Both directories are created if missing. ``ALLINONE`` receives a single
    ``runhistory.csv`` holding results and parameter values; ``SEPARATE``
    receives a ``runhistory.csv`` that refers to rows of ``configurations.csv``
    via a config id. Both directories additionally get ``configspace.json``,
    ``trajectory.csv`` and ``scenario.txt``.

    Arguments
    ---------
    NUM_EVALUATIONS: int
        number of runs to generate
    NUM_BUDGETS: int
        number of budget levels; with a value <= 1 every run gets budget 0,
        otherwise budgets grow in steps of 50 starting at 50
    ALLINONE: str
        output directory for the single-file layout
    SEPARATE: str
        output directory for the layout with separate configurations
    """
    # exist_ok=True avoids the race between a separate existence check and
    # the directory creation
    for directory in (ALLINONE, SEPARATE):
        os.makedirs(directory, exist_ok=True)

    config_space = ConfigurationSpace()
    config_space.add_hyperparameters([UniformFloatHyperparameter('random_parameter_1', 0, 1.2),
                                      UniformIntegerHyperparameter('random_parameter_2', -10, 10),
                                      UniformIntegerHyperparameter('random_parameter_3', 1, 1000)])

    trajectory = []
    runhistory = []
    lowest_cost = np.inf
    start_time = time.time()
    if NUM_BUDGETS <= 1:
        budgets = [0 for _ in range(NUM_EVALUATIONS)]
    else:
        budgets = [50 + 50 * (i // (NUM_EVALUATIONS / NUM_BUDGETS)) for i in range(NUM_EVALUATIONS)]
    for i, budget in enumerate(budgets):
        if i == 0:
            # the first run uses the default value of every hyperparameter
            random1 = config_space.get_hyperparameter('random_parameter_1').default_value
            random2 = config_space.get_hyperparameter('random_parameter_2').default_value
            random3 = config_space.get_hyperparameter('random_parameter_3').default_value
        else:
            random1 = np.random.uniform(0.1, 1.1)
            random2 = np.random.randint(-10, 10)
            random3 = np.random.randint(1, 1000)
        cost = np.random.uniform(np.abs(NUM_EVALUATIONS - i - np.random.randint(50)),
                                 10 * np.log(NUM_EVALUATIONS - i)) * random1
        new_time = time.time() - start_time
        status = 'SUCCESS'
        seed = 42  # should be: np.random.randint(1, 10000000) but seeds are currently not supported with budgets.
        if lowest_cost > cost:
            # a new best run extends the trajectory
            lowest_cost = cost
            trajectory.append([new_time, new_time, i, cost, random1, random2, random3])
        runhistory.append([cost, new_time, status, budget, seed, random1, random2, random3])

    with open(os.path.join(ALLINONE, 'runhistory.csv'), 'w', newline='') as f:
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['cost', 'time', 'status', 'budget', 'seed', 'random_parameter_1', 'random_parameter_2', 'random_parameter_3'])
        writer.writerows(runhistory)

    with open(os.path.join(SEPARATE, 'runhistory.csv'), 'w', newline='') as rh,\
         open(os.path.join(SEPARATE, 'configurations.csv'), 'w', newline='') as configs:
        rh_writer = csv.writer(rh, delimiter=',')
        configs_writer = csv.writer(configs, delimiter=',')
        rh_writer.writerow(['cost', 'time', 'status', 'budget', 'seed', 'config_id'])
        configs_writer.writerow(['CONFIG_ID', 'random_parameter_1', 'random_parameter_2', 'random_parameter_3'])
        for idx, run in enumerate(runhistory):
            # the first five columns are the result, the rest the parameter
            # values; the row index links both files
            rh_writer.writerow(run[:5] + [idx])
            configs_writer.writerow([idx] + run[5:])

    for path in [ALLINONE, SEPARATE]:
        with open(os.path.join(path, 'configspace.json'), 'w') as f:
            f.write(pcs_json.write(config_space))

        with open(os.path.join(path, 'trajectory.csv'), 'w', newline='') as f:
            writer = csv.writer(f, delimiter=',')
            writer.writerow(['cpu_time', 'wallclock_time', 'evaluations', 'cost', 'random_parameter_1', 'random_parameter_2',
                             'random_parameter_3'])
            writer.writerows(trajectory)

        with open(os.path.join(path, 'scenario.txt'), 'w' ) as f:
            f.write('paramfile = {}\nrun_obj = quality'.format(os.path.join(os.path.basename(path.rstrip('/')),
                                                                            'configspace.json')))
예제 #35
0
def read(pcs_string, debug=False):
    """Build a ConfigurationSpace from the lines of a parameter description.

    Every line is classified after comments (``#`` to end of line) and
    quotes have been removed:

    * lines containing ``|`` are conditions,
    * lines enclosed in ``{`` ... ``}`` are forbidden clauses,
    * remaining lines containing ``}`` or ``]`` are parameter definitions
      (integer/real, categorical or ordinal),
    * all other lines are ignored.

    Parameters are added first, then forbidden clauses, then conditions.

    Arguments
    ---------
    pcs_string: iterable of str
        the lines to parse
    debug: bool
        not used

    Returns
    -------
    ConfigurationSpace
        the configuration space described by the lines

    Raises
    ------
    NotImplementedError
        if a condition or parameter line cannot be parsed, or a forbidden
        clause uses an operator other than ``=``
    """
    configuration_space = ConfigurationSpace()
    conditions = []
    forbidden = []

    # hyperparameter class per parameter type
    create = {"int": UniformIntegerHyperparameter,
              "float": UniformFloatHyperparameter,
              "categorical": CategoricalHyperparameter,
              "ordinal": OrdinalHyperparameter
              }
    # type keyword in a numerical parameter line -> key into `create`
    numeric_types = {'integer': 'int', 'real': 'float'}

    def specify(child_name, clause):
        # turn one whitespace-separated clause into a condition on the child
        return condition_specification(child_name, clause.split(), configuration_space)

    for line in pcs_string:
        # Drop a trailing comment
        line = line.partition("#")[0]

        # Remove quotes and whitespaces at beginning and end
        line = line.replace('"', "").replace("'", "")
        line = line.strip()
        if "|" in line:
            # It's a condition
            try:
                c = pp_condition.parseString(line)
                conditions.append(c)
            except pyparsing.ParseException:
                raise NotImplementedError("Could not parse condition: %s" % line)
            continue
        if "}" not in line and "]" not in line:
            # neither a parameter nor a forbidden clause (covers empty lines)
            continue
        if line.startswith("{") and line.endswith("}"):
            forbidden.append(line)
            continue

        param = None

        # first attempt: numerical parameter
        try:
            param_list = pp_cont_param.parseString(line)
            name = param_list[0]
            paramtype = numeric_types.get(param_list[1])

            if paramtype is not None:
                # anything after the first ten tokens is the optional log flag
                log = param_list[10:]
                param_list = param_list[:10]
                if len(log) > 0:
                    log = log[0]
                lower = float(param_list[3])
                upper = float(param_list[5])
                log_on = "log" in log
                default = float(param_list[8])
                param = create[paramtype](name=name, lower=lower, upper=upper,
                                          q=None, log=log_on, default=default)

        except pyparsing.ParseException:
            pass

        # second attempt: categorical or ordinal parameter
        try:
            if "categorical" in line:
                param_list = pp_cat_param.parseString(line)
                name = param_list[0]
                # every second token between the braces is a value
                choices = list(param_list[3:-4:2])
                default = param_list[-2]
                param = create["categorical"](name=name, choices=choices, default=default)

            elif "ordinal" in line:
                param_list = pp_ord_param.parseString(line)
                name = param_list[0]
                sequence = list(param_list[3:-4:2])
                default = param_list[-2]
                param = create["ordinal"](name=name, sequence=sequence, default=default)

        except pyparsing.ParseException:
            pass

        if param is None:
            raise NotImplementedError("Could not parse: %s" % line)

        configuration_space.add_hyperparameter(param)

    for clause in forbidden:
        param_list = pp_forbidden_clause.parseString(clause)
        tmp_list = []
        clause_list = []
        # tokens are collected in groups of three (name, operator, value);
        # the token following a complete group triggers its conversion
        for value in param_list[1:]:
            if len(tmp_list) < 3:
                tmp_list.append(value)
            else:
                # So far, only equals is supported by SMAC
                if tmp_list[1] == '=':
                    # TODO maybe add a check if the hyperparameter is
                    # actually in the configuration space
                    clause_list.append(ForbiddenEqualsClause(
                        configuration_space.get_hyperparameter(tmp_list[0]),
                        tmp_list[2]))
                else:
                    raise NotImplementedError()
                tmp_list = []
        configuration_space.add_forbidden_clause(ForbiddenAndConjunction(
            *clause_list))

    # group the conditions by the child they belong to, keeping the order
    conditions_per_child = OrderedDict()
    for condition in conditions:
        conditions_per_child.setdefault(condition[0], []).append(condition)

    for child_name in conditions_per_child:
        for condition in conditions_per_child[child_name]:
            # the first two tokens are skipped; the rest is the condition
            condition = ' '.join(condition[2:])
            or_parts = condition.split('||')
            if len(or_parts) > 1:
                # disjunction; every alternative is either a single clause
                # or a conjunction of clauses joined by &&
                alternatives = []
                for or_part in or_parts:
                    and_parts = or_part.split('&&')
                    if len(and_parts) == 1:
                        alternatives.append(specify(child_name, or_part))
                    else:
                        # each clause is specified from its own tokens only
                        # (they used to be repeated once per clause)
                        ands = [specify(child_name, and_part) for and_part in and_parts]
                        alternatives.append(AndConjunction(*ands))
                configuration_space.add_condition(OrConjunction(*alternatives))
            elif '&&' in condition:
                # pure conjunction
                ands = [specify(child_name, and_part) for and_part in condition.split('&&')]
                configuration_space.add_condition(AndConjunction(*ands))
            else:
                # a single plain condition
                configuration_space.add_condition(specify(child_name, condition))

    return configuration_space
예제 #36
0
def read(pcs_string, debug=False):
    """Build a ``ConfigurationSpace`` from the lines of a PCS description.

    Every item of ``pcs_string`` is treated as one line. After comments
    (everything from the first ``#``) and quote characters are removed, a
    line is classified as follows:

    * it contains ``|``: a condition, parsed with ``pp_condition``;
    * it contains neither ``}`` nor ``]``: ignored;
    * it starts with ``{`` and ends with ``}``: a forbidden clause, kept
      for later;
    * anything else: a hyperparameter definition, tried against
      ``pp_cont_param`` and then ``pp_cat_param``.

    Args:
        pcs_string: Iterable yielding the lines of the PCS description.
        debug: Accepted for interface compatibility; not read by this
            function.

    Returns:
        The populated ``ConfigurationSpace``.

    Raises:
        NotImplementedError: If a condition line or a hyperparameter line
            cannot be parsed, or if a forbidden clause uses an operator
            other than ``=``.
    """
    configuration_space = ConfigurationSpace()
    parsed_conditions = []
    forbidden_lines = []

    # Bookkeeping counters: updated while parsing, never returned.
    ct = 0
    cont_ct = 0
    cat_ct = 0
    line_ct = 0

    hyperparameter_classes = {
        "int": UniformIntegerHyperparameter,
        "float": UniformFloatHyperparameter,
        "categorical": CategoricalHyperparameter
    }

    for raw_line in pcs_string:
        line_ct += 1

        # Cut off a trailing comment, then drop quotes and outer whitespace.
        text = raw_line.partition("#")[0]
        text = text.replace('"', "").replace("'", "").strip()

        if "|" in text:
            # Condition line
            try:
                parsed_conditions.append(pp_condition.parseString(text))
            except pyparsing.ParseException:
                raise NotImplementedError("Could not parse condition: %s" %
                                          text)
            continue
        if not ("}" in text or "]" in text):
            continue
        if text.startswith("{") and text.endswith("}"):
            forbidden_lines.append(text)
            continue
        if not text.strip():
            continue

        ct += 1
        hyperparameter = None

        # First attempt: numerical (int / float) hyperparameter.
        try:
            tokens = pp_cont_param.parseString(text)
        except pyparsing.ParseException:
            pass
        else:
            # Anything after the ninth token carries the "i" / "l" flags.
            modifiers = tokens[9:]
            if len(modifiers) > 0:
                modifiers = modifiers[0]
            tokens = tokens[:9]
            kind = "int" if "i" in modifiers else "float"
            hyperparameter = hyperparameter_classes[kind](
                name=tokens[0],
                lower=float(tokens[2]),
                upper=float(tokens[4]),
                q=None,
                log="l" in modifiers,
                default=float(tokens[7]))
            cont_ct += 1

        # Second attempt: categorical hyperparameter. A successful parse
        # here replaces whatever the first attempt produced.
        try:
            tokens = pp_cat_param.parseString(text)
        except pyparsing.ParseException:
            pass
        else:
            hyperparameter = hyperparameter_classes["categorical"](
                name=tokens[0],
                choices=list(tokens[2:-4:2]),
                default=tokens[-2])
            cat_ct += 1

        if hyperparameter is None:
            raise NotImplementedError("Could not parse: %s" % text)

        configuration_space.add_hyperparameter(hyperparameter)

    for forbidden_line in forbidden_lines:
        # TODO test this properly!
        # TODO Add a try/catch here!
        tokens = pp_forbidden_clause.parseString(forbidden_line)
        pending = []
        equals_clauses = []
        for token in tokens[1:]:
            if len(pending) < 3:
                pending.append(token)
                continue
            # A full (name, operator, value) triple is waiting; the token
            # that triggers its emission is itself discarded.
            # So far, only equals is supported by SMAC
            if pending[1] != '=':
                raise NotImplementedError()
            # TODO maybe add a check if the hyperparameter is actually in
            # the configuration space
            equals_clauses.append(
                ForbiddenEqualsClause(
                    configuration_space.get_hyperparameter(pending[0]),
                    pending[2]))
            pending = []
        configuration_space.add_forbidden_clause(
            ForbiddenAndConjunction(*equals_clauses))

    # Now handle conditions.
    # Several conditions on the same child form an AND-conjunction, so they
    # are grouped by child name first (insertion order is preserved).
    conditions_per_child = OrderedDict()
    for parsed in parsed_conditions:
        conditions_per_child.setdefault(parsed[0], list()).append(parsed)

    for child_name, child_conditions in conditions_per_child.items():
        built = []
        for parsed in child_conditions:
            child = configuration_space.get_hyperparameter(child_name)
            parent = configuration_space.get_hyperparameter(parsed[2])
            restrictions = parsed[5:-1:2]

            # TODO: cast the type of the restriction!
            if len(restrictions) == 1:
                built.append(EqualsCondition(child, parent, restrictions[0]))
            else:
                built.append(InCondition(child, parent, values=restrictions))

        # A single condition is added as-is; two or more are wrapped in one
        # AND-conjunction.
        if len(built) > 1:
            configuration_space.add_condition(AndConjunction(*built))
        else:
            configuration_space.add_condition(built[0])

    return configuration_space
예제 #37
0
    def add_params(cs: ConfigurationSpace):
        """Register the random-forest hyperparameters on ``cs``.

        The ``classifier`` hyperparameter is looked up first; if it exists,
        ``"RandomForest"`` is appended to its choices, otherwise it is
        created with that single choice and added to ``cs``. Seven
        ``rf:*`` hyperparameters are then added, each made conditional on
        ``classifier`` being ``"RandomForest"``.
        """
        forest_label = "RandomForest"

        try:
            classifier = cs.get_hyperparameter("classifier")
            classifier.choices.append(forest_label)
        except KeyError:
            classifier = CategoricalHyperparameter("classifier",
                                                   choices=[forest_label],
                                                   default=forest_label)
            cs.add_hyperparameter(classifier)

        # Hyperparameters that only apply when the forest is selected, kept
        # in the order they are added to the space.
        forest_only = []

        def register(hyperparameter):
            cs.add_hyperparameter(hyperparameter)
            forest_only.append(hyperparameter)

        register(
            UniformIntegerHyperparameter(name="rf:n_estimators",
                                         lower=10,
                                         upper=100,
                                         default=10,
                                         log=True))
        register(
            CategoricalHyperparameter(name="rf:criterion",
                                      choices=["gini", "entropy"],
                                      default="gini"))
        register(
            CategoricalHyperparameter(name="rf:max_features",
                                      choices=["sqrt", "log2", None],
                                      default="sqrt"))
        register(
            UniformIntegerHyperparameter(name="rf:max_depth",
                                         lower=10,
                                         upper=2**31,
                                         default=2**31,
                                         log=True))
        register(
            UniformIntegerHyperparameter(name="rf:min_samples_split",
                                         lower=2,
                                         upper=100,
                                         default=2,
                                         log=True))
        register(
            UniformIntegerHyperparameter(name="rf:min_samples_leaf",
                                         lower=2,
                                         upper=100,
                                         default=10,
                                         log=True))
        register(
            CategoricalHyperparameter(name="rf:bootstrap",
                                      choices=[True, False],
                                      default=True))

        # One condition per hyperparameter, in the same order as above.
        for dependent in forest_only:
            cs.add_condition(
                InCondition(child=dependent,
                            parent=classifier,
                            values=[forest_label]))
예제 #38
0
    def _get_hyperparameter_search_space(
        self,
        dataset_properties: Dict[str, BaseDatasetPropertiesType],
        include: Optional[Dict[str, Any]] = None,
        exclude: Optional[Dict[str, Any]] = None,
    ) -> ConfigurationSpace:
        """Create the hyperparameter configuration space.

        For the given steps, and the Choices within that steps,
        this procedure returns a configuration space object to
        explore.

        The resulting space and the (possibly adjusted) dataset properties
        are also stored on ``self.configuration_space`` and
        ``self.dataset_properties``.

        Args:
            dataset_properties (Dict[str, BaseDatasetPropertiesType]):
                Characteristics of the dataset to guide the pipeline
                choices of components. A non-dict value is replaced by an
                empty dict (with a warning). When a dict is passed, its
                ``'target_type'`` entry is set in place to
                ``'tabular_classification'``.
            include (Optional[Dict[str, Any]]):
                What hyper-parameter configurations
                to honor when creating the configuration space
            exclude (Optional[Dict[str, Any]]):
                What hyper-parameter configurations
                to remove from the configuration space

        Returns:
            cs (ConfigurationSpace):
                The configuration space describing
                the TabularClassificationPipeline.

        Raises:
            ValueError: If no default for ``network_embedding:__choice__``
                allows the forbidden clauses to be added.
        """
        cs = ConfigurationSpace()

        if not isinstance(dataset_properties, dict):
            # The two literals are concatenated by the parser, so the
            # separating space has to live inside one of them.
            warnings.warn(
                'The given dataset_properties argument contains an illegal value. '
                'Proceeding with the default value')
            dataset_properties = dict()

        if 'target_type' not in dataset_properties:
            dataset_properties['target_type'] = 'tabular_classification'
        if dataset_properties['target_type'] != 'tabular_classification':
            warnings.warn(
                'Tabular classification is being used, however the target_type '
                'is not given as "tabular_classification". Overriding it.')
            dataset_properties['target_type'] = 'tabular_classification'
        # get the base search space given this
        # dataset properties. Then overwrite with custom
        # classification requirements
        cs = self._get_base_search_space(cs=cs,
                                         dataset_properties=dataset_properties,
                                         exclude=exclude,
                                         include=include,
                                         pipeline=self.steps)

        # Here we add custom code, that is used to ensure valid configurations, For example
        # Learned Entity Embedding is only valid when encoder is one hot encoder
        if 'network_embedding' in self.named_steps.keys(
        ) and 'encoder' in self.named_steps.keys():
            embeddings = cs.get_hyperparameter(
                'network_embedding:__choice__').choices
            if 'LearnedEntityEmbedding' in embeddings:
                encoders = cs.get_hyperparameter('encoder:__choice__').choices
                # Fallback defaults to try when a forbidden clause rejects
                # the current default embedding.
                possible_default_embeddings = list(embeddings)
                possible_default_embeddings.remove('LearnedEntityEmbedding')

                for encoder in encoders:
                    if encoder == 'OneHotEncoder':
                        continue
                    # Retry until the clause is accepted; every ValueError
                    # consumes one fallback default.
                    while True:
                        try:
                            cs.add_forbidden_clause(
                                ForbiddenAndConjunction(
                                    ForbiddenEqualsClause(
                                        cs.get_hyperparameter(
                                            'network_embedding:__choice__'),
                                        'LearnedEntityEmbedding'),
                                    ForbiddenEqualsClause(
                                        cs.get_hyperparameter(
                                            'encoder:__choice__'), encoder)))
                            break
                        except ValueError:
                            # change the default and try again
                            try:
                                default = possible_default_embeddings.pop()
                            except IndexError:
                                raise ValueError(
                                    "Cannot find a legal default configuration"
                                )
                            cs.get_hyperparameter(
                                'network_embedding:__choice__'
                            ).default_value = default

        self.configuration_space = cs
        self.dataset_properties = dataset_properties
        return cs
예제 #39
0
    def __rely_model(self, cs: ConfigurationSpace):
        """Tune and apply estimator-choice probabilities on ``cs``.

        Does nothing when ``RelyModels.info`` is empty, or when no rely mode
        maps to any of the choices of ``estimator:__choice__``. Otherwise:

        1. For each rely mode (first element of every ``RelyModels.info``
           entry) the matching models are obtained from
           ``self.get_forbid_hit_in_models_by_rely``.
        2. ``relied2models`` is derived from those lists by subtracting the
           models of other rely modes whose list is not longer.
        3. A hyperopt TPE search (100 evaluations, seeded with 42) looks for
           one probability per rely mode; each candidate is scored on a deep
           copy of ``cs`` by sampling configurations and measuring how
           evenly the estimator choices are drawn.
        4. The best probabilities are printed and written into ``cs`` via
           ``self.set_probabilities_in_cs``.

        Args:
            cs: Configuration space to update in place.
        """
        if not RelyModels.info:
            return
        all_models = list(
            cs.get_hyperparameter("estimator:__choice__").choices)
        # Number of RelyModels.info entries per rely mode.
        rely_model_counter = Counter([x[0] for x in RelyModels.info])
        # rely mode -> all corresponding models
        relied2AllModels = {}
        # rely mode -> corresponding models without overlap
        relied2models = {}
        for rely_model in rely_model_counter.keys():
            _, hit = self.get_forbid_hit_in_models_by_rely(
                all_models, rely_model)
            relied2AllModels[rely_model] = hit
        # If a rely mode does not correspond to any model, drop it
        for k, v in list(relied2AllModels.items()):
            if not v:
                relied2AllModels.pop(k)
                rely_model_counter.pop(k)
        has_any_hit = any(relied2AllModels.values())
        if not has_any_hit:
            return
        # Compute relied2models (the non-overlapping models) by rule
        relied_cnts_tuples = [(k, v) for k, v in rely_model_counter.items()]
        # Process rely modes in ascending order of their entry count.
        relied_cnts_tuples.sort(key=lambda x: x[-1])
        # Pairs of rely modes already resolved, stored in both orders.
        visited = set()
        for rely_model, _ in relied_cnts_tuples:
            models = relied2AllModels[rely_model]
            for other in set(rely_model_counter.keys()) - {rely_model}:
                if (rely_model, other) in visited:
                    continue
                other_models = relied2AllModels[other]
                if len(other_models) <= len(models):
                    models = list(set(models) - set(other_models))
                    visited.add((rely_model, other))
                    visited.add((other, rely_model))
            relied2models[rely_model] = models

        # Key order follows rely_model_counter.keys()
        def objective(relyModel2prob, debug=False):
            # Score one candidate mapping of rely mode -> probability; lower
            # is better. Returns np.inf when sampling raises.
            # relyModel2prob = {rely_model: prob for rely_model, prob in zip(list(rely_model_counter.keys()), args)}
            cur_cs = deepcopy(cs)
            self.set_probabilities_in_cs(cur_cs, relied2models,
                                         relied2AllModels, all_models,
                                         **relyModel2prob)

            cur_cs.seed(42)
            try:
                counter = Counter([
                    _hp.get("estimator:__choice__")
                    for _hp in cur_cs.sample_configuration(
                        len(all_models) * 15)
                ])

                if debug:
                    print(counter)
            except Exception:
                return np.inf
            vl = list(counter.values())
            # Variance of the per-estimator sample counts, plus a term that
            # shrinks as more distinct estimators are sampled.
            # NOTE(review): `models` is the closure variable left over from
            # the loop above (the last rely mode's list); `all_models` may
            # have been intended. Either way the term differs only by a
            # constant within one search - confirm before changing.
            return np.var(vl) + 100 * (len(models) - len(vl))

        # One uniform search dimension per rely mode, bounded so that each
        # value stays within (eps, 1 / N_rely_model - eps).
        space = {}
        eps = 0.001
        N_rely_model = len(rely_model_counter.keys())
        for rely_model in rely_model_counter.keys():
            space[rely_model] = hp.uniform(rely_model, eps,
                                           (1 / N_rely_model) - eps)

        best = fmin(
            fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=100,
            rstate=np.random.RandomState(42),
            show_progressbar=False,
        )
        print("best =", best)
        # Re-evaluate the winner with debug=True to print its sample counts.
        objective(best, debug=True)
        self.set_probabilities_in_cs(cs, relied2models, relied2AllModels,
                                     all_models, **best)