예제 #1
0
파일: util.py 프로젝트: Neeratyoy/MMFB
def map_to_config(cs: CS.ConfigurationSpace,
                  vector: Union[np.array, List]) -> CS.Configuration:
    """Build a Configuration whose values are taken position-wise from ``vector``.

    The hyperparameter names of ``cs`` are ordered with ``np.sort`` and the
    i-th name in that order receives ``vector[i]``.

    Parameters
    ----------
    cs : ConfigSpace.ConfigurationSpace
        Space that supplies the hyperparameter definitions.
    vector : np.array or list
        One value per hyperparameter, in sorted-name order.

    Returns
    -------
    ConfigSpace.Configuration
        A configuration sampled from ``cs`` whose entries were overwritten
        one by one with the (cast) values of ``vector``.
    """
    # A freshly sampled configuration serves as the container to overwrite.
    result = cs.sample_configuration()
    sorted_names = np.sort(cs.get_hyperparameter_names())
    for position, raw_name in enumerate(sorted_names):
        param = cs.get_hyperparameter(str(raw_name))
        value = vector[position]
        if isinstance(param, CS.UniformIntegerHyperparameter):
            value = int(value)
        elif isinstance(param, CS.UniformFloatHyperparameter):
            # Casting to a 64-bit float can add spurious precision that pushes
            # the value just outside [lower, upper]; clipping pulls it back.
            value = np.clip(float(value), param.lower, param.upper)
        result[param.name] = value
    return result
예제 #2
0
    def _check_and_cast_configuration(configuration: Union[Dict, ConfigSpace.Configuration],
                                      configuration_space: ConfigSpace.ConfigurationSpace) \
            -> ConfigSpace.Configuration:
        """ Helper-function to evaluate the given configuration.
            Cast it to a ConfigSpace.Configuration and evaluate if it violates its boundaries.

            Parameters
            ----------
            configuration : dict or ConfigSpace.Configuration
                A dict is converted into a ConfigSpace.Configuration of `configuration_space`
                (values for inactive hyperparameters are tolerated during the conversion).
                A ConfigSpace.Configuration is used as it is.
            configuration_space : ConfigSpace.ConfigurationSpace
                The space the configuration is cast to and checked against.

            Returns
            -------
            ConfigSpace.Configuration
                The result of `deactivate_inactive_hyperparameters`, after it passed
                `configuration_space.check_configuration`.

            Raises
            ------
            TypeError
                If `configuration` is neither a dict nor a ConfigSpace.Configuration.

            Note:
                We remove inactive hyperparameters from the given configuration. Inactive hyperparameters are
                hyperparameters that are not relevant for a configuration, e.g. hyperparameter A is only relevant if
                hyperparameter B=1 and if B!=1 then A is inactive and will be removed from the configuration.
                Since the authors of the benchmark removed those parameters explicitly, they should also handle the
                cases that inactive parameters are not present in the input-configuration.
        """

        if isinstance(configuration, dict):
            configuration = ConfigSpace.Configuration(configuration_space, configuration,
                                                      allow_inactive_with_values=True)
        elif not isinstance(configuration, ConfigSpace.Configuration):
            # Only the two types handled above are supported; the message names exactly those.
            raise TypeError(f'Configuration has to be from type dict or '
                            f'ConfigSpace.Configuration but was {type(configuration)}')

        all_hps = set(configuration_space.get_hyperparameter_names())
        active_hps = configuration_space.get_active_hyperparameters(configuration)
        inactive_hps = all_hps - active_hps

        if inactive_hps:
            logger.debug(f'There are {len(inactive_hps)} inactive hyperparameters: {inactive_hps}. '
                         'Going to remove them from the configuration.')

        configuration = deactivate_inactive_hyperparameters(configuration, configuration_space)
        # NOTE(review): presumably raises if the configuration is illegal for the space - confirm
        # against the ConfigSpace version in use.
        configuration_space.check_configuration(configuration)

        return configuration
예제 #3
0
파일: util.py 프로젝트: Neeratyoy/MMFB
def get_fidelity_grid(
    cs: CS.ConfigurationSpace,
    grid_step_size: int = 10,
    convert_to_configspace: bool = False,
    include_sh_budgets: bool = True
) -> Union[List[Tuple], List[CS.Configuration]]:
    """Build the cartesian product of per-fidelity value grids.

    Fidelities are visited in ``np.sort`` order of their names. A Constant
    contributes its single value; every other fidelity contributes
    ``grid_step_size`` points between its lower and upper bound (evenly spaced
    in log space when the fidelity is log-scaled, linearly otherwise).

    Parameters
    ----------
    cs : ConfigSpace.ConfigurationSpace
    grid_step_size : int
        The number of steps to divide a parameter dimension into
    convert_to_configspace : bool
        If True, returns a list of ConfigSpace objects of each point in the grid
        If False, returns a list of tuples containing the values of each point in grid
    include_sh_budgets : bool
        If True, the values returned by ``generate_SH_fidelities`` are merged
        into each dimension for every eta in ``selected_etas`` (currently
        only eta=3)

    Returns
    -------
    list
    """
    selected_etas = [3]
    axes = []
    for raw_name in np.sort(cs.get_hyperparameter_names()):
        fidelity = cs.get_hyperparameter(str(raw_name))
        if isinstance(fidelity, CS.Constant):
            axes.append([fidelity.value])
            continue

        if fidelity.log:
            log_points = np.linspace(np.log(fidelity.lower),
                                     np.log(fidelity.upper),
                                     grid_step_size)
            # exp(log(x)) may drift slightly past the bounds, hence the clip.
            points = np.clip(np.exp(log_points), fidelity.lower, fidelity.upper)
        else:
            points = np.linspace(fidelity.lower, fidelity.upper, grid_step_size)

        if include_sh_budgets:
            # Start from an empty float array, then the per-eta budgets,
            # then the regular grid; duplicates are dropped by np.unique.
            pieces = [np.array([])]
            for eta in selected_etas:
                pieces.append(
                    generate_SH_fidelities(fidelity.lower, fidelity.upper, eta))
            pieces.append(points)
            points = np.unique(np.concatenate(pieces))

        if isinstance(fidelity, CS.UniformIntegerHyperparameter):
            points = points.astype(int)
        # The integer cast can create duplicates, so de-duplicate again.
        axes.append(np.unique(points))

    grid_points = itertools.product(*axes)
    if convert_to_configspace:
        return [map_to_config(cs, point) for point in grid_points]
    return list(grid_points)
예제 #4
0
파일: util.py 프로젝트: Neeratyoy/MMFB
def get_parameter_grid(
    cs: CS.ConfigurationSpace,
    grid_step_size: int = 10,
    convert_to_configspace: bool = False
) -> Union[List[Tuple], List[CS.Configuration]]:
    """Build the cartesian product of per-hyperparameter value grids.

    Hyperparameters are visited in ``np.sort`` order of their names.
    Categorical, ordinal and constant hyperparameters contribute their
    choices, sequence and single value respectively. Every other
    hyperparameter contributes ``grid_step_size`` float32 points between its
    bounds (evenly spaced in log space when log-scaled, linearly otherwise),
    cast to int for integer hyperparameters and de-duplicated.

    Parameters
    ----------
    cs : ConfigSpace.ConfigurationSpace
    grid_step_size : int
        The number of steps to divide a parameter dimension into
    convert_to_configspace : bool
        If True, returns a list of ConfigSpace objects of each point in the grid
        If False, returns a list of tuples containing the values of each point in grid

    Returns
    -------
    list
    """
    axes = []
    for raw_name in np.sort(cs.get_hyperparameter_names()):
        param = cs.get_hyperparameter(str(raw_name))
        if isinstance(param, CS.CategoricalHyperparameter):
            axes.append(param.choices)
            continue
        if isinstance(param, CS.OrdinalHyperparameter):
            axes.append(param.sequence)
            continue
        if isinstance(param, CS.Constant):
            axes.append([param.value])
            continue

        if param.log:
            log_points = np.linspace(np.log(param.lower), np.log(param.upper),
                                     grid_step_size)
            # exp(log(x)) may drift slightly past the bounds, hence the clip.
            points = np.clip(np.exp(log_points), param.lower, param.upper)
            points = points.astype(np.float32)
        else:
            points = np.linspace(param.lower, param.upper, grid_step_size,
                                 dtype=np.float32)

        if isinstance(param, CS.UniformIntegerHyperparameter):
            points = points.astype(int)
        axes.append(np.unique(points).tolist())

    grid_points = itertools.product(*axes)
    if convert_to_configspace:
        return [map_to_config(cs, point) for point in grid_points]
    return list(grid_points)
예제 #5
0
파일: util.py 프로젝트: Neeratyoy/MMFB
def get_discrete_configspace(configspace: CS.ConfigurationSpace,
                             grid_size: int = 10,
                             seed: Union[int, None] = None,
                             fidelity_space: bool = False):
    """ Derive a discretized ConfigurationSpace from a generally defined space

    A value grid is computed for ``configspace`` and every hyperparameter is
    re-declared as an OrdinalHyperparameter whose sequence holds the unique
    grid values of that dimension.

    Parameters
    ----------
    configspace : ConfigSpace.ConfigurationSpace
    grid_size : int
        The number of steps to divide a parameter dimension into
    seed : int
        Seed handed to the newly created ConfigurationSpace
    fidelity_space : bool
        If True the grid comes from ``get_fidelity_grid``, otherwise from
        ``get_parameter_grid``

    Returns
    -------
    ConfigSpace.ConfigurationSpace
    """
    grid_builder = get_fidelity_grid if fidelity_space else get_parameter_grid
    # One row per grid point, one column per hyperparameter (sorted-name order).
    grid_frame = pd.DataFrame(grid_builder(configspace, grid_size))

    discrete_space = CS.ConfigurationSpace(seed=seed)
    sorted_names = np.sort(configspace.get_hyperparameter_names()).tolist()
    for column, hp_name in enumerate(sorted_names):
        values = grid_frame.iloc[:, column].unique()
        source_hp = configspace.get_hyperparameter(hp_name)
        if isinstance(source_hp, CS.UniformIntegerHyperparameter):
            values = values.astype(int)
        elif isinstance(source_hp, CS.UniformFloatHyperparameter):
            values = values.astype(np.float32)
        discrete_space.add_hyperparameter(
            CS.OrdinalHyperparameter(str(hp_name), values))
    return discrete_space
예제 #6
0
    def extract_configs(self, data, cs: ConfigurationSpace, id_to_config=None):
        """
        After completion, every unique configuration in the data will have a
        corresponding id in the data-frame.
        The data-frame is expected to either contain a column for config-id OR
        columns for each individual hyperparameter. Parameter-names will be used
        from the provided configspace.
        If a mapping of ids to configurations already exists, it will be used.

        The passed data-frame is not modified; a new data-frame is returned.

        Parameters
        ----------
        data: pd.DataFrame
            pandas dataframe containing either a column called `config_id` or a
            column for every individual parameter
        cs: ConfigurationSpace
            provides the hyperparameter names that are looked up as columns and
            is handed to `fix_types` / `deactivate_inactive_hyperparameters`
        id_to_config: dict[int:Configuration]
            optional, mapping ids to Configurations (necessary when using
            `config_id`-column)

        Returns
        -------
        data: pd.DataFrame
            if no config-id-columns was there before, there is one now.
        id_to_config: dict
            mapping every id to a configuration

        Raises
        ------
        ValueError
            if `data` has a `config_id` column but no `id_to_config` was given,
            or if `data` contains config-ids missing from `id_to_config`.
        """
        if id_to_config:
            config_to_id = {conf: conf_id for conf_id, conf in id_to_config.items()}
        else:
            id_to_config = {}
            config_to_id = {}

        parameters = cs.get_hyperparameter_names()

        if 'config_id' in data.columns and not id_to_config:
            raise ValueError("When defining configs with \"config_id\" "
                             "in header, you need to provide the argument "
                             "\"configurations\" to the CSV2RH-object - "
                             "either as a dict, mapping the id's to "
                             "Configurations or as a path to a csv-file "
                             "containing the necessary information.")

        if 'config_id' not in data.columns:
            # Map to configurations. `assign` returns a new frame, so the
            # caller's data-frame does not receive a placeholder column.
            data = data.assign(config_id=-1)

            # Ids supplied by the caller need not be 0..n-1, so simply using
            # len(config_to_id) could hand out an id that is already taken.
            # Start there (identical to the previous numbering whenever no
            # clash occurs) and skip any id that is already in use.
            used_ids = set(config_to_id.values())
            next_id = len(config_to_id)

            def add_config(row):
                nonlocal next_id
                # Empty strings mark parameters without a value in this row.
                values = {
                    name: row[name]
                    for name in parameters if row[name] != ''
                }
                config = deactivate_inactive_hyperparameters(
                    fix_types(values, cs), cs)
                if config not in config_to_id:
                    while next_id in used_ids:
                        next_id += 1
                    config_to_id[config] = next_id
                    used_ids.add(next_id)
                row['config_id'] = config_to_id[config]
                return row

            data = data.apply(add_config, axis=1)
            id_to_config = {conf_id: conf for conf, conf_id in config_to_id.items()}

        # Normalise the id column to a numeric dtype on a new frame.
        data = data.assign(config_id=pd.to_numeric(data["config_id"]))

        # Check whether all config-ids are present
        unknown_ids = set(data['config_id']) - set(id_to_config)
        if unknown_ids:
            raise ValueError(
                "config id {} cannot be identified (is your configurations.csv complete? Or maybe "
                "this is a type-issue...".format(unknown_ids))

        return data, id_to_config