def map_to_config(cs: CS.ConfigurationSpace, vector: Union[np.array, List]) -> CS.Configuration:
    """Build a configuration of ``cs`` whose values are taken from ``vector``.

    A configuration is first sampled from ``cs`` and then every hyperparameter
    is overwritten. ``vector[i]`` is assigned to the i-th hyperparameter when
    the hyperparameter names are sorted with ``np.sort``.

    Parameters
    ----------
    cs : ConfigSpace.ConfigurationSpace
        Space the returned configuration belongs to.
    vector : np.array or list
        One value per hyperparameter, ordered by sorted hyperparameter name.

    Returns
    -------
    ConfigSpace.Configuration
    """
    result = cs.sample_configuration()
    sorted_names = np.sort(cs.get_hyperparameter_names())
    for position, hp_name in enumerate(sorted_names):
        hyperparameter = cs.get_hyperparameter(str(hp_name))
        if isinstance(hyperparameter, CS.UniformIntegerHyperparameter):
            value = int(vector[position])
        elif isinstance(hyperparameter, CS.UniformFloatHyperparameter):
            # Clipping guards against the edge case where casting to a 64-bit
            # float adds spurious precision and pushes the number just outside
            # the hard [lower, upper] limits of the hyperparameter.
            value = np.clip(
                float(vector[position]), hyperparameter.lower, hyperparameter.upper
            )
        else:
            # Every other hyperparameter type receives the raw entry.
            value = vector[position]
        result[hyperparameter.name] = value
    return result
def _check_and_cast_configuration(configuration: Union[Dict, ConfigSpace.Configuration],
                                  configuration_space: ConfigSpace.ConfigurationSpace) \
        -> ConfigSpace.Configuration:
    """Cast the given configuration to a ConfigSpace.Configuration and validate it.

    Note:
        Inactive hyperparameters are removed from the given configuration.
        Inactive hyperparameters are hyperparameters that are not relevant for
        a configuration, e.g. hyperparameter A is only relevant if
        hyperparameter B=1; if B!=1 then A is inactive and will be removed from
        the configuration. Since the authors of the benchmark removed those
        parameters explicitly, they should also handle the case that inactive
        parameters are not present in the input configuration.

    Parameters
    ----------
    configuration : dict or ConfigSpace.Configuration
        The configuration to check. A dict is converted with
        ``allow_inactive_with_values=True`` so that values for inactive
        hyperparameters do not make the conversion itself fail.
    configuration_space : ConfigSpace.ConfigurationSpace
        The space the configuration has to be valid for.

    Returns
    -------
    ConfigSpace.Configuration
        The (possibly cleaned) configuration.

    Raises
    ------
    TypeError
        If ``configuration`` is neither a dict nor a ConfigSpace.Configuration.
    """
    if isinstance(configuration, dict):
        configuration = ConfigSpace.Configuration(configuration_space, configuration,
                                                  allow_inactive_with_values=True)
    elif not isinstance(configuration, ConfigSpace.Configuration):
        # Only the two types checked above are supported, so the message names
        # exactly those.
        raise TypeError(f'Configuration has to be from type dict or '
                        f'ConfigSpace.Configuration but was {type(configuration)}')

    all_hps = set(configuration_space.get_hyperparameter_names())
    active_hps = configuration_space.get_active_hyperparameters(configuration)
    inactive_hps = all_hps - active_hps

    if inactive_hps:
        logger.debug('There are %d inactive hyperparameter(s): %s. '
                     'Going to remove them from the configuration.',
                     len(inactive_hps), inactive_hps)
        configuration = deactivate_inactive_hyperparameters(configuration, configuration_space)

    # Let ConfigSpace validate the final configuration against the space.
    configuration_space.check_configuration(configuration)

    return configuration
def get_fidelity_grid(
        cs: CS.ConfigurationSpace,
        grid_step_size: int = 10,
        convert_to_configspace: bool = False,
        include_sh_budgets: bool = True
) -> Union[List[Tuple], List[CS.Configuration]]:
    """Generate the cartesian-product grid over all fidelities of ``cs``.

    Hyperparameters are visited in ``np.sort`` order of their names, so the
    i-th entry of every grid point belongs to the i-th sorted name.

    Parameters
    ----------
    cs : ConfigSpace.ConfigurationSpace
        The fidelity space. Every hyperparameter that is not a ``Constant`` is
        treated as numeric (its ``log``, ``lower`` and ``upper`` are read).
    grid_step_size : int
        The number of steps to divide a parameter dimension into
    convert_to_configspace : bool
        If True, returns a list of ConfigSpace objects of each point in the grid
        If False, returns a list of tuples containing the values of each point in grid
    include_sh_budgets : bool
        If True, the values returned by ``generate_SH_fidelities`` for eta=3
        are merged into the evenly spaced grid of each dimension.

    Returns
    -------
    list
    """
    etas = [3]
    axes = []
    for hp_name in np.sort(cs.get_hyperparameter_names()):
        fidelity = cs.get_hyperparameter(str(hp_name))

        # A constant contributes exactly one value to the product.
        if isinstance(fidelity, CS.Constant):
            axes.append([fidelity.value])
            continue

        low, high = fidelity.lower, fidelity.upper
        if fidelity.log:
            # Evenly spaced in log-space; the clip keeps the exp/log round trip
            # from leaving the [lower, upper] interval.
            points = np.exp(np.linspace(np.log(low), np.log(high), grid_step_size))
            points = np.clip(points, low, high)
        else:
            points = np.linspace(low, high, grid_step_size)

        if include_sh_budgets:
            sh_points = np.array([])
            for eta in etas:
                sh_points = np.concatenate(
                    (sh_points, generate_SH_fidelities(low, high, eta))
                )
            points = np.unique(np.concatenate((sh_points, points)))

        if isinstance(fidelity, CS.UniformIntegerHyperparameter):
            points = points.astype(int)
        # The integer cast can create duplicates, hence the (second) unique.
        axes.append(np.unique(points))

    product = itertools.product(*axes)
    if convert_to_configspace:
        return [map_to_config(cs, point) for point in product]
    return list(product)
def get_parameter_grid(
        cs: CS.ConfigurationSpace,
        grid_step_size: int = 10,
        convert_to_configspace: bool = False
) -> Union[List[Tuple], List[CS.Configuration]]:
    """Generate the cartesian-product grid over all hyperparameters of ``cs``.

    Hyperparameters are visited in ``np.sort`` order of their names, so the
    i-th entry of every grid point belongs to the i-th sorted name.
    Categorical, ordinal and constant hyperparameters contribute their
    choices / sequence / single value unchanged; every other hyperparameter is
    discretised into ``grid_step_size`` float32 points between ``lower`` and
    ``upper`` (log-spaced if ``hp.log`` is set).

    Parameters
    ----------
    cs : ConfigSpace.ConfigurationSpace
    grid_step_size : int
        The number of steps to divide a parameter dimension into
    convert_to_configspace : bool
        If True, returns a list of ConfigSpace objects of each point in the grid
        If False, returns a list of tuples containing the values of each point in grid

    Returns
    -------
    list
    """
    axes = []
    for hp_name in np.sort(cs.get_hyperparameter_names()):
        param = cs.get_hyperparameter(str(hp_name))

        # Discrete hyperparameter types already define their own value set.
        if isinstance(param, CS.CategoricalHyperparameter):
            axes.append(param.choices)
            continue
        if isinstance(param, CS.OrdinalHyperparameter):
            axes.append(param.sequence)
            continue
        if isinstance(param, CS.Constant):
            axes.append([param.value])
            continue

        low, high = param.lower, param.upper
        if param.log:
            # Evenly spaced in log-space; the clip keeps the exp/log round trip
            # from leaving the [lower, upper] interval.
            points = np.exp(np.linspace(np.log(low), np.log(high), grid_step_size))
            points = np.clip(points, low, high).astype(np.float32)
        else:
            points = np.linspace(low, high, grid_step_size, dtype=np.float32)

        if isinstance(param, CS.UniformIntegerHyperparameter):
            points = points.astype(int)
        # The integer cast can create duplicates; keep each value once.
        axes.append(np.unique(points).tolist())

    product = itertools.product(*axes)
    if convert_to_configspace:
        return [map_to_config(cs, point) for point in product]
    return list(product)
def get_discrete_configspace(configspace: CS.ConfigurationSpace,
                             grid_size: int = 10,
                             seed: Union[int, None] = None,
                             fidelity_space: bool = False):
    """Create a discretized copy of a generally defined ConfigurationSpace.

    The grid for ``configspace`` is generated first; the new space then
    contains every hyperparameter as an ``OrdinalHyperparameter`` whose
    sequence is the set of unique grid values of that dimension.

    Parameters
    ----------
    configspace : ConfigSpace.ConfigurationSpace
        The space to discretize.
    grid_size : int
        The number of steps to divide a parameter dimension into
    seed : int
        Seed passed to the newly created ConfigurationSpace.
    fidelity_space : bool
        If True the grid comes from ``get_fidelity_grid``, otherwise from
        ``get_parameter_grid``.

    Returns
    -------
    ConfigSpace.ConfigurationSpace
    """
    make_grid = get_fidelity_grid if fidelity_space else get_parameter_grid
    # One row per grid point, one column per (name-sorted) hyperparameter.
    grid_frame = pd.DataFrame(make_grid(configspace, grid_size))

    discrete_space = CS.ConfigurationSpace(seed=seed)
    sorted_names = np.sort(configspace.get_hyperparameter_names()).tolist()
    for column, hp_name in enumerate(sorted_names):
        source_hp = configspace.get_hyperparameter(hp_name)
        values = grid_frame.iloc[:, column].unique()
        # Restore the numeric type that matches the original hyperparameter.
        if isinstance(source_hp, CS.UniformIntegerHyperparameter):
            values = values.astype(int)
        elif isinstance(source_hp, CS.UniformFloatHyperparameter):
            values = values.astype(np.float32)
        discrete_space.add_hyperparameter(CS.OrdinalHyperparameter(str(hp_name), values))
    return discrete_space
def extract_configs(self, data, cs: ConfigurationSpace, id_to_config=None):
    """Make sure every row of ``data`` carries the id of its configuration.

    After completion, every unique configuration in the data has a
    corresponding id in the data-frame. The data-frame is expected to either
    contain a column for config-id OR columns for each individual
    hyperparameter. Parameter names are taken from the provided configspace.
    If a mapping of ids to configurations already exists, it is reused.

    Parameters
    ----------
    data: pd.DataFrame
        pandas dataframe containing either a column called `config_id` or a
        column for every individual parameter. NOTE: if there is no
        `config_id` column, one is added to the passed frame in place before
        the rows are processed.
    cs: ConfigurationSpace
        configuration space providing the hyperparameter names (and used to
        build the configurations)
    id_to_config: dict[int:Configuration]
        optional, mapping ids to Configurations (necessary when using
        `config_id`-column)

    Returns
    -------
    data: pd.DataFrame
        if no config-id-column was there before, there is one now.
    id_to_config: dict
        mapping every id to a configuration

    Raises
    ------
    ValueError
        If `data` has a `config_id` column but no `id_to_config` mapping was
        given, or if `data` contains ids missing from the mapping.
    """
    if id_to_config:
        config_to_id = {conf: name for name, conf in id_to_config.items()}
    else:
        id_to_config = {}
        config_to_id = {}

    parameters = cs.get_hyperparameter_names()
    has_id_column = 'config_id' in data.columns

    if has_id_column and not id_to_config:
        raise ValueError("When defining configs with \"config_id\" "
                         "in header, you need to provide the argument "
                         "\"configurations\" to the CSV2RH-object - "
                         "either as a dict, mapping the id's to "
                         "Configurations or as a path to a csv-file "
                         "containing the necessary information.")

    if not has_id_column:
        # Map rows to configurations. Ids that are already taken (from a
        # provided id_to_config) must never be handed out a second time.
        used_ids = set(config_to_id.values())
        data['config_id'] = -1

        def add_config(row):
            # Cells holding an empty string are left out of the configuration.
            values = {name: row[name] for name in parameters if row[name] != ''}
            config = deactivate_inactive_hyperparameters(fix_types(values, cs), cs)
            if config not in config_to_id:
                # Start at the historical choice (number of known configs) and
                # skip forward only if that id is already in use, so two
                # different configurations can never share an id.
                new_id = len(config_to_id)
                while new_id in used_ids:
                    new_id += 1
                used_ids.add(new_id)
                config_to_id[config] = new_id
            row['config_id'] = config_to_id[config]
            return row

        data = data.apply(add_config, axis=1)
        id_to_config = {config_id: config for config, config_id in config_to_id.items()}

    data["config_id"] = pd.to_numeric(data["config_id"])

    # Check whether all config-ids are present
    unknown_ids = set(data['config_id']) - set(id_to_config.keys())
    if unknown_ids:
        raise ValueError(
            "config id {} cannot be identified (is your configurations.csv complete? Or maybe "
            "this is a type-issue...".format(unknown_ids))

    return data, id_to_config