def _cost(config: Configuration, run_history: RunHistory,
          instance_seed_pairs=None):
    """Collect the raw cost of every requested run of ``config``.

    Parameters
    ----------
    config : Configuration
        Configuration whose costs are looked up.
    run_history : RunHistory
        RunHistory object holding the recorded runs.
    instance_seed_pairs : list, optional (default=None)
        List of (instance, seed) tuples to look up. If None, the pairs are
        obtained from ``run_history.get_runs_for_config(config)``.

    Returns
    -------
    costs : list
        One cost per (instance, seed) pair, in iteration order. An empty
        list is returned if ``config`` has no entry in
        ``run_history.config_ids``.
    """
    try:
        config_id = run_history.config_ids[config]
    except KeyError:
        # The challenger has not been run so far, so there is nothing to report.
        return []

    pairs = instance_seed_pairs
    if pairs is None:
        pairs = run_history.get_runs_for_config(config)

    return [run_history.data[RunKey(config_id, inst, seed)].cost
            for inst, seed in pairs]
def add(self, config: Configuration, cost: float, time: float,
        status: StatusType, instance_id: str = "",
        seed: int = 0,
        additional_info: dict = None,
        origin: DataOrigin = DataOrigin.INTERNAL):
    """Record the result of a target algorithm (TA) run.

    A run is identified by (config, instance_id, seed). If a run with the
    same key is already stored, it is only replaced when

    * ``self.overwrite_existing_runs`` is set, or
    * the stored run is CAPPED and the new one is not, or
    * both runs are CAPPED and the new cost is larger (censored with a
      larger cutoff).

    In every other case the new data is dropped silently.

    Parameters
    ----------
    config : dict (or other type -- depending on config space module)
        Parameter configuration
    cost : float
        Cost of TA run (will be minimized)
    time : float
        Runtime of TA run
    status : StatusType
        Status of the TA run
    instance_id : str
        String representing an instance (default: ""); any falsy value is
        stored as None
    seed : int
        Random seed used by TA (default: 0)
    additional_info : dict
        Additional run infos (could include further returned
        information from TA or fields such as start time and host_id)
    origin : DataOrigin
        Defines how data will be used; forwarded to ``self._add``.
    """
    # Normalise "no instance" (empty string etc.) to None.
    if not instance_id:
        instance_id = None

    config_id = self.config_ids.get(config)
    if config_id is None:
        # First time this configuration is seen: register it in both
        # directions (config -> id and id -> config).
        new_id = get_id_of_config(config)
        self.config_ids[config] = new_id
        config_id = self.config_ids.get(config)
        self.ids_config[new_id] = config

    key = RunKey(config_id, instance_id, seed)
    value = RunValue(cost, time, status, additional_info)

    # Unknown key, or overwriting explicitly allowed: always store.
    if self.overwrite_existing_runs or self.data.get(key) is None:
        self._add(key, value, status, origin)
        return

    stored = self.data[key]
    if not (stored.status == StatusType.CAPPED):
        # Each run key is supposed to be used only once; an existing
        # uncapped run is kept and the repeated add is ignored.
        return

    # The stored run is capped: replace it with an uncapped run, or with a
    # capped run that was censored at a larger cutoff.
    if status != StatusType.CAPPED or (
            status == StatusType.CAPPED and cost > stored.cost):
        self._add(key, value, status, origin)
def get_instance_costs_for_config(self, config: Configuration):
    """Average the recorded costs of ``config`` per instance (across seeds).

    Parameters
    ----------
    config : Configuration from ConfigSpace
        Parameter configuration

    Returns
    -------
    cost_per_inst : dict<instance name<str>, cost<float>>
        Mean cost for every instance the configuration has runs on;
        empty if no runs are registered for the configuration.
    """
    config_id = self.config_ids.get(config)

    # Group the individual run costs by instance first ...
    costs_by_inst = {}
    for inst, seed in self._configid_to_inst_seed.get(config_id, []):
        run_value = self.data[RunKey(config_id, inst, seed)]
        costs_by_inst.setdefault(inst, []).append(run_value.cost)

    # ... then collapse every group to its mean.
    return {inst: np.mean(costs) for inst, costs in costs_by_inst.items()}
def _get_runs(self,
              configs: Union[str, typing.List[Configuration]],
              insts: Union[str, typing.List[str]],
              repetitions: int=1,
              runhistory: RunHistory=None,
              ) -> typing.Tuple[typing.List[_Run], RunHistory]:
    """
    Generate list of SMAC-TAE runs to be executed. This means
    combinations of configs with all instances on a certain number of seeds.

    Runs that can be taken over from ``runhistory`` (i.e. that do not need
    to be reevaluated) are copied into a newly created RunHistory, which is
    returned together with the list of runs.

    Parameters
    ----------
    configs: str or list<Configuration>
        string or directly a list of Configuration
        str from [def, inc, def+inc, wallclock_time, cpu_time, all]
        time evaluates at cpu- or wallclock-timesteps of:
        [max_time/2^0, max_time/2^1, max_time/2^3, ..., default]
        with max_time being the highest recorded time
    insts: str or list<str>
        what instances to use for validation, either from
        [train, test, train+test] or directly a list of instances
    repetitions: int
        number of seeds per instance/config-pair to be evaluated
    runhistory: RunHistory
        optional, try to reuse this runhistory and save some runs

    Returns
    -------
    runs: list<_Run>
        list with _Runs
        [_Run(config=CONFIG1,inst=INSTANCE1,seed=SEED1,inst_specs=INST_SPECIFICS1),
         _Run(config=CONFIG2,inst=INSTANCE2,seed=SEED2,inst_specs=INST_SPECIFICS2),
         ...]
    new_rh: RunHistory
        runhistory holding the runs reused from ``runhistory``
    """
    # Resolve string shortcuts into concrete configurations / instances.
    if isinstance(configs, str):
        configs = self._get_configs(configs)
    if isinstance(insts, str):
        insts = self._get_instances(insts)
    # Without instances, validate on a single "None" instance.
    if not insts:
        insts = [None]

    # A deterministic algorithm needs exactly one repetition.
    if self.scen.deterministic and repetitions != 1:
        self.logger.warning("Specified %d repetitions, but fixing to 1, "
                            "because algorithm is deterministic.", repetitions)
        repetitions = 1

    # Per instance: (seed, configs evaluated on it) entries of the runhistory.
    reusable = self._process_runhistory(configs, insts, runhistory)

    runs = []
    # Number of runs that need no recalculation.
    runs_from_rh = 0
    # Reused runs are collected here and returned as well.
    new_rh = RunHistory(average_cost)

    for inst in sorted(insts):
        for _ in range(repetitions):
            # Step 1: settle on a seed and take over whatever the given
            # runhistory already knows for this inst-seed-pair.
            done_configs = []
            if not runhistory or inst not in reusable:
                # No runhistory or no entries for this instance: new seed.
                seed = self.rng.randint(MAXINT)
            else:
                # Take the next inst-seed-pair (most often evaluated first).
                seed, done_configs = reusable[inst].pop(0)
                # Drop the instance once all of its seeds are used up.
                if not reusable[inst]:
                    reusable.pop(inst)
                # Iterate over a copy, since entries may be removed below.
                for cfg in done_configs[:]:
                    runkey = RunKey(runhistory.config_ids[cfg], inst, seed)
                    cost, runtime, status, additional_info = runhistory.data[runkey]
                    if status in [StatusType.CRASHED, StatusType.ABORT, StatusType.CAPPED]:
                        # Target algorithm runs that were not properly
                        # executed should be repeated.
                        done_configs.remove(cfg)
                        continue
                    new_rh.add(cfg, cost, runtime, status, instance_id=inst,
                               seed=seed, additional_info=additional_info)
                    runs_from_rh += 1

            # Step 2: schedule every config not yet evaluated on this seed,
            # which guarantees the same inst-seed-pairs for all configs.
            for cfg in configs:
                if cfg in done_configs:
                    continue
                # Only use specifics if they exist, else use the string "0".
                if inst and inst in self.scen.instance_specific:
                    specs = self.scen.instance_specific[inst]
                else:
                    specs = "0"
                runs.append(_Run(config=cfg, inst=inst, seed=seed,
                                 inst_specs=specs))

    self.logger.info("Collected %d runs from %d configurations on %d "
                     "instances with %d repetitions. Reusing %d runs from "
                     "given runhistory.", len(runs), len(configs),
                     len(insts), repetitions, runs_from_rh)

    return runs, new_rh
def fmin_smac(func: typing.Callable,
              x0: typing.List[float],
              bounds: typing.List[typing.Iterable[float]],
              maxfun: int = -1,
              rng: typing.Union[np.random.RandomState, int] = None,
              scenario_args: typing.Mapping[str, typing.Any] = None,
              **kwargs):
    """
    Minimize a function func using the SMAC4HPO facade
    (i.e., a modified version of SMAC).
    This function is a convenience wrapper for the SMAC4HPO class.

    Every entry of ``bounds`` becomes one uniform float hyperparameter named
    ``x1`` ... ``xN`` (zero-padded to equal width) whose default value is the
    corresponding entry of ``x0``.

    Parameters
    ----------
    func : typing.Callable
        Function to minimize.
    x0 : typing.List[float]
        Initial guess/default configuration.
    bounds : typing.List[typing.List[float]]
        ``(min, max)`` pairs for each element in ``x``, defining the bound on
        that parameters.
    maxfun : int, optional
        Maximum number of function evaluations. Only applied when positive;
        it then takes precedence over a ``runcount_limit`` given in
        ``scenario_args``.
    rng : np.random.RandomState or int, optional
        Random number generator (or seed) passed on to SMAC.
    scenario_args: typing.Mapping[str,typing.Any]
        Arguments passed to the scenario; they override the defaults set here
        See dsmac.scenario.scenario.Scenario
    **kwargs:
        Arguments passed to the optimizer class
        See ~dsmac.facade.smac_facade.SMAC

    Returns
    -------
    x : np.ndarray
        Estimated position of the minimum.
    f : float
        Value of `func` at the minimum.
    s : :class:`smac.facade.smac_hpo_facade.SMAC4HPO`
        SMAC objects which enables the user to get
        e.g., the trajectory and runhistory.

    """
    # create configuration space
    cs = ConfigurationSpace()

    # Adjust zero padding so that all parameter names have the same width
    tmplt = 'x{0:0' + str(len(str(len(bounds)))) + 'd}'

    for idx, (lower_bound, upper_bound) in enumerate(bounds):
        parameter = UniformFloatHyperparameter(name=tmplt.format(idx + 1),
                                               lower=lower_bound,
                                               upper=upper_bound,
                                               default_value=x0[idx])
        cs.add_hyperparameter(parameter)

    # create scenario
    scenario_dict = {
        "run_obj": "quality",
        "cs": cs,
        "deterministic": "true",
        "initial_incumbent": "DEFAULT",
    }
    if scenario_args is not None:
        scenario_dict.update(scenario_args)
    # Applied after scenario_args on purpose: an explicit maxfun wins.
    if maxfun > 0:
        scenario_dict["runcount_limit"] = maxfun
    scenario = Scenario(scenario_dict)

    smac = SMAC4HPO(scenario=scenario,
                    tae_runner=ExecuteTAFuncArray,
                    tae_runner_kwargs={'ta': func},
                    rng=rng,
                    **kwargs)
    smac.logger = logging.getLogger(smac.__module__ + "." + smac.__class__.__name__)

    incumbent = smac.optimize()

    # The incumbent's run is looked up with no instance (None) and seed 0.
    config_id = smac.solver.runhistory.config_ids[incumbent]
    run_key = RunKey(config_id, None, 0)
    incumbent_performance = smac.solver.runhistory.data[run_key]

    # ``np.float`` was only an alias of the builtin ``float`` and no longer
    # exists in current NumPy releases, so use the builtin directly.
    incumbent = np.array(
        [incumbent[tmplt.format(idx + 1)] for idx in range(len(bounds))],
        dtype=float)

    return incumbent, incumbent_performance.cost, smac