def __init__(self,
             target_names: List[str],
             configspace: ConfigurationSpace,
             bounds: List[Tuple[float, float]],
             types: np.ndarray,
             seed: int,
             rf_kwargs: Optional[Dict[str, Any]] = None,
             **kwargs):
    """Constructor

    Builds one independent ``RandomForestWithInstances`` per target
    dimension.

    Parameters
    ----------
    target_names : list
        List of str, each entry is the name of one target dimension. Length
        of the list will be ``n_objectives``.
    configspace : ConfigurationSpace
        Configuration space; forwarded to the parent constructor and to
        every per-target estimator.
    bounds : List[Tuple[float, float]]
        See :class:`~dsmac.epm.rf_with_instances.RandomForestWithInstances`
        documentation.
    types : np.ndarray
        See :class:`~dsmac.epm.rf_with_instances.RandomForestWithInstances`
        documentation.
    seed : int
        Forwarded to the parent constructor only. It is *not* passed to the
        per-target estimators; supply it through ``rf_kwargs`` if they need
        one.
    rf_kwargs : Optional[Dict[str, Any]]
        Extra keyword arguments for each per-target
        ``RandomForestWithInstances``. ``None`` is treated as an empty dict.
    kwargs
        Forwarded to the parent constructor. See
        :class:`~dsmac.epm.rf_with_instances.RandomForestWithInstances`
        documentation.
    """
    super().__init__(configspace=configspace, bounds=bounds, types=types, seed=seed, **kwargs)
    if rf_kwargs is None:
        rf_kwargs = {}

    self.target_names = target_names
    self.num_targets = len(self.target_names)
    # One estimator per objective; each one is built from the same arguments.
    self.estimators = [
        RandomForestWithInstances(configspace, types, bounds, **rf_kwargs)
        for _ in range(self.num_targets)
    ]
def __init__(
        self,
        scenario: Scenario,
        tae_runner: Optional[Union[Type[ExecuteTARun], Callable]] = None,
        tae_runner_kwargs: Optional[dict] = None,
        runhistory: Optional[Union[Type[RunHistory], RunHistory]] = None,
        runhistory_kwargs: Optional[dict] = None,
        intensifier: Optional[Type[Intensifier]] = None,
        intensifier_kwargs: Optional[dict] = None,
        acquisition_function: Optional[
            Type[AbstractAcquisitionFunction]] = None,
        acquisition_function_kwargs: Optional[dict] = None,
        integrate_acquisition_function: bool = False,
        acquisition_function_optimizer: Optional[
            Type[AcquisitionFunctionMaximizer]] = None,
        acquisition_function_optimizer_kwargs: Optional[dict] = None,
        model: Optional[Type[AbstractEPM]] = None,
        model_kwargs: Optional[dict] = None,
        runhistory2epm: Optional[Type[AbstractRunHistory2EPM]] = None,
        runhistory2epm_kwargs: Optional[dict] = None,
        initial_design: Optional[Type[InitialDesign]] = None,
        initial_design_kwargs: Optional[dict] = None,
        initial_configurations: Optional[List[Configuration]] = None,
        stats: Optional[Stats] = None,
        restore_incumbent: Optional[Configuration] = None,
        rng: Optional[Union[np.random.RandomState, int]] = None,
        smbo_class: Optional[SMBO] = None,
        run_id: Optional[int] = None,
        random_configuration_chooser: Optional[
            Type[RandomConfigurationChooser]] = None,
        random_configuration_chooser_kwargs: Optional[dict] = None,
):
    """
    Constructor

    Wires together all components of the optimizer (run history, model,
    acquisition function and its optimizer, target algorithm runner,
    intensifier, initial design, runhistory-to-EPM transformer) and stores
    the resulting solver in ``self.solver``.

    For every ``<component>`` / ``<component>_kwargs`` pair, a dictionary of
    defaults is built first, then updated with ``<component>_kwargs`` and
    finally passed to the component's constructor.

    Parameters
    ----------
    scenario : ~dsmac.scenario.scenario.Scenario
        Scenario object
    tae_runner : ~dsmac.tae.execute_ta_run.ExecuteTARun or callable
        Callable or implementation of
        :class:`~dsmac.tae.execute_ta_run.ExecuteTARun`. In case a
        callable is passed it will be wrapped by
        :class:`~dsmac.tae.execute_func.ExecuteTAFuncDict`.
        If not set, it will be initialized with the
        :class:`~dsmac.tae.execute_ta_run_old.ExecuteTARunOld`.
    tae_runner_kwargs: Optional[dict]
        arguments passed to constructor of '~tae_runner'
    runhistory : RunHistory
        runhistory to store all algorithm runs; either a class (which is
        instantiated) or an instance (which is used as is)
    runhistory_kwargs : Optional[dict]
        arguments passed to constructor of runhistory.
        We strongly advise against changing the aggregation function,
        since it will break some code assumptions
    intensifier : Intensifier
        intensification class used to issue a racing to decide the current
        incumbent. Must be a class or None; anything else raises TypeError.
    intensifier_kwargs: Optional[dict]
        arguments passed to the constructor of '~intensifier'
    acquisition_function : ~dsmac.optimizer.acquisition.AbstractAcquisitionFunction
        Class that implements the
        :class:`~dsmac.optimizer.acquisition.AbstractAcquisitionFunction`.
        Will use :class:`~dsmac.optimizer.acquisition.EI` or
        :class:`~dsmac.optimizer.acquisition.LogEI` if not set.
        `~acquisition_function_kwargs` is passed to the class constructor.
        Must be a class or None; anything else raises TypeError.
    acquisition_function_kwargs : Optional[dict]
        dictionary to pass specific arguments to ~acquisition_function
    integrate_acquisition_function : bool, default=False
        Whether to integrate the acquisition function. Works only with
        models which can sample their hyperparameters
        (i.e. GaussianProcessMCMC).
    acquisition_function_optimizer : ~dsmac.optimizer.ei_optimization.AcquisitionFunctionMaximizer
        Class that implements the
        :class:`~dsmac.optimizer.ei_optimization.AcquisitionFunctionMaximizer`.
        Will use
        :class:`dsmac.optimizer.ei_optimization.InterleavedLocalAndRandomSearch`
        if not set. Must be a class or None; anything else raises TypeError.
    acquisition_function_optimizer_kwargs: Optional[dict]
        Arguments passed to constructor of
        '~acquisition_function_optimizer'
    model : AbstractEPM
        Model class that implements train() and predict(). Will use a
        :class:`~dsmac.epm.rf_with_instances.RandomForestWithInstances`
        if not set. Must be a class or None; anything else raises TypeError.
    model_kwargs : Optional[dict]
        Arguments passed to constructor of '~model'
    runhistory2epm : AbstractRunHistory2EPM
        Class that implements the AbstractRunHistory2EPM. If None,
        will use
        :class:`~dsmac.runhistory.runhistory2epm.RunHistory2EPM4Cost`
        if objective is cost or
        :class:`~dsmac.runhistory.runhistory2epm.RunHistory2EPM4LogCost`
        if objective is runtime.
    runhistory2epm_kwargs: Optional[dict]
        Arguments passed to the constructor of '~runhistory2epm'
    initial_design : InitialDesign
        initial sampling design (class). If ``initial_configurations`` is
        given as well, it is stored on ``initial_design`` as the attribute
        ``initial_configurations``.
    initial_design_kwargs: Optional[dict]
        arguments passed to constructor of '~initial_design'
    initial_configurations : List[Configuration]
        list of initial configurations for initial design
    stats : Stats
        optional stats object
    rng : np.random.RandomState or int
        Random number generator or seed. Anything that is not already a
        ``RandomState`` (including ``None``) is wrapped in
        ``np.random.RandomState(rng)``.
    restore_incumbent : Configuration
        incumbent used if restoring to previous state
    smbo_class : ~dsmac.optimizer.smbo.SMBO
        Class implementing the SMBO interface which will be used to
        instantiate the optimizer class.
    run_id : int (optional)
        Only honoured when restoring (``restore_incumbent`` is set and the
        scenario has an output directory). On the default path the value is
        replaced by a freshly generated ``uuid1()``.
    random_configuration_chooser : ~dsmac.optimizer.random_configuration_chooser.RandomConfigurationChooser
        How often to choose a random configuration during the
        intensification procedure.
    random_configuration_chooser_kwargs : Optional[dict]
        arguments of constructor for '~random_configuration_chooser'

    Raises
    ------
    TypeError
        If a component argument is neither None nor of a supported kind.
    ValueError
        If the random configuration chooser, the initial incumbent setting
        or the run objective is not recognised, or if the objective of the
        target algorithm runner differs from the one of the scenario.
    """
    self.logger = logging.getLogger(self.__module__ + "." + self.__class__.__name__)

    aggregate_func = average_cost

    self.scenario = scenario
    self.output_dir = ""
    if not restore_incumbent:
        # restore_incumbent is used by the CLI interface which provides a
        # method for restoring a SMAC run given an output directory.
        # This is the default path: the run is identified by a UUID and
        # results go directly into the scenario's output directory.
        run_id = uuid1()
        self.output_dir = scenario.output_dir
    elif scenario.output_dir is not None:
        run_id, rng = get_rng(rng=rng, run_id=run_id, logger=self.logger)
        # output-directory is created in CLI when restoring from a
        # folder. calling the function again in the facade results in two
        # folders being created: run_X and run_X.OLD. if we are
        # restoring, the output-folder exists already and we omit creating it,
        # but set the self-output_dir to the dir.
        # necessary because we want to write traj to new output-dir in CLI.
        self.output_dir = scenario.output_dir_for_this_run

    # Everything below calls rng.randint(), so make sure we really hold a
    # RandomState. An existing RandomState is left untouched (its stream is
    # not advanced); None or an int seed is wrapped.
    if not isinstance(rng, np.random.RandomState):
        rng = np.random.RandomState(rng)

    if (scenario.deterministic is True
            and getattr(scenario, 'tuner_timeout', None) is None
            and scenario.run_obj == 'quality'):
        self.logger.info(
            'Optimizing a deterministic scenario for quality without a tuner timeout - will make '
            'SMAC deterministic and only evaluate one configuration per iteration!'
        )
        scenario.intensification_percentage = 1e-10
        # NOTE(review): this sets ``scenario.min_chall`` while the
        # intensifier below reads ``scenario.intens_min_chall`` - confirm
        # which attribute is intended.
        scenario.min_chall = 1

    scenario.write()

    # initialize stats object
    if stats:
        self.stats = stats
    else:
        self.stats = Stats(scenario, file_system=scenario.file_system)

    if self.scenario.run_obj == "runtime" and not self.scenario.transform_y == "LOG":
        self.logger.warning(
            "Runtime as objective automatically activates log(y) transformation"
        )
        self.scenario.transform_y = "LOG"

    # initialize empty runhistory
    runhistory_def_kwargs = {'aggregate_func': aggregate_func}
    if runhistory_kwargs is not None:
        runhistory_def_kwargs.update(runhistory_kwargs)
    if runhistory is None:
        runhistory = RunHistory(**runhistory_def_kwargs,
                                file_system=scenario.file_system,
                                db_type=scenario.db_type,
                                db_args=scenario.db_args,
                                db_kwargs=scenario.db_kwargs)
    elif inspect.isclass(runhistory):
        runhistory = runhistory(**runhistory_def_kwargs)
    else:
        # A ready-made instance: only fill in a missing aggregation function.
        if runhistory.aggregate_func is None:
            runhistory.aggregate_func = aggregate_func

    # random configuration chooser
    rand_conf_chooser_kwargs = {'rng': rng}
    if random_configuration_chooser_kwargs is not None:
        rand_conf_chooser_kwargs.update(random_configuration_chooser_kwargs)
    if random_configuration_chooser is None:
        if 'prob' not in rand_conf_chooser_kwargs:
            rand_conf_chooser_kwargs['prob'] = scenario.rand_prob
        random_configuration_chooser = ChooserProb(**rand_conf_chooser_kwargs)
    elif inspect.isclass(random_configuration_chooser):
        random_configuration_chooser = random_configuration_chooser(
            **rand_conf_chooser_kwargs)
    elif not isinstance(random_configuration_chooser, RandomConfigurationChooser):
        raise ValueError(
            "random_configuration_chooser has to be"
            " a class or object of RandomConfigurationChooser")

    # reset random number generator in config space to draw different
    # random configurations with each seed given to SMAC
    scenario.cs.seed(rng.randint(MAXINT))

    # initial Trajectory Logger
    traj_logger = TrajLogger(output_dir=self.output_dir,
                             stats=self.stats,
                             file_system=scenario.file_system)

    # initial EPM
    types, bounds = get_types(scenario.cs, scenario.feature_array)
    model_def_kwargs = {
        'types': types,
        'bounds': bounds,
        'instance_features': scenario.feature_array,
        'seed': rng.randint(MAXINT),
        'pca_components': scenario.PCA_DIM,
    }
    if model_kwargs is not None:
        model_def_kwargs.update(model_kwargs)
    if model is None:
        # Random-forest defaults from the scenario; user-supplied
        # model_kwargs take precedence.
        for key, value in {
            'log_y': scenario.transform_y in ["LOG", "LOGS"],
            'num_trees': scenario.rf_num_trees,
            'do_bootstrapping': scenario.rf_do_bootstrapping,
            'ratio_features': scenario.rf_ratio_features,
            'min_samples_split': scenario.rf_min_samples_split,
            'min_samples_leaf': scenario.rf_min_samples_leaf,
            'max_depth': scenario.rf_max_depth,
        }.items():
            if key not in model_def_kwargs:
                model_def_kwargs[key] = value
        model_def_kwargs['configspace'] = self.scenario.cs
        model = RandomForestWithInstances(**model_def_kwargs)
    elif inspect.isclass(model):
        model_def_kwargs['configspace'] = self.scenario.cs
        model = model(**model_def_kwargs)
    else:
        raise TypeError("Model not recognized: %s" % (type(model)))

    # initial acquisition function
    acq_def_kwargs = {'model': model}
    if acquisition_function_kwargs is not None:
        acq_def_kwargs.update(acquisition_function_kwargs)
    if acquisition_function is None:
        if scenario.transform_y in ["LOG", "LOGS"]:
            acquisition_function = LogEI(**acq_def_kwargs)
        else:
            acquisition_function = EI(**acq_def_kwargs)
    elif inspect.isclass(acquisition_function):
        acquisition_function = acquisition_function(**acq_def_kwargs)
    else:
        raise TypeError(
            "Argument acquisition_function must be None or an object implementing the "
            "AbstractAcquisitionFunction, not %s."
            % type(acquisition_function))
    if integrate_acquisition_function:
        acquisition_function = IntegratedAcquisitionFunction(
            acquisition_function=acquisition_function,
            **acq_def_kwargs)

    # initialize optimizer on acquisition function
    acq_func_opt_kwargs = {
        'acquisition_function': acquisition_function,
        'config_space': scenario.cs,
        'rng': rng,
    }
    if acquisition_function_optimizer_kwargs is not None:
        acq_func_opt_kwargs.update(acquisition_function_optimizer_kwargs)
    if acquisition_function_optimizer is None:
        for key, value in {
            'max_steps': scenario.sls_max_steps,
            'n_steps_plateau_walk': scenario.sls_n_steps_plateau_walk,
        }.items():
            if key not in acq_func_opt_kwargs:
                acq_func_opt_kwargs[key] = value
        acquisition_function_optimizer = InterleavedLocalAndRandomSearch(
            **acq_func_opt_kwargs)
    elif inspect.isclass(acquisition_function_optimizer):
        acquisition_function_optimizer = acquisition_function_optimizer(
            **acq_func_opt_kwargs)
    else:
        raise TypeError(
            "Argument acquisition_function_optimizer must be None or an object implementing the "
            "AcquisitionFunctionMaximizer, but is '%s'"
            % type(acquisition_function_optimizer))

    # initialize tae_runner
    # First case, if tae_runner is None, the target algorithm is a call
    # string in the scenario file
    tae_def_kwargs = {
        'stats': self.stats,
        'run_obj': scenario.run_obj,
        'runhistory': runhistory,
        'par_factor': scenario.par_factor,
        'cost_for_crash': scenario.cost_for_crash,
        'abort_on_first_run_crash': scenario.abort_on_first_run_crash
    }
    if tae_runner_kwargs is not None:
        tae_def_kwargs.update(tae_runner_kwargs)
    if 'ta' not in tae_def_kwargs:
        tae_def_kwargs['ta'] = scenario.ta
    if tae_runner is None:
        tae_def_kwargs['ta'] = scenario.ta
        tae_runner = ExecuteTARunOld(**tae_def_kwargs)
    elif inspect.isclass(tae_runner):
        tae_runner = tae_runner(**tae_def_kwargs)
    elif callable(tae_runner):
        tae_def_kwargs['ta'] = tae_runner
        tae_runner = ExecuteTAFuncDict(**tae_def_kwargs)
    else:
        raise TypeError(
            "Argument 'tae_runner' is %s, but must be "
            "either None, a callable or an object implementing "
            "ExecuteTaRun. Passing 'None' will result in the "
            "creation of target algorithm runner based on the "
            "call string in the scenario file."
            % type(tae_runner))

    # Check that overall objective and tae objective are the same
    if tae_runner.run_obj != scenario.run_obj:
        raise ValueError("Objective for the target algorithm runner and "
                         "the scenario must be the same, but are '%s' and "
                         "'%s'" % (tae_runner.run_obj, scenario.run_obj))

    # initialize intensification
    intensifier_def_kwargs = {
        'tae_runner': tae_runner,
        'stats': self.stats,
        'traj_logger': traj_logger,
        'rng': rng,
        'instances': scenario.train_insts,
        'cutoff': scenario.cutoff,
        'deterministic': scenario.deterministic,
        'run_obj_time': scenario.run_obj == "runtime",
        'always_race_against': scenario.cs.get_default_configuration()
        if scenario.always_race_default else None,
        'use_ta_time_bound': scenario.use_ta_time,
        'instance_specifics': scenario.instance_specific,
        'minR': scenario.minR,
        'maxR': scenario.maxR,
        'adaptive_capping_slackfactor': scenario.intens_adaptive_capping_slackfactor,
        'min_chall': scenario.intens_min_chall,
    }
    if hasattr(scenario, 'filter_callback') and scenario.filter_callback is not None:
        self.logger.debug("Passing scenario.filter_callback to the intensifier")
        intensifier_def_kwargs.update(
            {'filter_callback': scenario.filter_callback})
    if intensifier_kwargs is not None:
        intensifier_def_kwargs.update(intensifier_kwargs)
    if intensifier is None:
        intensifier = Intensifier(**intensifier_def_kwargs)
    elif inspect.isclass(intensifier):
        intensifier = intensifier(**intensifier_def_kwargs)
    else:
        raise TypeError(
            "Argument intensifier must be None or an object implementing the Intensifier, but is '%s'"
            % type(intensifier))

    # initial design
    if initial_design is not None and initial_configurations is not None:
        # Hand the configurations over to the user-supplied design instead
        # of falling back to the generic InitialDesign below.
        initial_design.initial_configurations = initial_configurations
        initial_configurations = None
    init_design_def_kwargs = {
        'tae_runner': tae_runner,
        'scenario': scenario,
        'stats': self.stats,
        'traj_logger': traj_logger,
        'runhistory': runhistory,
        'rng': rng,
        'configs': initial_configurations,
        'intensifier': intensifier,
        'aggregate_func': aggregate_func,
        'n_configs_x_params': 0,
        'max_config_fracs': 0.0,
        # initial_design may be None (the default) or may not define the
        # attribute; fall back to None instead of raising AttributeError.
        'initial_configurations': getattr(initial_design, 'initial_configurations', None),
    }
    if initial_design_kwargs is not None:
        init_design_def_kwargs.update(initial_design_kwargs)
    if initial_configurations is not None:
        initial_design = InitialDesign(**init_design_def_kwargs)
    elif initial_design is None:
        if scenario.initial_incumbent == "DEFAULT":
            init_design_def_kwargs['max_config_fracs'] = 0.0
            initial_design = DefaultConfiguration(**init_design_def_kwargs)
        elif scenario.initial_incumbent == "RANDOM":
            init_design_def_kwargs['max_config_fracs'] = 0.0
            initial_design = RandomConfigurations(**init_design_def_kwargs)
        elif scenario.initial_incumbent == "LHD":
            initial_design = LHDesign(**init_design_def_kwargs)
        elif scenario.initial_incumbent == "FACTORIAL":
            initial_design = FactorialInitialDesign(**init_design_def_kwargs)
        elif scenario.initial_incumbent == "SOBOL":
            initial_design = SobolDesign(**init_design_def_kwargs)
        else:
            raise ValueError("Don't know what kind of initial_incumbent "
                             "'%s' is" % scenario.initial_incumbent)
    elif inspect.isclass(initial_design):
        initial_design = initial_design(**init_design_def_kwargs)
    else:
        raise TypeError(
            "Argument initial_design must be None or an object implementing the InitialDesign, but is '%s'"
            % type(initial_design))

    # if we log the performance data,
    # the RFRImputator will already get
    # log transform data from the runhistory
    # np.float64 is used instead of the alias np.float_, which no longer
    # exists in NumPy 2.0.
    if scenario.transform_y in ["LOG", "LOGS"]:
        cutoff = np.log(np.nanmin([np.inf, np.float64(scenario.cutoff)]))
        threshold = cutoff + np.log(scenario.par_factor)
    else:
        cutoff = np.nanmin([np.inf, np.float64(scenario.cutoff)])
        threshold = cutoff * scenario.par_factor

    num_params = len(scenario.cs.get_hyperparameters())
    imputor = RFRImputator(rng=rng,
                           cutoff=cutoff,
                           threshold=threshold,
                           model=model,
                           change_threshold=0.01,
                           max_iter=2)

    r2e_def_kwargs = {
        'scenario': scenario,
        'num_params': num_params,
        'success_states': [StatusType.SUCCESS, ],
        'impute_censored_data': True,
        'impute_state': [StatusType.CAPPED, ],
        'imputor': imputor,
        'scale_perc': 5
    }
    if scenario.run_obj == 'quality':
        r2e_def_kwargs.update({
            'success_states': [StatusType.SUCCESS, StatusType.CRASHED],
            'impute_censored_data': False,
            'impute_state': None,
        })
    if runhistory2epm_kwargs is not None:
        r2e_def_kwargs.update(runhistory2epm_kwargs)
    if runhistory2epm is None:
        if scenario.run_obj == 'runtime':
            runhistory2epm = RunHistory2EPM4LogCost(**r2e_def_kwargs)
        elif scenario.run_obj == 'quality':
            # NOTE(review): for any other transform_y value runhistory2epm
            # stays None and is passed to the solver as such.
            if scenario.transform_y == "NONE":
                runhistory2epm = RunHistory2EPM4Cost(**r2e_def_kwargs)
            elif scenario.transform_y == "LOG":
                runhistory2epm = RunHistory2EPM4LogCost(**r2e_def_kwargs)
            elif scenario.transform_y == "LOGS":
                runhistory2epm = RunHistory2EPM4LogScaledCost(**r2e_def_kwargs)
            elif scenario.transform_y == "INVS":
                runhistory2epm = RunHistory2EPM4InvScaledCost(**r2e_def_kwargs)
        else:
            raise ValueError('Unknown run objective: %s. Should be either '
                             'quality or runtime.' % self.scenario.run_obj)
    elif inspect.isclass(runhistory2epm):
        runhistory2epm = runhistory2epm(**r2e_def_kwargs)
    else:
        raise TypeError(
            "Argument runhistory2epm must be None or an object implementing the RunHistory2EPM, but is '%s'"
            % type(runhistory2epm))

    smbo_args = {
        'scenario': scenario,
        'stats': self.stats,
        'initial_design': initial_design,
        'runhistory': runhistory,
        'runhistory2epm': runhistory2epm,
        'intensifier': intensifier,
        'aggregate_func': aggregate_func,
        'num_run': run_id,
        'model': model,
        'acq_optimizer': acquisition_function_optimizer,
        'acquisition_func': acquisition_function,
        'rng': rng,
        'restore_incumbent': restore_incumbent,
        'random_configuration_chooser': random_configuration_chooser
    }

    if smbo_class is None:
        self.solver = SMBO(**smbo_args)
    else:
        self.solver = smbo_class(**smbo_args)
class Validator(object): """ Validator for the output of SMAC-scenarios. Evaluates specified configurations on specified instances. """ def __init__(self, scenario: Scenario, trajectory: list, rng: Union[np.random.RandomState, int] = None): """ Construct Validator for given scenario and trajectory. Parameters ---------- scenario: Scenario scenario object for cutoff, instances, features and specifics trajectory: trajectory-list trajectory to take incumbent(s) from rng: np.random.RandomState or int Random number generator or seed """ self.logger = logging.getLogger(self.__module__ + "." + self.__class__.__name__) self.traj = trajectory self.scen = scenario self.epm = None if isinstance(rng, np.random.RandomState): self.rng = rng elif isinstance(rng, int): self.rng = np.random.RandomState(seed=rng) else: self.logger.debug('no seed given, using default seed of 1') num_run = 1 self.rng = np.random.RandomState(seed=num_run) def _save_results(self, rh: RunHistory, output_fn, backup_fn=None): """ Helper to save results to file Parameters ---------- rh: RunHistory runhistory to save output_fn: str if ends on '.json': filename to save history to else: directory to save runhistory to (filename is backup_fn) backup_fn: str if output_fn does not end on '.json', treat output_fn as dir and append backup_fn as filename (if output_fn ends on '.json', this argument is ignored) """ if output_fn == "": self.logger.info( "No output specified, validated runhistory not saved.") return # Check if a folder or a file is specified as output if not output_fn.endswith('.json'): output_dir = output_fn output_fn = os.path.join(output_dir, backup_fn) self.logger.debug("Output is \"%s\", changing to \"%s\"!", output_dir, output_fn) base = os.path.split(output_fn)[0] if not base == "" and not os.path.exists(base): self.logger.debug("Folder (\"%s\") doesn't exist, creating.", base) os.makedirs(base) rh.save_json(output_fn) self.logger.info("Saving validation-results in %s", output_fn) def validate( 
self, config_mode: Union[str, typing.List[Configuration]] = 'def', instance_mode: Union[str, typing.List[str]] = 'test', repetitions: int = 1, n_jobs: int = 1, backend: str = 'threading', runhistory: RunHistory = None, tae: ExecuteTARun = None, output_fn: str = "", ) -> RunHistory: """ Validate configs on instances and save result in runhistory. If a runhistory is provided as input it is important that you run it on the same/comparable hardware. side effect: if output is specified, saves runhistory to specified output directory. Parameters ---------- config_mode: str or list<Configuration> string or directly a list of Configuration. string from [def, inc, def+inc, wallclock_time, cpu_time, all]. time evaluates at cpu- or wallclock-timesteps of: [max_time/2^0, max_time/2^1, max_time/2^3, ..., default] with max_time being the highest recorded time instance_mode: str or list<str> what instances to use for validation, either from [train, test, train+test] or directly a list of instances repetitions: int number of repetitions in nondeterministic algorithms n_jobs: int number of parallel processes used by joblib backend: str what backend joblib should use for parallel runs runhistory: RunHistory optional, RunHistory-object to reuse runs tae: ExecuteTARun tae to be used. if None, will initialize ExecuteTARunOld output_fn: str path to runhistory to be saved. 
if the suffix is not '.json', will be interpreted as directory and filename will be 'validated_runhistory.json' Returns ------- runhistory: RunHistory runhistory with validated runs """ self.logger.debug( "Validating configs '%s' on instances '%s', repeating %d times" " with %d parallel runs on backend '%s'.", config_mode, instance_mode, repetitions, n_jobs, backend) # Get all runs to be evaluated as list runs, validated_rh = self._get_runs(config_mode, instance_mode, repetitions, runhistory) # Create new Stats without limits inf_scen = Scenario({ 'run_obj': self.scen.run_obj, 'cutoff_time': self.scen.cutoff, 'output_dir': "" }) inf_stats = Stats(inf_scen) inf_stats.start_timing() # Create TAE if not tae: tae = ExecuteTARunOld(ta=self.scen.ta, stats=inf_stats, run_obj=self.scen.run_obj, par_factor=self.scen.par_factor, cost_for_crash=self.scen.cost_for_crash) else: # Inject endless-stats tae.stats = inf_stats # Validate! run_results = self._validate_parallel(tae, runs, n_jobs, backend) # tae returns (status, cost, runtime, additional_info) # Add runs to RunHistory idx = 0 for result in run_results: validated_rh.add(config=runs[idx].config, cost=result[1], time=result[2], status=result[0], instance_id=runs[idx].inst, seed=runs[idx].seed, additional_info=result[3]) idx += 1 if output_fn: self._save_results(validated_rh, output_fn, backup_fn="validated_runhistory.json") return validated_rh def _validate_parallel(self, tae: ExecuteTARun, runs: typing.List[_Run], n_jobs: int, backend: str): """ Validate runs with joblibs Parallel-interface Parameters ---------- tae: ExecuteTARun tae to be used for validation runs: list<_Run> list with _Run-objects [_Run(config=CONFIG1,inst=INSTANCE1,seed=SEED1,inst_specs=INST_SPECIFICS1), ...] 
n_jobs: int number of cpus to use for validation (-1 to use all) backend: str what backend to use for parallelization Returns ------- run_results: list<tuple(tae-returns)> results as returned by tae """ # Runs with parallel run_results = Parallel(n_jobs=n_jobs, backend=backend)( delayed(_unbound_tae_starter)(tae, run.config, run.inst, self.scen.cutoff, run.seed, run.inst_specs, capped=False) for run in runs) return run_results def validate_epm( self, config_mode: Union[str, typing.List[Configuration]] = 'def', instance_mode: Union[str, typing.List[str]] = 'test', repetitions: int = 1, runhistory: RunHistory = None, output_fn="", reuse_epm=True, ) -> RunHistory: """ Use EPM to predict costs/runtimes for unknown config/inst-pairs. side effect: if output is specified, saves runhistory to specified output directory. Parameters ---------- output_fn: str path to runhistory to be saved. if the suffix is not '.json', will be interpreted as directory and filename will be 'validated_runhistory_EPM.json' config_mode: str or list<Configuration> string or directly a list of Configuration, string from [def, inc, def+inc, wallclock_time, cpu_time, all]. 
time evaluates at cpu- or wallclock-timesteps of: [max_time/2^0, max_time/2^1, max_time/2^3, ..., default] with max_time being the highest recorded time instance_mode: str or list<str> what instances to use for validation, either from [train, test, train+test] or directly a list of instances repetitions: int number of repetitions in nondeterministic algorithms runhistory: RunHistory optional, RunHistory-object to reuse runs reuse_epm: bool if true (and if `self.epm`), reuse epm to validate runs Returns ------- runhistory: RunHistory runhistory with predicted runs """ if not isinstance(runhistory, RunHistory) and (self.epm is None or reuse_epm is False): raise ValueError( "No runhistory specified for validating with EPM!") elif reuse_epm is False or self.epm is None: # Create RandomForest types, bounds = get_types(self.scen.cs, self.scen.feature_array) self.epm = RandomForestWithInstances( configspace=self.scen.cs, types=types, bounds=bounds, instance_features=self.scen.feature_array, seed=self.rng.randint(MAXINT), ratio_features=1.0, ) # Use imputor if objective is runtime imputor = None impute_state = None impute_censored_data = False if self.scen.run_obj == 'runtime': threshold = self.scen.cutoff * self.scen.par_factor imputor = RFRImputator(rng=self.rng, cutoff=self.scen.cutoff, threshold=threshold, model=self.epm) impute_censored_data = True impute_state = [StatusType.CAPPED] # Transform training data (from given rh) rh2epm = RunHistory2EPM4Cost( num_params=len(self.scen.cs.get_hyperparameters()), scenario=self.scen, rng=self.rng, impute_censored_data=impute_censored_data, imputor=imputor, impute_state=impute_state) X, y = rh2epm.transform(runhistory) self.logger.debug("Training model with data of shape X: %s, y:%s", str(X.shape), str(y.shape)) # Train random forest self.epm.train(X, y) # Predict desired runs runs, rh_epm = self._get_runs(config_mode, instance_mode, repetitions, runhistory) feature_array_size = len(self.scen.cs.get_hyperparameters()) if 
self.scen.feature_array is not None: feature_array_size += self.scen.feature_array.shape[1] X_pred = np.empty((len(runs), feature_array_size)) for idx, run in enumerate(runs): if self.scen.feature_array is not None and run.inst is not None: X_pred[idx] = np.hstack([ convert_configurations_to_array([run.config])[0], self.scen.feature_dict[run.inst] ]) else: X_pred[idx] = convert_configurations_to_array([run.config])[0] self.logger.debug("Predicting desired %d runs, data has shape %s", len(runs), str(X_pred.shape)) y_pred = self.epm.predict(X_pred) # Add runs to runhistory for run, pred in zip(runs, y_pred[0]): rh_epm.add( config=run.config, cost=float(pred), time=float(pred), status=StatusType.SUCCESS, instance_id=run.inst, seed=-1, additional_info={"additional_info": "ESTIMATED USING EPM!"}) if output_fn: self._save_results(rh_epm, output_fn, backup_fn="validated_runhistory_EPM.json") return rh_epm def _get_runs( self, configs: Union[str, typing.List[Configuration]], insts: Union[str, typing.List[str]], repetitions: int = 1, runhistory: RunHistory = None, ) -> typing.Tuple[typing.List[_Run], RunHistory]: """ Generate list of SMAC-TAE runs to be executed. This means combinations of configs with all instances on a certain number of seeds. side effect: Adds runs that don't need to be reevaluated to self.rh! 
Parameters ---------- configs: str or list<Configuration> string or directly a list of Configuration str from [def, inc, def+inc, wallclock_time, cpu_time, all] time evaluates at cpu- or wallclock-timesteps of: [max_time/2^0, max_time/2^1, max_time/2^3, ..., default] with max_time being the highest recorded time insts: str or list<str> what instances to use for validation, either from [train, test, train+test] or directly a list of instances repetitions: int number of seeds per instance/config-pair to be evaluated runhistory: RunHistory optional, try to reuse this runhistory and save some runs Returns ------- runs: list<_Run> list with _Runs [_Run(config=CONFIG1,inst=INSTANCE1,seed=SEED1,inst_specs=INST_SPECIFICS1), _Run(config=CONFIG2,inst=INSTANCE2,seed=SEED2,inst_specs=INST_SPECIFICS2), ...] """ # Get relevant configurations and instances if isinstance(configs, str): configs = self._get_configs(configs) if isinstance(insts, str): insts = self._get_instances(insts) # If no instances are given, fix the instances to one "None" instance if not insts: insts = [None] # If algorithm is deterministic, fix repetitions to 1 if self.scen.deterministic and repetitions != 1: self.logger.warning( "Specified %d repetitions, but fixing to 1, " "because algorithm is deterministic.", repetitions) repetitions = 1 # Extract relevant information from given runhistory inst_seed_config = self._process_runhistory(configs, insts, runhistory) # Now create the actual run-list runs = [] # Counter for runs without the need of recalculation runs_from_rh = 0 # If we reuse runs, we want to return them as well new_rh = RunHistory(average_cost) for i in sorted(insts): for rep in range(repetitions): # First, find a seed and add all the data we can take from the # given runhistory to "our" validation runhistory. 
configs_evaluated = [] if runhistory and i in inst_seed_config: # Choose seed based on most often evaluated inst-seed-pair seed, configs_evaluated = inst_seed_config[i].pop(0) # Delete inst if all seeds are used if not inst_seed_config[i]: inst_seed_config.pop(i) # Add runs to runhistory for c in configs_evaluated[:]: runkey = RunKey(runhistory.config_ids[c], i, seed) cost, time, status, additional_info = runhistory.data[ runkey] if status in [ StatusType.CRASHED, StatusType.ABORT, StatusType.CAPPED ]: # Not properly executed target algorithm runs should be repeated configs_evaluated.remove(c) continue new_rh.add(c, cost, time, status, instance_id=i, seed=seed, additional_info=additional_info) runs_from_rh += 1 else: # If no runhistory or no entries for instance, get new seed seed = self.rng.randint(MAXINT) # We now have a seed and add all configs that are not already # evaluated on that seed to the runs-list. This way, we # guarantee the same inst-seed-pairs for all configs. for config in [ c for c in configs if not c in configs_evaluated ]: # Only use specifics if specific exists, else use string "0" specs = self.scen.instance_specific[ i] if i and i in self.scen.instance_specific else "0" runs.append( _Run(config=config, inst=i, seed=seed, inst_specs=specs)) self.logger.info( "Collected %d runs from %d configurations on %d " "instances with %d repetitions. Reusing %d runs from " "given runhistory.", len(runs), len(configs), len(insts), repetitions, runs_from_rh) return runs, new_rh def _process_runhistory(self, configs: typing.List[Configuration], insts: typing.List[str], runhistory: RunHistory): """ Processes runhistory from self._get_runs by extracting already evaluated (relevant) config-inst-seed tuples. 
Parameters ---------- configs: list(Configuration) list of configs of interest insts: list(str) list of instances of interest runhistory: RunHistory runhistory to extract runs from Returns ------- inst_seed_config: dict<str : list(tuple(int, tuple(configs)))> dictionary mapping instances to a list of tuples of already used seeds and the configs that this inst-seed-pair has been evaluated on, sorted by the number of configs """ # We want to reuse seeds that have been used on most configurations # To this end, we create a dictionary as {instances:{seed:[configs]}} # Like this we can easily retrieve the most used instance-seed pairs to # minimize the number of runs to be evaluated inst_seed_config = {} if runhistory: relevant = dict() for key in runhistory.data: if (runhistory.ids_config[key.config_id] in configs and key.instance_id in insts): relevant[key] = runhistory.data[key] # Change data-structure to {instances:[(seed1, (configs)), (seed2, (configs), ... ]} # to make most used seed easily accessible, we sort after length of configs for key in relevant: inst, seed = key.instance_id, key.seed config = runhistory.ids_config[key.config_id] if inst in inst_seed_config: if seed in inst_seed_config[inst]: inst_seed_config[inst][seed].append(config) else: inst_seed_config[inst][seed] = [config] else: inst_seed_config[inst] = {seed: [config]} inst_seed_config = { i: sorted([(seed, list(inst_seed_config[i][seed])) for seed in inst_seed_config[i]], key=lambda x: len(x[1])) for i in inst_seed_config } return inst_seed_config def _get_configs(self, mode: str) -> typing.List[str]: """ Return desired configs Parameters ---------- mode: str str from [def, inc, def+inc, wallclock_time, cpu_time, all] time evaluates at cpu- or wallclock-timesteps of: [max_time/2^0, max_time/2^1, max_time/2^3, ..., default] with max_time being the highest recorded time Returns ------- configs: list<Configuration> list with desired configurations """ # Add desired configs configs = [] mode = 
mode.lower() if mode not in [ 'def', 'inc', 'def+inc', 'wallclock_time', 'cpu_time', 'all' ]: raise ValueError( "%s not a valid option for config_mode in validation." % mode) if mode == "def" or mode == "def+inc": configs.append(self.scen.cs.get_default_configuration()) if mode == "inc" or mode == "def+inc": configs.append(self.traj[-1]["incumbent"]) if mode in ["wallclock_time", "cpu_time"]: # get highest time-entry and add entries from there # not using wallclock_limit in case it's inf if (mode == "wallclock_time" and np.isfinite(self.scen.wallclock_limit)): max_time = self.scen.wallclock_limit elif (mode == "cpu_time" and np.isfinite(self.scen.algo_runs_timelimit)): max_time = self.scen.algo_runs_timelimit else: max_time = self.traj[-1][mode] counter = 2**0 for entry in self.traj[::-1]: if (entry[mode] <= max_time / counter and entry["incumbent"] not in configs): configs.append(entry["incumbent"]) counter *= 2 if not self.traj[0]["incumbent"] in configs: configs.append(self.traj[0]["incumbent"]) # add first if mode == "all": for entry in self.traj: if not entry["incumbent"] in configs: configs.append(entry["incumbent"]) self.logger.debug("Gathered %d configurations for mode %s.", len(configs), mode) return configs def _get_instances(self, mode: str) -> typing.List[str]: """ Get desired instances Parameters ---------- mode: str what instances to use for validation, from [train, test, train+test] Returns ------- instances: list<str> instances to be used """ instance_mode = mode.lower() if mode not in ['train', 'test', 'train+test']: raise ValueError( "%s not a valid option for instance_mode in validation." % mode) # Make sure if instances matter, than instances should be passed if ((instance_mode == 'train' and self.scen.train_insts == [None]) or (instance_mode == 'test' and self.scen.test_insts == [None])): self.logger.warning( "Instance mode is set to %s, but there are no " "%s-instances specified in the scenario. 
Setting instance mode to" "\"train+test\"!", instance_mode, instance_mode) instance_mode = 'train+test' instances = [] if ((instance_mode == 'train' or instance_mode == 'train+test') and not self.scen.train_insts == [None]): instances.extend(self.scen.train_insts) if ((instance_mode == 'test' or instance_mode == 'train+test') and not self.scen.test_insts == [None]): instances.extend(self.scen.test_insts) return instances
def validate_epm(
        self,
        config_mode: Union[str, typing.List[Configuration]] = 'def',
        instance_mode: Union[str, typing.List[str]] = 'test',
        repetitions: int = 1,
        runhistory: RunHistory = None,
        output_fn="",
        reuse_epm=True,
) -> RunHistory:
    """
    Estimate costs for config/instance pairs with an EPM instead of
    executing the target algorithm.

    A new random forest is fitted on ``runhistory`` (and stored in
    ``self.epm``) whenever no model is available or ``reuse_epm`` is False;
    otherwise the existing ``self.epm`` is used for the predictions.

    Side effect: if ``output_fn`` is non-empty, the resulting runhistory is
    handed to ``self._save_results``.

    Parameters
    ----------
    config_mode: str or list<Configuration>
        configurations to predict for, forwarded to ``self._get_runs``;
        either a list of Configuration or a string from
        [def, inc, def+inc, wallclock_time, cpu_time, all]
    instance_mode: str or list<str>
        instances to predict for, forwarded to ``self._get_runs``;
        either a list of instances or a string from
        [train, test, train+test]
    repetitions: int
        number of repetitions, forwarded to ``self._get_runs``
    runhistory: RunHistory
        training data for a newly created model; also forwarded to
        ``self._get_runs`` so that existing runs can be reused
    output_fn: str
        where to save the predicted runhistory; nothing is saved if empty
    reuse_epm: bool
        if True and ``self.epm`` is set, predict with the existing model

    Returns
    -------
    runhistory: RunHistory
        runhistory containing the reused runs plus one predicted entry per
        remaining run (status SUCCESS, seed -1)

    Raises
    ------
    ValueError
        if a model has to be trained but ``runhistory`` is not a RunHistory
    """
    must_train = self.epm is None or reuse_epm is False
    if must_train:
        if not isinstance(runhistory, RunHistory):
            raise ValueError(
                "No runhistory specified for validating with EPM!")

        # Build a fresh random forest over the scenario's config space
        types, bounds = get_types(self.scen.cs, self.scen.feature_array)
        self.epm = RandomForestWithInstances(
            configspace=self.scen.cs,
            types=types,
            bounds=bounds,
            instance_features=self.scen.feature_array,
            seed=self.rng.randint(MAXINT),
            ratio_features=1.0,
        )

        # Capped runs are only imputed when the objective is runtime
        optimizes_runtime = self.scen.run_obj == 'runtime'
        if optimizes_runtime:
            par_threshold = self.scen.cutoff * self.scen.par_factor
            imputor = RFRImputator(rng=self.rng,
                                   cutoff=self.scen.cutoff,
                                   threshold=par_threshold,
                                   model=self.epm)
            impute_state = [StatusType.CAPPED]
        else:
            imputor = None
            impute_state = None

        # Turn the given runhistory into a training matrix and fit the model
        rh2epm = RunHistory2EPM4Cost(
            num_params=len(self.scen.cs.get_hyperparameters()),
            scenario=self.scen,
            rng=self.rng,
            impute_censored_data=optimizes_runtime,
            imputor=imputor,
            impute_state=impute_state)
        X, y = rh2epm.transform(runhistory)
        self.logger.debug("Training model with data of shape X: %s, y:%s",
                          str(X.shape), str(y.shape))
        self.epm.train(X, y)

    # Collect the runs that still need a prediction
    runs, rh_epm = self._get_runs(config_mode, instance_mode, repetitions,
                                  runhistory)

    # One row per run: config vector, followed by instance features if any
    n_columns = len(self.scen.cs.get_hyperparameters())
    if self.scen.feature_array is not None:
        n_columns += self.scen.feature_array.shape[1]
    X_pred = np.empty((len(runs), n_columns))
    for row, run in enumerate(runs):
        config_vector = convert_configurations_to_array([run.config])[0]
        if self.scen.feature_array is None or run.inst is None:
            X_pred[row] = config_vector
        else:
            X_pred[row] = np.hstack(
                [config_vector, self.scen.feature_dict[run.inst]])
    self.logger.debug("Predicting desired %d runs, data has shape %s",
                      len(runs), str(X_pred.shape))

    y_pred = self.epm.predict(X_pred)

    # Store every prediction as a successful run with the marker seed -1
    for run, pred in zip(runs, y_pred[0]):
        estimate = float(pred)
        rh_epm.add(
            config=run.config,
            cost=estimate,
            time=estimate,
            status=StatusType.SUCCESS,
            instance_id=run.inst,
            seed=-1,
            additional_info={"additional_info": "ESTIMATED USING EPM!"})

    if output_fn:
        self._save_results(rh_epm,
                           output_fn,
                           backup_fn="validated_runhistory_EPM.json")
    return rh_epm
def _component_builder(self, conf: typing.Union[Configuration, dict]) \
        -> typing.Tuple[AbstractAcquisitionFunction, AbstractEPM]:
    """
    Build a new EPM and a matching acquisition function from ``conf``.

    Parameters
    ----------
    conf: typing.Union[Configuration, dict]
        configuration specifying "model" ("RF" or "GP") and "acq_func"
        ("EI", "LCB", "PI" or "LogEI"). Optional entries override the
        scenario defaults of the random forest ("pca_dim", "log_y",
        "num_trees", "do_bootstrapping", "ratio_features",
        "min_samples_split", "min_samples_leaf", "max_depth") and set the
        acquisition function parameter ("par_ei", "par_lcb", "par_pi",
        "par_logei"; default 0).

    Returns
    -------
    typing.Tuple[AbstractAcquisitionFunction, AbstractEPM]
        the acquisition function (already bound to the model) and the model

    Raises
    ------
    ValueError
        if "model" or "acq_func" has an unsupported value, or if a GP is
        requested for a space without any dimension
    """
    # Validate both selectors up front: without this an unsupported value
    # falls through every branch and surfaces as an UnboundLocalError, for
    # "acq_func" only after a (possibly expensive) model was already built.
    model_name = conf["model"]
    if model_name not in ("RF", "GP"):
        raise ValueError(
            "Unknown model '%s', must be one of 'RF' or 'GP'." % model_name)
    acq_name = conf["acq_func"]
    if acq_name not in ("EI", "LCB", "PI", "LogEI"):
        raise ValueError(
            "Unknown acquisition function '%s', must be one of 'EI', 'LCB', "
            "'PI' or 'LogEI'." % acq_name)

    types, bounds = get_types(
        self.config_space, instance_features=self.scenario.feature_array)

    if model_name == "RF":
        # Every forest hyperparameter falls back to the scenario setting
        model = RandomForestWithInstances(
            configspace=self.config_space,
            types=types,
            bounds=bounds,
            instance_features=self.scenario.feature_array,
            seed=self.rng.randint(MAXINT),
            pca_components=conf.get("pca_dim", self.scenario.PCA_DIM),
            log_y=conf.get("log_y",
                           self.scenario.transform_y in ["LOG", "LOGS"]),
            num_trees=conf.get("num_trees", self.scenario.rf_num_trees),
            do_bootstrapping=conf.get("do_bootstrapping",
                                      self.scenario.rf_do_bootstrapping),
            ratio_features=conf.get("ratio_features",
                                    self.scenario.rf_ratio_features),
            min_samples_split=conf.get("min_samples_split",
                                       self.scenario.rf_min_samples_split),
            min_samples_leaf=conf.get("min_samples_leaf",
                                      self.scenario.rf_min_samples_leaf),
            max_depth=conf.get("max_depth", self.scenario.rf_max_depth),
        )
    else:  # model_name == "GP"
        from dsmac.epm.gp_kernels import ConstantKernel, HammingKernel, WhiteKernel, Matern

        cov_amp = ConstantKernel(
            2.0,
            constant_value_bounds=(np.exp(-10), np.exp(2)),
            prior=LognormalPrior(mean=0.0, sigma=1.0, rng=self.rng),
        )

        # Dimensions whose entry in `types` is 0 get a Matern kernel,
        # all other dimensions a Hamming kernel.
        cont_dims = np.nonzero(types == 0)[0]
        cat_dims = np.nonzero(types != 0)[0]
        has_cont = len(cont_dims) > 0
        has_cat = len(cat_dims) > 0

        if has_cont:
            exp_kernel = Matern(
                np.ones([len(cont_dims)]),
                [(np.exp(-10), np.exp(2)) for _ in range(len(cont_dims))],
                nu=2.5,
                operate_on=cont_dims,
            )
        if has_cat:
            ham_kernel = HammingKernel(
                np.ones([len(cat_dims)]),
                [(np.exp(-10), np.exp(2)) for _ in range(len(cat_dims))],
                operate_on=cat_dims,
            )
        noise_kernel = WhiteKernel(
            noise_level=1e-8,
            noise_level_bounds=(np.exp(-25), np.exp(2)),
            prior=HorseshoePrior(scale=0.1, rng=self.rng),
        )

        if has_cont and has_cat:
            # both
            kernel = cov_amp * (exp_kernel * ham_kernel) + noise_kernel
        elif has_cont:
            # only cont
            kernel = cov_amp * exp_kernel + noise_kernel
        elif has_cat:
            # only cat
            kernel = cov_amp * ham_kernel + noise_kernel
        else:
            raise ValueError(
                "Cannot build a GP kernel: `types` contains no dimensions.")

        # Three walkers per kernel hyperparameter, rounded up to an even count
        n_mcmc_walkers = 3 * len(kernel.theta)
        if n_mcmc_walkers % 2 == 1:
            n_mcmc_walkers += 1

        model = GaussianProcessMCMC(
            self.config_space,
            types=types,
            bounds=bounds,
            kernel=kernel,
            n_mcmc_walkers=n_mcmc_walkers,
            chain_length=250,
            burnin_steps=250,
            normalize_y=True,
            seed=self.rng.randint(low=0, high=10000),
        )

    if acq_name == "EI":
        acq = EI(model=model, par=conf.get("par_ei", 0))
    elif acq_name == "LCB":
        acq = LCB(model=model, par=conf.get("par_lcb", 0))
    elif acq_name == "PI":
        acq = PI(model=model, par=conf.get("par_pi", 0))
    else:  # acq_name == "LogEI"
        # par value should be in log-space
        acq = LogEI(model=model, par=conf.get("par_logei", 0))

    return acq, model
def __init__(
        self,
        scenario: Scenario,
        # TODO: once we drop python3.4 add type hint
        # typing.Union[ExecuteTARun, callable]
        tae_runner=None,
        runhistory: RunHistory = None,
        intensifier: Intensifier = None,
        acquisition_function: AbstractAcquisitionFunction = None,
        model: AbstractEPM = None,
        runhistory2epm: AbstractRunHistory2EPM = None,
        initial_design: InitialDesign = None,
        initial_configurations: typing.List[Configuration] = None,
        stats: Stats = None,
        rng: np.random.RandomState = None,
        run_id: int = 1):
    """Constructor

    Wires together all components of an EPILS run and stores the resulting
    ``EPILS_Solver`` in ``self.solver``. Every component that is not passed
    in is created from the scenario; components that are passed in get
    missing dependencies (stats, runhistory, model, ...) injected.

    Side effects: creates the output directory, calls ``scenario.write()``
    and re-seeds the scenario's configuration space.

    Parameters
    ----------
    scenario : Scenario
        Scenario object
    tae_runner : ExecuteTARun or callable
        ``None``: an ``ExecuteTARunOld`` is built from ``scenario.ta``.
        A callable is wrapped in ``ExecuteTAFuncDict``. Anything else must
        be an ``ExecuteTARun`` instance.
    runhistory : RunHistory
        runhistory to use; a new one is created if ``None``
    intensifier : Intensifier
        intensifier to use; built from the scenario if ``None``
    acquisition_function : AbstractAcquisitionFunction
        if ``None``, ``LogEI`` is used for the runtime objective and ``EI``
        otherwise
    model : AbstractEPM
        if ``None``, a ``RandomForestWithInstances`` is built from the
        scenario's random forest settings
    runhistory2epm : AbstractRunHistory2EPM
        if ``None``, chosen by ``scenario.run_obj`` ('runtime' or 'quality')
    initial_design : InitialDesign
        initial design; mutually exclusive with ``initial_configurations``.
        If both are ``None``, chosen by ``scenario.initial_incumbent``
        ("DEFAULT" or "RANDOM").
    initial_configurations : typing.List[Configuration]
        configurations to start from; mutually exclusive with
        ``initial_design``
    stats : Stats
        stats object; a new one is created if not given
    rng : np.random.RandomState
        passed to ``self._get_rng``, which returns the run number and the
        random number generator used for all components
    run_id : int
        passed to ``create_output_directory``

    Raises
    ------
    TypeError
        if ``tae_runner`` is neither ``None``, a callable nor an
        ``ExecuteTARun``
    ValueError
        if the objectives of ``tae_runner`` and ``scenario`` differ, if both
        ``initial_design`` and ``initial_configurations`` are given, or if
        ``scenario.initial_incumbent`` / ``scenario.run_obj`` is unknown
    """
    self.logger = logging.getLogger(self.__module__ + "." +
                                    self.__class__.__name__)

    aggregate_func = average_cost

    self.runhistory = None
    self.trajectory = None

    # initialize stats object
    if stats:
        self.stats = stats
    else:
        self.stats = Stats(scenario, file_system=scenario.file_system)

    self.output_dir = create_output_directory(scenario, run_id)
    scenario.write()

    # initialize empty runhistory
    if runhistory is None:
        runhistory = RunHistory(aggregate_func=aggregate_func,
                                file_system=scenario.file_system)
    # inject aggr_func if necessary
    if runhistory.aggregate_func is None:
        runhistory.aggregate_func = aggregate_func

    # initial random number generator
    num_run, rng = self._get_rng(rng=rng)

    # reset random number generator in config space to draw different
    # random configurations with each seed given to SMAC
    scenario.cs.seed(rng.randint(MAXINT))

    # initial Trajectory Logger
    traj_logger = TrajLogger(output_dir=self.output_dir,
                             stats=self.stats,
                             file_system=scenario.file_system)

    # initial EPM
    types, bounds = get_types(scenario.cs, scenario.feature_array)
    if model is None:
        model = RandomForestWithInstances(
            configspace=scenario.cs,
            types=types,
            bounds=bounds,
            instance_features=scenario.feature_array,
            seed=rng.randint(MAXINT),
            pca_components=scenario.PCA_DIM,
            num_trees=scenario.rf_num_trees,
            do_bootstrapping=scenario.rf_do_bootstrapping,
            ratio_features=scenario.rf_ratio_features,
            min_samples_split=scenario.rf_min_samples_split,
            min_samples_leaf=scenario.rf_min_samples_leaf,
            max_depth=scenario.rf_max_depth,
        )

    # initial acquisition function
    if acquisition_function is None:
        if scenario.run_obj == "runtime":
            acquisition_function = LogEI(model=model)
        else:
            acquisition_function = EI(model=model)
    # inject model if necessary
    if acquisition_function.model is None:
        acquisition_function.model = model

    # initialize optimizer on acquisition function
    local_search = LocalSearch(
        acquisition_function,
        scenario.cs,
        max_steps=scenario.sls_max_steps,
        n_steps_plateau_walk=scenario.sls_n_steps_plateau_walk)

    # initialize tae_runner
    # First case, if tae_runner is None, the target algorithm is a call
    # string in the scenario file
    if tae_runner is None:
        tae_runner = ExecuteTARunOld(
            ta=scenario.ta,
            stats=self.stats,
            run_obj=scenario.run_obj,
            runhistory=runhistory,
            par_factor=scenario.par_factor,
            cost_for_crash=scenario.cost_for_crash)
    # Second case, the tae_runner is a function to be optimized
    elif callable(tae_runner):
        tae_runner = ExecuteTAFuncDict(
            ta=tae_runner,
            stats=self.stats,
            run_obj=scenario.run_obj,
            memory_limit=scenario.memory_limit,
            runhistory=runhistory,
            par_factor=scenario.par_factor,
            cost_for_crash=scenario.cost_for_crash)
    # Third case, if it is an ExecuteTaRun we can simply use the
    # instance. Otherwise, the next check raises an exception
    elif not isinstance(tae_runner, ExecuteTARun):
        raise TypeError("Argument 'tae_runner' is %s, but must be "
                        "either a callable or an instance of "
                        "ExecuteTaRun. Passing 'None' will result in the "
                        "creation of target algorithm runner based on the "
                        "call string in the scenario file." %
                        type(tae_runner))

    # Check that overall objective and tae objective are the same
    if tae_runner.run_obj != scenario.run_obj:
        raise ValueError("Objective for the target algorithm runner and "
                         "the scenario must be the same, but are '%s' and "
                         "'%s'" % (tae_runner.run_obj, scenario.run_obj))

    # inject stats if necessary
    if tae_runner.stats is None:
        tae_runner.stats = self.stats
    # inject runhistory if necessary
    if tae_runner.runhistory is None:
        tae_runner.runhistory = runhistory
    # inject cost_for_crash
    if tae_runner.crash_cost != scenario.cost_for_crash:
        tae_runner.crash_cost = scenario.cost_for_crash

    # initialize intensification
    if intensifier is None:
        intensifier = Intensifier(
            tae_runner=tae_runner,
            stats=self.stats,
            traj_logger=traj_logger,
            rng=rng,
            instances=scenario.train_insts,
            cutoff=scenario.cutoff,
            deterministic=scenario.deterministic,
            run_obj_time=scenario.run_obj == "runtime",
            always_race_against=scenario.cs.get_default_configuration()
            if scenario.always_race_default else None,
            instance_specifics=scenario.instance_specific,
            minR=scenario.minR,
            maxR=scenario.maxR,
            adaptive_capping_slackfactor=scenario.
            intens_adaptive_capping_slackfactor,
            min_chall=scenario.intens_min_chall,
            distributer=scenario.distributer)
    # inject deps if necessary
    if intensifier.tae_runner is None:
        intensifier.tae_runner = tae_runner
    if intensifier.stats is None:
        intensifier.stats = self.stats
    if intensifier.traj_logger is None:
        intensifier.traj_logger = traj_logger

    # initial design
    if initial_design is not None and initial_configurations is not None:
        raise ValueError(
            "Either use initial_design or initial_configurations; but not both"
        )

    if initial_configurations is not None:
        initial_design = InitialDesign(tae_runner=tae_runner,
                                       scenario=scenario,
                                       stats=self.stats,
                                       traj_logger=traj_logger,
                                       runhistory=runhistory,
                                       rng=rng,
                                       configs=initial_configurations,
                                       intensifier=intensifier,
                                       aggregate_func=aggregate_func)
    elif initial_design is None:
        if scenario.initial_incumbent == "DEFAULT":
            initial_design = DefaultConfiguration(
                tae_runner=tae_runner,
                scenario=scenario,
                stats=self.stats,
                traj_logger=traj_logger,
                runhistory=runhistory,
                rng=rng,
                intensifier=intensifier,
                aggregate_func=aggregate_func,
                max_config_fracs=0.0)
        elif scenario.initial_incumbent == "RANDOM":
            initial_design = RandomConfigurations(
                tae_runner=tae_runner,
                scenario=scenario,
                stats=self.stats,
                traj_logger=traj_logger,
                runhistory=runhistory,
                rng=rng,
                intensifier=intensifier,
                aggregate_func=aggregate_func,
                max_config_fracs=0.0)
        else:
            raise ValueError("Don't know what kind of initial_incumbent "
                             "'%s' is" % scenario.initial_incumbent)
    # inject deps if necessary
    if initial_design.tae_runner is None:
        initial_design.tae_runner = tae_runner
    if initial_design.scenario is None:
        initial_design.scenario = scenario
    if initial_design.stats is None:
        initial_design.stats = self.stats
    if initial_design.traj_logger is None:
        initial_design.traj_logger = traj_logger

    # initial conversion of runhistory into EPM data
    if runhistory2epm is None:

        num_params = len(scenario.cs.get_hyperparameters())
        if scenario.run_obj == "runtime":

            # if we log the performance data,
            # the RFRImputator will already get
            # log transform data from the runhistory
            cutoff = np.log(scenario.cutoff)
            threshold = np.log(scenario.cutoff * scenario.par_factor)

            imputor = RFRImputator(rng=rng,
                                   cutoff=cutoff,
                                   threshold=threshold,
                                   model=model,
                                   change_threshold=0.01,
                                   max_iter=2)

            runhistory2epm = RunHistory2EPM4LogCost(
                scenario=scenario,
                num_params=num_params,
                success_states=[
                    StatusType.SUCCESS,
                ],
                impute_censored_data=True,
                impute_state=[
                    StatusType.CAPPED,
                ],
                imputor=imputor)

        elif scenario.run_obj == 'quality':
            runhistory2epm = RunHistory2EPM4Cost(
                scenario=scenario,
                num_params=num_params,
                success_states=[
                    StatusType.SUCCESS,
                ],
                impute_censored_data=False,
                impute_state=None)

        else:
            # Use the local `scenario`: this constructor never assigns
            # `self.scenario`, so formatting the message from it would
            # mask this ValueError with an AttributeError.
            raise ValueError('Unknown run objective: %s. Should be either '
                             'quality or runtime.' % scenario.run_obj)

    # inject scenario if necessary:
    if runhistory2epm.scenario is None:
        runhistory2epm.scenario = scenario

    self.solver = EPILS_Solver(scenario=scenario,
                               stats=self.stats,
                               initial_design=initial_design,
                               runhistory=runhistory,
                               runhistory2epm=runhistory2epm,
                               intensifier=intensifier,
                               aggregate_func=aggregate_func,
                               num_run=num_run,
                               model=model,
                               acq_optimizer=local_search,
                               acquisition_func=acquisition_function,
                               rng=rng)