def test_get_next_by_random_search_sorted(self, patch_sample, patch_ei, patch_impute):
    """Check that a sorted random search returns candidates in descending acquisition order.

    The three ``patch_*`` arguments are mocks configured below to supply the
    sampled configurations, the acquisition values and the imputed arrays.
    """
    values = (10, 1, 9, 2, 8, 3, 7, 4, 6, 5)
    patch_sample.return_value = [ConfigurationMock(value) for value in values]
    patch_ei.return_value = np.array([[value] for value in values], dtype=float)
    patch_impute.side_effect = lambda l: values

    config_space = ConfigurationSpace()
    acquisition = EI(None)
    searcher = RandomSearch(acquisition, config_space)

    results = searcher._maximize(
        runhistory=None,
        stats=None,
        num_points=10,
        _sorted=True,
    )

    self.assertEqual(len(results), 10)
    # The mocked acquisition values are 1..10, so the sorted output must
    # count down from 10 to 1.
    for expected, entry in zip(range(10, 0, -1), results):
        candidate = entry[1]
        self.assertIsInstance(candidate, ConfigurationMock)
        self.assertEqual(candidate.value, expected)
        self.assertEqual(entry[0], expected)
        self.assertEqual(candidate.origin, 'Random Search (sorted)')

    # Check that config.get_array works as desired and imputation is used
    # in between, we therefore have to retrieve the value from the mock!
    passed_to_ei = patch_ei.call_args[0][0]
    np.testing.assert_allclose(
        [config.value for config in passed_to_ei],
        np.array(values, dtype=float),
    )
def __init__(
        self,
        acquisition_function: AbstractAcquisitionFunction,
        config_space: ConfigurationSpace,
        rng: Union[bool, np.random.RandomState] = None,
):
    """Set up a maximizer that owns both a random search and a local search.

    Parameters
    ----------
    acquisition_function : AbstractAcquisitionFunction
        Forwarded to the parent constructor and to both sub-searches.
    config_space : ConfigurationSpace
        Forwarded to the parent constructor and to both sub-searches.
    rng : np.random.RandomState, optional
        Forwarded unchanged to the parent constructor and to both
        sub-searches.
    """
    super().__init__(acquisition_function, config_space, rng)
    self.random_search = RandomSearch(acquisition_function, config_space, rng)
    self.local_search = LocalSearch(acquisition_function, config_space, rng)
    # ``sys.float_info.min`` is the smallest *positive* normalized float
    # (about 2.2e-308), not the most negative one. As the starting point of
    # a running maximum it would wrongly beat every non-positive
    # acquisition value, so start from the most negative finite float.
    self.max_acq_value = -sys.float_info.max
def test_get_next_by_random_search(self, patch):
    """Check the unsorted random search: ten mock configurations, each scored 0."""
    # The patched sampler hands back ``size`` references to one mock config.
    patch.side_effect = lambda size: [ConfigurationMock()] * size

    config_space = ConfigurationSpace()
    acquisition = EI(None)
    searcher = RandomSearch(acquisition, config_space)

    results = searcher._maximize(
        runhistory=None,
        stats=None,
        num_points=10,
        _sorted=False,
    )

    self.assertEqual(len(results), 10)
    for position in range(10):
        entry = results[position]
        self.assertIsInstance(entry[1], ConfigurationMock)
        self.assertEqual(entry[1].origin, 'Random Search')
        self.assertEqual(entry[0], 0)
def __init__(self,
             scenario: Scenario,
             stats: Stats,
             runhistory: RunHistory,
             runhistory2epm: AbstractRunHistory2EPM,
             model: RandomForestWithInstances,
             acq_optimizer: AcquisitionFunctionMaximizer,
             acquisition_func: AbstractAcquisitionFunction,
             rng: np.random.RandomState,
             restore_incumbent: Configuration = None,
             random_configuration_chooser: typing.Union[RandomConfigurationChooser] = ChooserNoCoolDown(2.0),
             predict_x_best: bool = True,
             min_samples_model: int = 1):
    """Store the collaborators and initialise the chooser's bookkeeping state.

    All arguments are kept as attributes; ``runhistory2epm`` is stored as
    ``rh2EPM`` and ``restore_incumbent`` as ``incumbent``. A private
    ``RandomSearch`` is additionally built from ``acquisition_func``, the
    scenario's configuration space and ``rng``.
    """
    logger_name = ".".join((self.__module__, self.__class__.__name__))
    self.logger = logging.getLogger(logger_name)

    # Collaborators handed in by the caller.
    self.scenario = scenario
    self.stats = stats
    self.runhistory = runhistory
    self.rh2EPM = runhistory2epm
    self.model = model
    self.acq_optimizer = acq_optimizer
    self.acquisition_func = acquisition_func
    self.rng = rng
    self.random_configuration_chooser = random_configuration_chooser
    self.incumbent = restore_incumbent

    # Tunables.
    self.predict_x_best = predict_x_best
    self.min_samples_model = min_samples_model

    # Internal state.
    self.initial_design_configs = []  # type: typing.List[Configuration]
    self.currently_considered_budgets = [0.0]

    self._random_search = RandomSearch(
        acquisition_func,
        self.scenario.cs,  # type: ignore[attr-defined] # noqa F821
        rng,
    )
def test_challenger_list_callback(self, patch_sample, patch_ei, patch_impute):
    """Check that ``maximize`` calls ``_maximize`` lazily and at most once.

    The iterator returned by ``maximize`` must not call ``_maximize`` until a
    non-random challenger is actually requested.
    """
    values = (10, 1, 9, 2, 8, 3, 7, 4, 6, 5)
    patch_sample.return_value = ConfigurationMock(1)
    patch_ei.return_value = np.array([[value] for value in values], dtype=float)
    patch_impute.side_effect = lambda l: values

    config_space = ConfigurationSpace()
    acquisition = EI(None)
    searcher = RandomSearch(acquisition, config_space)

    # Case 1: no random chooser -> the first next() triggers the callback.
    searcher._maximize = unittest.mock.Mock()
    searcher._maximize.return_value = [(0, 0)]
    challengers = searcher.maximize(
        runhistory=None,
        stats=None,
        num_points=10,
    )
    self.assertEqual(searcher._maximize.call_count, 0)
    next(challengers)
    self.assertEqual(searcher._maximize.call_count, 1)

    # Case 2: the chooser asks for a random configuration first.
    random_configuration_chooser = unittest.mock.Mock()
    random_configuration_chooser.check.side_effect = [True, False, False, False]
    searcher._maximize = unittest.mock.Mock()
    searcher._maximize.return_value = [(0, 0), (1, 1)]
    challengers = searcher.maximize(
        runhistory=None,
        stats=None,
        num_points=10,
        random_configuration_chooser=random_configuration_chooser,
    )
    self.assertEqual(searcher._maximize.call_count, 0)

    # The first configuration is chosen at random (see the random_configuration_chooser mock)
    first = next(challengers)
    self.assertIsInstance(first, ConfigurationMock)
    self.assertEqual(searcher._maximize.call_count, 0)

    # The 2nd configuration triggers the call to the callback (see the random_configuration_chooser mock)
    second = next(challengers)
    self.assertEqual(searcher._maximize.call_count, 1)
    self.assertEqual(second, 0)

    # The 3rd configuration doesn't trigger the callback any more
    third = next(challengers)
    self.assertEqual(searcher._maximize.call_count, 1)
    self.assertEqual(third, 1)

    with self.assertRaises(StopIteration):
        next(challengers)
def __init__(self,
             scenario: Scenario,
             stats: Stats,
             runhistory: RunHistory,
             runhistory2epm: AbstractRunHistory2EPM,
             model: RandomForestWithInstances,
             acq_optimizer: AcquisitionFunctionMaximizer,
             acquisition_func: AbstractAcquisitionFunction,
             rng: np.random.RandomState,
             restore_incumbent: Configuration = None,
             random_configuration_chooser: typing.Union[RandomConfigurationChooser] = ChooserNoCoolDown(2.0),
             predict_x_best: bool = True,
             min_samples_model: int = 1):
    """
    Interface to train the EPM and generate next configurations

    Parameters
    ----------
    scenario: smac.scenario.scenario.Scenario
        Scenario object
    stats: smac.stats.stats.Stats
        statistics object with configuration budgets
    runhistory: smac.runhistory.runhistory.RunHistory
        runhistory with all runs so far
    runhistory2epm: AbstractRunHistory2EPM
        stored as ``self.rh2EPM``
    model: smac.epm.rf_with_instances.RandomForestWithInstances
        empirical performance model (right now, we support only
        RandomForestWithInstances)
    acq_optimizer: smac.optimizer.ei_optimization.AcquisitionFunctionMaximizer
        Optimizer of acquisition function.
    acquisition_func: AbstractAcquisitionFunction
        stored on the instance and also handed to the internal
        ``RandomSearch``
    rng: np.random.RandomState
        Random number generator
    restore_incumbent: Configuration
        incumbent to be used from the start. ONLY used to restore states.
    random_configuration_chooser:
        Chooser for random configuration -- one of

        * ChooserNoCoolDown(modulus)
        * ChooserLinearCoolDown(start_modulus, modulus_increment, end_modulus)
    predict_x_best: bool
        Choose x_best for computing the acquisition function via the model
        instead of via the observations.
    min_samples_model: int
        Minimum number of samples to build a model
    """
    logger_name = ".".join((self.__module__, self.__class__.__name__))
    self.logger = logging.getLogger(logger_name)

    # Collaborators handed in by the caller.
    self.scenario = scenario
    self.stats = stats
    self.runhistory = runhistory
    self.rh2EPM = runhistory2epm
    self.model = model
    self.acq_optimizer = acq_optimizer
    self.acquisition_func = acquisition_func
    self.rng = rng
    self.random_configuration_chooser = random_configuration_chooser
    self.incumbent = restore_incumbent

    # Tunables.
    self.predict_x_best = predict_x_best
    self.min_samples_model = min_samples_model

    # Internal state.
    self.initial_design_configs = []  # type: typing.List[Configuration]
    self.currently_considered_budgets = [0.0]

    self._random_search = RandomSearch(
        acquisition_func,
        self.scenario.cs,  # type: ignore[attr-defined] # noqa F821
        rng,
    )
class EPMChooser(object):

    def __init__(self,
                 scenario: Scenario,
                 stats: Stats,
                 runhistory: RunHistory,
                 runhistory2epm: AbstractRunHistory2EPM,
                 model: RandomForestWithInstances,
                 acq_optimizer: AcquisitionFunctionMaximizer,
                 acquisition_func: AbstractAcquisitionFunction,
                 rng: np.random.RandomState,
                 restore_incumbent: Configuration = None,
                 random_configuration_chooser: typing.Union[RandomConfigurationChooser] = ChooserNoCoolDown(2.0),
                 predict_x_best: bool = True,
                 min_samples_model: int = 1):
        """
        Interface to train the EPM and generate next configurations

        Parameters
        ----------
        scenario: smac.scenario.scenario.Scenario
            Scenario object
        stats: smac.stats.stats.Stats
            statistics object with configuration budgets
        runhistory: smac.runhistory.runhistory.RunHistory
            runhistory with all runs so far
        runhistory2epm: AbstractRunHistory2EPM
            converts the runhistory into training data; stored as
            ``self.rh2EPM``
        model: smac.epm.rf_with_instances.RandomForestWithInstances
            empirical performance model (right now, we support only
            RandomForestWithInstances)
        acq_optimizer: smac.optimizer.ei_optimization.AcquisitionFunctionMaximizer
            Optimizer of acquisition function.
        acquisition_func: AbstractAcquisitionFunction
            acquisition function updated in :meth:`choose_next`
        rng: np.random.RandomState
            Random number generator
        restore_incumbent: Configuration
            incumbent to be used from the start. ONLY used to restore states.
        random_configuration_chooser:
            Chooser for random configuration -- one of

            * ChooserNoCoolDown(modulus)
            * ChooserLinearCoolDown(start_modulus, modulus_increment, end_modulus)
        predict_x_best: bool
            Choose x_best for computing the acquisition function via the
            model instead of via the observations.
        min_samples_model: int
            Minimum number of samples to build a model
        """
        self.logger = logging.getLogger(self.__module__ + "." + self.__class__.__name__)
        self.incumbent = restore_incumbent

        self.scenario = scenario
        self.stats = stats
        self.runhistory = runhistory
        self.rh2EPM = runhistory2epm
        self.model = model
        self.acq_optimizer = acq_optimizer
        self.acquisition_func = acquisition_func
        self.rng = rng
        self.random_configuration_chooser = random_configuration_chooser

        self._random_search = RandomSearch(
            acquisition_func,
            self.scenario.cs,  # type: ignore[attr-defined] # noqa F821
            rng,
        )

        self.initial_design_configs = []  # type: typing.List[Configuration]

        self.predict_x_best = predict_x_best

        self.min_samples_model = min_samples_model
        self.currently_considered_budgets = [0.0, ]

    def _collect_data_to_train_model(
            self) -> typing.Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Pick the highest budget with enough samples and return its training data.

        Side effect: ``self.currently_considered_budgets`` is set to the
        chosen budget.

        Returns
        -------
        X, Y, configs_array : np.ndarray
            Output of ``rh2EPM.transform`` and ``rh2EPM.get_configurations``
            for the chosen budget, or three empty arrays when no budget has
            at least ``min_samples_model`` rows.
        """
        # if we use a float value as a budget, we want to train the model only on the highest budget
        # Distinct budgets, sorted from highest to lowest.
        available_budgets = sorted(
            {run_key.budget for run_key in self.runhistory.data.keys()},
            reverse=True,
        )

        # Get #points per budget and if there are enough samples, then build a model
        for b in available_budgets:
            X, Y = self.rh2EPM.transform(self.runhistory, budget_subset=[b, ])
            if X.shape[0] >= self.min_samples_model:
                self.currently_considered_budgets = [b, ]
                configs_array = self.rh2EPM.get_configurations(
                    self.runhistory, budget_subset=self.currently_considered_budgets)
                return X, Y, configs_array

        return np.empty(shape=[0, 0]), np.empty(shape=[0, ]), np.empty(shape=[0, 0])

    def _get_evaluated_configs(self) -> typing.List[Configuration]:
        """Return the runhistory's configurations for the currently considered budgets."""
        return self.runhistory.get_all_configs_per_budget(
            budget_subset=self.currently_considered_budgets)

    def choose_next(
            self,
            incumbent_value: float = None) -> typing.Iterator[Configuration]:
        """Choose next candidate solution with Bayesian optimization. The
        suggested configurations depend on the argument ``acq_optimizer`` to
        the ``SMBO`` class.

        Parameters
        ----------
        incumbent_value: float
            Cost value of incumbent configuration (required for acquisition function);
            If not given, it will be inferred from runhistory or predicted;
            if not given and runhistory is empty, it will raise a ValueError.

        Returns
        -------
        Iterator

        Raises
        ------
        ValueError
            If ``incumbent_value`` is not given and the runhistory is empty.
        """
        self.logger.debug("Search for next configuration")
        X, Y, X_configurations = self._collect_data_to_train_model()

        if X.shape[0] == 0:
            # Only return a single point to avoid an overly high number of
            # random search iterations
            return self._random_search.maximize(runhistory=self.runhistory,
                                                stats=self.stats,
                                                num_points=1)
        self.model.train(X, Y)

        if incumbent_value is not None:
            best_observation = incumbent_value
            x_best_array = None  # type: typing.Optional[np.ndarray]
        else:
            if self.runhistory.empty():
                raise ValueError("Runhistory is empty and the cost value of "
                                 "the incumbent is unknown.")
            x_best_array, best_observation = self._get_x_best(
                self.predict_x_best, X_configurations)

        self.acquisition_func.update(
            model=self.model,
            eta=best_observation,
            incumbent_array=x_best_array,
            num_data=len(self._get_evaluated_configs()),
            X=X_configurations,
        )

        challengers = self.acq_optimizer.maximize(
            runhistory=self.runhistory,
            stats=self.stats,
            num_points=self.scenario.acq_opt_challengers,  # type: ignore[attr-defined] # noqa F821
            random_configuration_chooser=self.random_configuration_chooser)
        return challengers

    def _get_x_best(self, predict: bool,
                    X: np.ndarray) -> typing.Tuple[np.ndarray, float]:
        """Get the array representation and the cost value of the "best" configuration.

        The definition of best varies depending on the argument ``predict``. If
        set to ``True``, this function will return the stats of the best
        configuration as predicted by the model, otherwise it will return the
        stats for the best observed configuration.

        Parameters
        ----------
        predict : bool
            Whether to use the predicted or observed best.
        X : np.ndarray
            Configuration arrays to score with the model; only read when
            ``predict`` is ``True``.

        Returns
        -------
        x_best_array : np.ndarray
            Array representation (first element of the returned tuple).
        best_observation : float
            Cost value belonging to the best configuration (second element).
        """
        if predict:
            costs = [
                (
                    self.model.predict_marginalized_over_instances(
                        x.reshape((1, -1)))[0][0][0],
                    x,
                )
                for x in X
            ]
            # Stable sort on the predicted cost only; the arrays themselves
            # are never compared.
            costs.sort(key=lambda t: t[0])
            x_best_array = costs[0][1]
            best_observation = costs[0][0]
            # won't need log(y) if EPM was already trained on log(y)
        else:
            all_configs = self.runhistory.get_all_configs_per_budget(
                budget_subset=self.currently_considered_budgets)
            x_best = self.incumbent
            # NOTE(review): this converts *all* configurations of the current
            # budgets, not only the incumbent -- confirm this is what the
            # acquisition function expects as ``incumbent_array``.
            x_best_array = convert_configurations_to_array(all_configs)
            best_observation = self.runhistory.get_cost(x_best)
            best_observation_as_array = np.array(best_observation).reshape((1, 1))
            # It's unclear how to do this for inv scaling and potential future scaling.
            # This line should be changed if necessary
            best_observation = self.rh2EPM.transform_response_values(
                best_observation_as_array)
            best_observation = best_observation[0][0]

        return x_best_array, best_observation
def __init__(self,
             scenario: Scenario,
             stats: Stats,
             initial_design: InitialDesign,
             runhistory: RunHistory,
             runhistory2epm: AbstractRunHistory2EPM,
             intensifier: Intensifier,
             aggregate_func: callable,
             num_run: int,
             model: RandomForestWithInstances,
             acq_optimizer: AcquisitionFunctionMaximizer,
             acquisition_func: AbstractAcquisitionFunction,
             rng: np.random.RandomState,
             restore_incumbent: Configuration = None,
             random_configuration_chooser: typing.Union[
                 ChooserNoCoolDown,
                 ChooserLinearCoolDown] = ChooserNoCoolDown(2.0),
             predict_incumbent: bool = True):
    """
    Interface that contains the main Bayesian optimization loop

    Parameters
    ----------
    scenario: smac.scenario.scenario.Scenario
        Scenario object
    stats: Stats
        statistics object with configuration budgets
    initial_design: InitialDesign
        initial sampling design
    runhistory: RunHistory
        runhistory with all runs so far
    runhistory2epm : AbstractRunHistory2EPM
        Object that implements the AbstractRunHistory2EPM to convert
        runhistory data into EPM data
    intensifier: Intensifier
        intensification of new challengers against incumbent configuration
        (probably with some kind of racing on the instances)
    aggregate_func: callable
        how to aggregate the runs in the runhistory to get the performance
        of a configuration
    num_run: int
        id of this run (used for pSMAC)
    model: RandomForestWithInstances
        empirical performance model (right now, we support only
        RandomForestWithInstances)
    acq_optimizer: AcquisitionFunctionMaximizer
        Optimizer of acquisition function.
    acquisition_func : AbstractAcquisitionFunction
        Object that implements the AbstractAcquisitionFunction (i.e., infill
        criterion for acq_optimizer)
    rng: np.random.RandomState
        Random number generator
    restore_incumbent: Configuration
        incumbent to be used from the start. ONLY used to restore states.
    random_configuration_chooser
        Chooser for random configuration -- one of

        * ChooserNoCoolDown(modulus)
        * ChooserLinearCoolDown(start_modulus, modulus_increment, end_modulus)
    predict_incumbent: bool
        Use predicted performance of incumbent instead of observed performance
    """
    logger_name = ".".join((self.__module__, self.__class__.__name__))
    self.logger = logging.getLogger(logger_name)

    # Scenario and the configuration space it carries.
    self.scenario = scenario
    self.config_space = scenario.cs

    # Run bookkeeping.
    self.stats = stats
    self.runhistory = runhistory
    self.rh2EPM = runhistory2epm
    self.num_run = num_run
    self.incumbent = restore_incumbent

    # Optimization components.
    self.initial_design = initial_design
    self.intensifier = intensifier
    self.aggregate_func = aggregate_func
    self.model = model
    self.acq_optimizer = acq_optimizer
    self.acquisition_func = acquisition_func
    self.rng = rng
    self.random_configuration_chooser = random_configuration_chooser
    self.predict_incumbent = predict_incumbent

    # Separate random search built from the same acquisition function.
    self._random_search = RandomSearch(
        acquisition_func,
        self.config_space,
        rng,
    )
class SMBO(object):
    """Interface that contains the main Bayesian optimization loop

    Attributes
    ----------
    logger
    incumbent
    scenario
    config_space
    stats
    initial_design
    runhistory
    rh2EPM
    intensifier
    aggregate_func
    num_run
    model
    acq_optimizer
    acquisition_func
    rng
    random_configuration_chooser
    """

    def __init__(self,
                 scenario: Scenario,
                 stats: Stats,
                 initial_design: InitialDesign,
                 runhistory: RunHistory,
                 runhistory2epm: AbstractRunHistory2EPM,
                 intensifier: Intensifier,
                 aggregate_func: callable,
                 num_run: int,
                 model: RandomForestWithInstances,
                 acq_optimizer: AcquisitionFunctionMaximizer,
                 acquisition_func: AbstractAcquisitionFunction,
                 rng: np.random.RandomState,
                 restore_incumbent: Configuration = None,
                 random_configuration_chooser: typing.Union[
                     ChooserNoCoolDown,
                     ChooserLinearCoolDown] = ChooserNoCoolDown(2.0),
                 predict_incumbent: bool = True):
        """
        Interface that contains the main Bayesian optimization loop

        Parameters
        ----------
        scenario: smac.scenario.scenario.Scenario
            Scenario object
        stats: Stats
            statistics object with configuration budgets
        initial_design: InitialDesign
            initial sampling design
        runhistory: RunHistory
            runhistory with all runs so far
        runhistory2epm : AbstractRunHistory2EPM
            Object that implements the AbstractRunHistory2EPM to convert
            runhistory data into EPM data
        intensifier: Intensifier
            intensification of new challengers against incumbent configuration
            (probably with some kind of racing on the instances)
        aggregate_func: callable
            how to aggregate the runs in the runhistory to get the performance of a
            configuration
        num_run: int
            id of this run (used for pSMAC)
        model: RandomForestWithInstances
            empirical performance model (right now, we support only
            RandomForestWithInstances)
        acq_optimizer: AcquisitionFunctionMaximizer
            Optimizer of acquisition function.
        acquisition_func : AbstractAcquisitionFunction
            Object that implements the AbstractAcquisitionFunction (i.e., infill
            criterion for acq_optimizer)
        restore_incumbent: Configuration
            incumbent to be used from the start. ONLY used to restore states.
        rng: np.random.RandomState
            Random number generator
        random_configuration_chooser
            Chooser for random configuration -- one of

            * ChooserNoCoolDown(modulus)
            * ChooserLinearCoolDown(start_modulus, modulus_increment, end_modulus)
        predict_incumbent: bool
            Use predicted performance of incumbent instead of observed performance
        """
        self.logger = logging.getLogger(self.__module__ + "." + self.__class__.__name__)
        self.incumbent = restore_incumbent

        self.scenario = scenario
        self.config_space = scenario.cs
        self.stats = stats
        self.initial_design = initial_design
        self.runhistory = runhistory
        self.rh2EPM = runhistory2epm
        self.intensifier = intensifier
        self.aggregate_func = aggregate_func
        self.num_run = num_run
        self.model = model
        self.acq_optimizer = acq_optimizer
        self.acquisition_func = acquisition_func
        self.rng = rng
        self.random_configuration_chooser = random_configuration_chooser

        self._random_search = RandomSearch(acquisition_func, self.config_space, rng)

        self.predict_incumbent = predict_incumbent

    def start(self):
        """Starts the Bayesian Optimization loop.

        Detects whether the optimization is restored from a previous state.

        Raises
        ------
        ValueError
            If the stats and the incumbent disagree about whether runs have
            already been performed.
        """
        self.stats.start_timing()

        # Initialization, depends on input
        if self.stats.ta_runs == 0 and self.incumbent is None:
            self.incumbent = self.initial_design.run()

        elif self.stats.ta_runs > 0 and self.incumbent is None:
            raise ValueError(
                "According to stats there have been runs performed, "
                "but the optimizer cannot detect an incumbent. Did "
                "you set the incumbent (e.g. after restoring state)?")
        elif self.stats.ta_runs == 0 and self.incumbent is not None:
            raise ValueError(
                "An incumbent is specified, but there are no runs "
                "recorded in the Stats-object. If you're restoring "
                "a state, please provide the Stats-object.")
        else:
            # Restoring state!
            self.logger.info(
                "State Restored! Starting optimization with "
                "incumbent %s", self.incumbent)
            self.logger.info("State restored with following budget:")
            self.stats.print_stats()

        # To be on the safe side -> never return "None" as incumbent
        if not self.incumbent:
            self.incumbent = self.scenario.cs.get_default_configuration()

    def run(self):
        """Runs the Bayesian optimization loop

        Returns
        ----------
        incumbent: np.array(1, H)
            The best found configuration
        """
        self.start()

        # Main BO loop
        while True:
            if self.scenario.shared_model:
                pSMAC.read(run_history=self.runhistory,
                           output_dirs=self.scenario.input_psmac_dirs,
                           configuration_space=self.config_space,
                           logger=self.logger)

            start_time = time.time()
            X, Y = self.rh2EPM.transform(self.runhistory)

            self.logger.debug("Search for next configuration")
            # get all found configurations sorted according to acq
            challengers = self.choose_next(X, Y)

            time_spent = time.time() - start_time
            time_left = self._get_timebound_for_intensification(time_spent)

            self.logger.debug("Intensify")

            self.incumbent, inc_perf = self.intensifier.intensify(
                challengers=challengers,
                incumbent=self.incumbent,
                run_history=self.runhistory,
                aggregate_func=self.aggregate_func,
                time_bound=max(self.intensifier._min_time, time_left))

            if self.scenario.shared_model:
                pSMAC.write(
                    run_history=self.runhistory,
                    output_directory=self.scenario.output_dir_for_this_run,
                    logger=self.logger)

            # Use the instance logger (not the root logger) so this message
            # follows the same logger configuration as every other message
            # of this class.
            self.logger.debug(
                "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)",
                self.stats.get_remaing_time_budget(),
                self.stats.get_remaining_ta_budget(),
                self.stats.get_remaining_ta_runs())

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent

    def choose_next(self,
                    X: np.ndarray,
                    Y: np.ndarray,
                    incumbent_value: float = None):
        """Choose next candidate solution with Bayesian optimization. The
        suggested configurations depend on the argument ``acq_optimizer`` to
        the ``SMBO`` class.

        Parameters
        ----------
        X : (N, D) numpy array
            Each row contains a configuration and one set of
            instance features.
        Y : (N, O) numpy array
            The function values for each configuration instance pair.
        incumbent_value: float
            Cost value of incumbent configuration
            (required for acquisition function);
            if not given, it will be inferred from runhistory;
            if not given and runhistory is empty,
            it will raise a ValueError

        Returns
        -------
        Iterable
        """
        if X.shape[0] == 0:
            # Only return a single point to avoid an overly high number of
            # random search iterations
            return self._random_search.maximize(runhistory=self.runhistory,
                                                stats=self.stats,
                                                num_points=1)

        self.model.train(X, Y)

        if incumbent_value is None:
            if self.runhistory.empty():
                raise ValueError("Runhistory is empty and the cost value of "
                                 "the incumbent is unknown.")
            incumbent_value = self._get_incumbent_value()

        self.acquisition_func.update(model=self.model,
                                     eta=incumbent_value,
                                     num_data=len(self.runhistory.data))

        challengers = self.acq_optimizer.maximize(
            runhistory=self.runhistory,
            stats=self.stats,
            num_points=self.scenario.acq_opt_challengers,
            random_configuration_chooser=self.random_configuration_chooser)
        return challengers

    def _get_incumbent_value(self):
        """Get the incumbent value either from the runhistory or from the best
        predicted performance on the configs in the runhistory (depends on
        ``self.predict_incumbent``).

        Returns
        -------
        float

        Raises
        ------
        ValueError
            If the observed value is requested but the runhistory is empty.
        """
        if self.predict_incumbent:
            configs = convert_configurations_to_array(
                self.runhistory.get_all_configs())
            costs = [
                self.model.predict_marginalized_over_instances(
                    config.reshape((1, -1)))[0][0][0]
                for config in configs
            ]
            incumbent_value = np.min(costs)
            # won't need log(y) if EPM was already trained on log(y)
        else:
            if self.runhistory.empty():
                raise ValueError("Runhistory is empty and the cost value of "
                                 "the incumbent is unknown.")
            incumbent_value = self.runhistory.get_cost(self.incumbent)
            # It's unclear how to do this for inv scaling and potential future
            # scaling. This line should be changed if necessary
            incumbent_value_as_array = np.array(incumbent_value).reshape((1, 1))
            incumbent_value = self.rh2EPM.transform_response_values(
                incumbent_value_as_array)
            incumbent_value = incumbent_value[0][0]

        return incumbent_value

    def validate(self,
                 config_mode='inc',
                 instance_mode='train+test',
                 repetitions=1,
                 use_epm=False,
                 n_jobs=-1,
                 backend='threading'):
        """Create validator-object and run validation, using
        scenario-information, runhistory from smbo and tae_runner from
        intensify

        Parameters
        ----------
        config_mode: str or list<Configuration>
            string or directly a list of Configuration
            str from [def, inc, def+inc, wallclock_time, cpu_time, all]
            time evaluates at cpu- or wallclock-timesteps of:
            [max_time/2^0, max_time/2^1, max_time/2^3, ..., default]
            with max_time being the highest recorded time
        instance_mode: string
            what instances to use for validation, from [train, test, train+test]
        repetitions: int
            number of repetitions in nondeterministic algorithms (in
            deterministic will be fixed to 1)
        use_epm: bool
            whether to use an EPM instead of evaluating all runs with the TAE
        n_jobs: int
            number of parallel processes used by joblib
        backend: str
            passed through to ``Validator.validate`` (not used when
            ``use_epm`` is set)

        Returns
        -------
        runhistory: RunHistory
            runhistory containing all specified runs
        """
        if isinstance(config_mode, str):
            traj_fn = os.path.join(self.scenario.output_dir_for_this_run,
                                   "traj_aclib2.json")
            trajectory = TrajLogger.read_traj_aclib_format(fn=traj_fn,
                                                           cs=self.scenario.cs)
        else:
            trajectory = None

        if self.scenario.output_dir_for_this_run:
            new_rh_path = os.path.join(self.scenario.output_dir_for_this_run,
                                       "validated_runhistory.json")
        else:
            new_rh_path = None

        validator = Validator(self.scenario, trajectory, self.rng)
        if use_epm:
            new_rh = validator.validate_epm(config_mode=config_mode,
                                            instance_mode=instance_mode,
                                            repetitions=repetitions,
                                            runhistory=self.runhistory,
                                            output_fn=new_rh_path)
        else:
            new_rh = validator.validate(config_mode,
                                        instance_mode,
                                        repetitions,
                                        n_jobs,
                                        backend,
                                        self.runhistory,
                                        self.intensifier.tae_runner,
                                        output_fn=new_rh_path)
        return new_rh

    def _get_timebound_for_intensification(self, time_spent: float):
        """Calculate time left for intensify from the time spent on
        choosing challengers using the fraction of time intended for
        intensification (which is specified in
        scenario.intensification_percentage).

        Parameters
        ----------
        time_spent : float

        Returns
        -------
        time_left : float

        Raises
        ------
        ValueError
            If ``scenario.intensification_percentage`` is not in (0, 1).
        """
        frac_intensify = self.scenario.intensification_percentage
        if frac_intensify <= 0 or frac_intensify >= 1:
            raise ValueError("The value for intensification_percentage-"
                             "option must lie in (0,1), instead: %.2f" %
                             (frac_intensify))
        # time_spent is the (1 - frac) share of the iteration, so scale it up
        # to the whole iteration and take the intensification share of that.
        total_time = time_spent / (1 - frac_intensify)
        time_left = frac_intensify * total_time
        self.logger.debug(
            "Total time: %.4f, time spent on choosing next "
            "configurations: %.4f (%.2f), time left for "
            "intensification: %.4f (%.2f)",
            total_time, time_spent, (1 - frac_intensify), time_left,
            frac_intensify)
        return time_left

    def _component_builder(self, conf: typing.Union[Configuration, dict]) \
            -> typing.Tuple[AbstractAcquisitionFunction, AbstractEPM]:
        """Builds a new acquisition function object and EPM object and
        returns these.

        Parameters
        ----------
        conf: typing.Union[Configuration, dict]
            configuration specifying "model" and "acq_func"

        Returns
        -------
        typing.Tuple[AbstractAcquisitionFunction, AbstractEPM]

        Raises
        ------
        ValueError
            If ``conf["model"]`` or ``conf["acq_func"]`` is not one of the
            supported values, or if the type array yields neither continuous
            nor categorical dimensions for the GP kernel.
        """
        types, bounds = get_types(
            self.config_space, instance_features=self.scenario.feature_array)

        if conf["model"] == "RF":
            model = RandomForestWithInstances(
                configspace=self.config_space,
                types=types,
                bounds=bounds,
                instance_features=self.scenario.feature_array,
                seed=self.rng.randint(MAXINT),
                pca_components=conf.get("pca_dim", self.scenario.PCA_DIM),
                log_y=conf.get("log_y",
                               self.scenario.transform_y in ["LOG", "LOGS"]),
                num_trees=conf.get("num_trees", self.scenario.rf_num_trees),
                do_bootstrapping=conf.get("do_bootstrapping",
                                          self.scenario.rf_do_bootstrapping),
                ratio_features=conf.get("ratio_features",
                                        self.scenario.rf_ratio_features),
                # _get_acm_cs names these hyperparameters
                # "min_samples_to_split" / "min_samples_in_leaf"; accept those
                # names as a fallback so sampled values are not silently
                # replaced by the scenario defaults.
                min_samples_split=conf.get(
                    "min_samples_split",
                    conf.get("min_samples_to_split",
                             self.scenario.rf_min_samples_split)),
                min_samples_leaf=conf.get(
                    "min_samples_leaf",
                    conf.get("min_samples_in_leaf",
                             self.scenario.rf_min_samples_leaf)),
                max_depth=conf.get("max_depth", self.scenario.rf_max_depth),
            )

        elif conf["model"] == "GP":
            from smac.epm.gp_kernels import ConstantKernel, HammingKernel, WhiteKernel, Matern

            cov_amp = ConstantKernel(
                2.0,
                constant_value_bounds=(np.exp(-10), np.exp(2)),
                prior=LognormalPrior(mean=0.0, sigma=1.0, rng=self.rng),
            )

            cont_dims = np.nonzero(types == 0)[0]
            cat_dims = np.nonzero(types != 0)[0]

            if len(cont_dims) > 0:
                exp_kernel = Matern(
                    np.ones([len(cont_dims)]),
                    [(np.exp(-10), np.exp(2)) for _ in range(len(cont_dims))],
                    nu=2.5,
                    operate_on=cont_dims,
                )

            if len(cat_dims) > 0:
                ham_kernel = HammingKernel(
                    np.ones([len(cat_dims)]),
                    [(np.exp(-10), np.exp(2)) for _ in range(len(cat_dims))],
                    operate_on=cat_dims,
                )
            noise_kernel = WhiteKernel(
                noise_level=1e-8,
                noise_level_bounds=(np.exp(-25), np.exp(2)),
                prior=HorseshoePrior(scale=0.1, rng=self.rng),
            )

            if len(cont_dims) > 0 and len(cat_dims) > 0:
                # both
                kernel = cov_amp * (exp_kernel * ham_kernel) + noise_kernel
            elif len(cont_dims) > 0 and len(cat_dims) == 0:
                # only cont
                kernel = cov_amp * exp_kernel + noise_kernel
            elif len(cont_dims) == 0 and len(cat_dims) > 0:
                # only cat
                kernel = cov_amp * ham_kernel + noise_kernel
            else:
                raise ValueError(
                    "Cannot build a GP kernel: no continuous or categorical "
                    "dimensions found.")

            # Three walkers per kernel hyperparameter, rounded up to an even
            # number.
            n_mcmc_walkers = 3 * len(kernel.theta)
            if n_mcmc_walkers % 2 == 1:
                n_mcmc_walkers += 1

            model = GaussianProcessMCMC(
                self.config_space,
                types=types,
                bounds=bounds,
                kernel=kernel,
                n_mcmc_walkers=n_mcmc_walkers,
                chain_length=250,
                burnin_steps=250,
                normalize_y=True,
                seed=self.rng.randint(low=0, high=10000),
            )

        else:
            # Fail early instead of hitting an UnboundLocalError below.
            raise ValueError("Unknown model %r; expected 'RF' or 'GP'." %
                             (conf["model"],))

        if conf["acq_func"] == "EI":
            acq = EI(model=model, par=conf.get("par_ei", 0))
        elif conf["acq_func"] == "LCB":
            acq = LCB(model=model, par=conf.get("par_lcb", 0))
        elif conf["acq_func"] == "PI":
            acq = PI(model=model, par=conf.get("par_pi", 0))
        elif conf["acq_func"] == "LogEI":
            # par value should be in log-space
            acq = LogEI(model=model, par=conf.get("par_logei", 0))
        else:
            raise ValueError(
                "Unknown acq_func %r; expected 'EI', 'LCB', 'PI' or 'LogEI'." %
                (conf["acq_func"],))

        return acq, model

    def _get_acm_cs(self):
        """Returns a configuration space designed for querying
        ~smac.optimizer.smbo._component_builder

        Returns
        -------
        ConfigurationSpace
        """
        cs = ConfigurationSpace()
        cs.seed(self.rng.randint(0, 2**20))

        if 'gp' in smac.extras_installed:
            model = CategoricalHyperparameter("model", choices=("RF", "GP"))
        else:
            model = Constant("model", value="RF")

        num_trees = Constant("num_trees", value=10)
        bootstrap = CategoricalHyperparameter("do_bootstrapping",
                                              choices=(True, False),
                                              default_value=True)
        ratio_features = CategoricalHyperparameter("ratio_features",
                                                   choices=(3 / 6, 4 / 6,
                                                            5 / 6, 1),
                                                   default_value=1)
        min_split = UniformIntegerHyperparameter("min_samples_to_split",
                                                 lower=1,
                                                 upper=10,
                                                 default_value=2)
        min_leaves = UniformIntegerHyperparameter("min_samples_in_leaf",
                                                  lower=1,
                                                  upper=10,
                                                  default_value=1)

        cs.add_hyperparameters([
            model, num_trees, bootstrap, ratio_features, min_split, min_leaves
        ])

        # The forest hyperparameters are only active for the RF model.
        inc_num_trees = InCondition(num_trees, model, ["RF"])
        inc_bootstrap = InCondition(bootstrap, model, ["RF"])
        inc_ratio_features = InCondition(ratio_features, model, ["RF"])
        inc_min_split = InCondition(min_split, model, ["RF"])
        inc_min_leaves = InCondition(min_leaves, model, ["RF"])

        cs.add_conditions([
            inc_num_trees, inc_bootstrap, inc_ratio_features, inc_min_split,
            inc_min_leaves
        ])

        acq = CategoricalHyperparameter("acq_func",
                                        choices=("EI", "LCB", "PI", "LogEI"))
        par_ei = UniformFloatHyperparameter("par_ei", lower=-10, upper=10)
        par_pi = UniformFloatHyperparameter("par_pi", lower=-10, upper=10)
        par_logei = UniformFloatHyperparameter("par_logei",
                                               lower=0.001,
                                               upper=100,
                                               log=True)
        par_lcb = UniformFloatHyperparameter("par_lcb",
                                             lower=0.0001,
                                             upper=0.9999)

        cs.add_hyperparameters([acq, par_ei, par_pi, par_logei, par_lcb])

        # Each parameter is only active for its own acquisition function.
        inc_par_ei = InCondition(par_ei, acq, ["EI"])
        inc_par_pi = InCondition(par_pi, acq, ["PI"])
        inc_par_logei = InCondition(par_logei, acq, ["LogEI"])
        inc_par_lcb = InCondition(par_lcb, acq, ["LCB"])

        cs.add_conditions([inc_par_ei, inc_par_pi, inc_par_logei, inc_par_lcb])

        return cs
class SMBO(object):
    """Interface that contains the main Bayesian optimization loop

    Attributes
    ----------
    logger
    incumbent
    scenario
    config_space
    stats
    initial_design
    runhistory
    rh2EPM
    intensifier
    aggregate_func
    num_run
    model
    acq_optimizer
    acquisition_func
    rng
    """

    def __init__(self,
                 scenario: Scenario,
                 stats: Stats,
                 initial_design: InitialDesign,
                 runhistory: RunHistory,
                 runhistory2epm: AbstractRunHistory2EPM,
                 intensifier: Intensifier,
                 aggregate_func: callable,
                 num_run: int,
                 model: RandomForestWithInstances,
                 acq_optimizer: AcquisitionFunctionMaximizer,
                 acquisition_func: AbstractAcquisitionFunction,
                 rng: np.random.RandomState,
                 restore_incumbent: Configuration=None):
        """
        Interface that contains the main Bayesian optimization loop

        Parameters
        ----------
        scenario: smac.scenario.scenario.Scenario
            Scenario object
        stats: Stats
            statistics object with configuration budgets
        initial_design: InitialDesign
            initial sampling design
        runhistory: RunHistory
            runhistory with all runs so far
        runhistory2epm : AbstractRunHistory2EPM
            Object that implements the AbstractRunHistory2EPM to convert
            runhistory data into EPM data
        intensifier: Intensifier
            intensification of new challengers against incumbent
            configuration (probably with some kind of racing on the
            instances)
        aggregate_func: callable
            how to aggregate the runs in the runhistory to get the
            performance of a configuration
        num_run: int
            id of this run (used for pSMAC)
        model: RandomForestWithInstances
            empirical performance model (right now, we support only
            RandomForestWithInstances)
        acq_optimizer: AcquisitionFunctionMaximizer
            Optimizer of acquisition function.
        acquisition_func : AbstractAcquisitionFunction
            Object that implements the AbstractAcquisitionFunction (i.e.,
            infill criterion for acq_optimizer)
        rng: np.random.RandomState
            Random number generator
        restore_incumbent: Configuration
            incumbent to be used from the start. ONLY used to restore
            states.
        """
        self.logger = logging.getLogger(
            self.__module__ + "." + self.__class__.__name__)
        self.incumbent = restore_incumbent

        self.scenario = scenario
        self.config_space = scenario.cs
        self.stats = stats
        self.initial_design = initial_design
        self.runhistory = runhistory
        self.rh2EPM = runhistory2epm
        self.intensifier = intensifier
        self.aggregate_func = aggregate_func
        self.num_run = num_run
        self.model = model
        self.acq_optimizer = acq_optimizer
        self.acquisition_func = acquisition_func
        self.rng = rng

        # Used by choose_next() while there is no data to train the model on.
        self._random_search = RandomSearch(
            acquisition_func, self.config_space, rng
        )

    def start(self):
        """Starts the Bayesian Optimization loop.

        Detects whether the optimization is restored from a previous state.

        Raises
        ------
        ValueError
            If the stats object and the incumbent disagree about whether
            runs have already been performed.
        FirstRunCrashedException
            Re-raised from the initial design when
            ``scenario.abort_on_first_run_crash`` is set.
        """
        self.stats.start_timing()

        # Initialization, depends on input
        if self.stats.ta_runs == 0 and self.incumbent is None:
            try:
                self.incumbent = self.initial_design.run()
            except FirstRunCrashedException:
                # Only abort if the scenario asks for it; otherwise continue
                # without an incumbent from the initial design.
                if self.scenario.abort_on_first_run_crash:
                    raise
        elif self.stats.ta_runs > 0 and self.incumbent is None:
            raise ValueError("According to stats there have been runs performed, "
                             "but the optimizer cannot detect an incumbent. Did "
                             "you set the incumbent (e.g. after restoring state)?")
        elif self.stats.ta_runs == 0 and self.incumbent is not None:
            raise ValueError("An incumbent is specified, but there are no runs "
                             "recorded in the Stats-object. If you're restoring "
                             "a state, please provide the Stats-object.")
        else:
            # Restoring state!
            self.logger.info("State Restored! Starting optimization with "
                             "incumbent %s", self.incumbent)
            self.logger.info("State restored with following budget:")
            self.stats.print_stats()

    def run(self):
        """Runs the Bayesian optimization loop

        Returns
        ----------
        incumbent: np.array(1, H)
            The best found configuration
        """
        self.start()

        # Main BO loop
        while True:
            if self.scenario.shared_model:
                pSMAC.read(run_history=self.runhistory,
                           output_dirs=self.scenario.input_psmac_dirs,
                           configuration_space=self.config_space,
                           logger=self.logger)

            start_time = time.time()
            X, Y = self.rh2EPM.transform(self.runhistory)

            self.logger.debug("Search for next configuration")
            # get all found configurations sorted according to acq
            challengers = self.choose_next(X, Y)

            # The time spent on model fitting / challenger search determines
            # how long intensification may take.
            time_spent = time.time() - start_time
            time_left = self._get_timebound_for_intensification(time_spent)

            self.logger.debug("Intensify")

            self.incumbent, inc_perf = self.intensifier.intensify(
                challengers=challengers,
                incumbent=self.incumbent,
                run_history=self.runhistory,
                aggregate_func=self.aggregate_func,
                time_bound=max(self.intensifier._min_time, time_left))

            if self.scenario.shared_model:
                pSMAC.write(run_history=self.runhistory,
                            output_directory=self.scenario.output_dir_for_this_run)

            # Log through this object's logger (not the root logger) so the
            # message follows the same configuration as all other output.
            self.logger.debug(
                "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)",
                self.stats.get_remaing_time_budget(),
                self.stats.get_remaining_ta_budget(),
                self.stats.get_remaining_ta_runs())

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent

    def choose_next(self, X: np.ndarray, Y: np.ndarray,
                    incumbent_value: float=None):
        """Choose next candidate solution with Bayesian optimization. The
        suggested configurations depend on the argument ``acq_optimizer`` to
        the ``SMBO`` class.

        Parameters
        ----------
        X : (N, D) numpy array
            Each row contains a configuration and one set of
            instance features.
        Y : (N, O) numpy array
            The function values for each configuration instance pair.
        incumbent_value: float
            Cost value of incumbent configuration
            (required for acquisition function);
            if not given, it will be inferred from runhistory;
            if not given and runhistory is empty,
            it will raise a ValueError

        Returns
        -------
        Iterable

        Raises
        ------
        ValueError
            If ``incumbent_value`` is not given and the runhistory is empty.
        """
        if X.shape[0] == 0:
            # Only return a single point to avoid an overly high number of
            # random search iterations
            return self._random_search.maximize(runhistory=self.runhistory,
                                                stats=self.stats,
                                                num_points=1)

        self.model.train(X, Y)

        if incumbent_value is None:
            if self.runhistory.empty():
                raise ValueError("Runhistory is empty and the cost value of "
                                 "the incumbent is unknown.")
            incumbent_value = self.runhistory.get_cost(self.incumbent)

        self.acquisition_func.update(model=self.model, eta=incumbent_value)

        challengers = self.acq_optimizer.maximize(self.runhistory, self.stats,
                                                  5000)
        return challengers

    def validate(self, config_mode='inc', instance_mode='train+test',
                 repetitions=1, use_epm=False, n_jobs=-1, backend='threading'):
        """Create validator-object and run validation, using
        scenario-information, runhistory from smbo and tae_runner from
        intensify

        Parameters
        ----------
        config_mode: str or list<Configuration>
            string or directly a list of Configuration
            str from [def, inc, def+inc, wallclock_time, cpu_time, all]
            time evaluates at cpu- or wallclock-timesteps of:
            [max_time/2^0, max_time/2^1, max_time/2^3, ..., default]
            with max_time being the highest recorded time
        instance_mode: string
            what instances to use for validation, from [train, test, train+test]
        repetitions: int
            number of repetitions in nondeterministic algorithms (in
            deterministic will be fixed to 1)
        use_epm: bool
            whether to use an EPM instead of evaluating all runs with the TAE
        n_jobs: int
            number of parallel processes used by joblib
        backend: str
            passed through to ``Validator.validate`` (ignored when
            ``use_epm`` is set)

        Returns
        -------
        runhistory: RunHistory
            runhistory containing all specified runs
        """
        traj_fn = os.path.join(self.scenario.output_dir_for_this_run,
                               "traj_aclib2.json")
        trajectory = TrajLogger.read_traj_aclib_format(fn=traj_fn,
                                                       cs=self.scenario.cs)
        new_rh_path = os.path.join(self.scenario.output_dir_for_this_run,
                                   "validated_runhistory.json")

        validator = Validator(self.scenario, trajectory, self.rng)
        if use_epm:
            new_rh = validator.validate_epm(config_mode=config_mode,
                                            instance_mode=instance_mode,
                                            repetitions=repetitions,
                                            runhistory=self.runhistory,
                                            output=new_rh_path)
        else:
            new_rh = validator.validate(config_mode, instance_mode,
                                        repetitions, n_jobs, backend,
                                        self.runhistory,
                                        self.intensifier.tae_runner,
                                        new_rh_path)
        return new_rh

    def _get_timebound_for_intensification(self, time_spent):
        """Calculate time left for intensify from the time spent on
        choosing challengers using the fraction of time intended for
        intensification (which is specified in
        scenario.intensification_percentage).

        Parameters
        ----------
        time_spent : float

        Returns
        -------
        time_left : float

        Raises
        ------
        ValueError
            If ``scenario.intensification_percentage`` is not in (0, 1).
        """
        frac_intensify = self.scenario.intensification_percentage
        if frac_intensify <= 0 or frac_intensify >= 1:
            raise ValueError("The value for intensification_percentage-"
                             "option must lie in (0,1), instead: %.2f" %
                             (frac_intensify))
        # time_spent is the (1 - frac_intensify) share of one iteration;
        # scale it up to the whole iteration and take the intensify share.
        total_time = time_spent / (1 - frac_intensify)
        time_left = frac_intensify * total_time
        self.logger.debug("Total time: %.4f, time spent on choosing next "
                          "configurations: %.4f (%.2f), time left for "
                          "intensification: %.4f (%.2f)",
                          total_time, time_spent, (1 - frac_intensify),
                          time_left, frac_intensify)
        return time_left
class InterleavedLocalAndRandomSearch(AcquisitionFunctionMaximizer):
    """SMAC's default optimizer of the acquisition function.

    Candidates come from two sources: a local search and a random search
    whose samples are sorted by acquisition value. Both candidate lists are
    merged, ranked by acquisition value and handed to a ``ChallengerList``.

    Parameters
    ----------
    acquisition_function : ~smac.optimizer.acquisition.AbstractAcquisitionFunction

    config_space : ~smac.configspace.ConfigurationSpace

    rng : np.random.RandomState or int, optional
    """

    def __init__(
            self,
            acquisition_function: AbstractAcquisitionFunction,
            config_space: ConfigurationSpace,
            rng: Union[bool, np.random.RandomState] = None,
    ):
        super().__init__(acquisition_function, config_space, rng)
        # Both sub-optimizers share the acquisition function, space and rng.
        self.random_search = RandomSearch(acquisition_function, config_space,
                                          rng)
        self.local_search = LocalSearch(acquisition_function, config_space,
                                        rng)
        # Best acquisition value of the latest maximize() call.
        self.max_acq_value = sys.float_info.min

    def maximize(self, runhistory: RunHistory, stats: Stats, num_points: int,
                 *args) -> Iterable[Configuration]:
        """Return challengers ranked by acquisition value.

        Parameters
        ----------
        runhistory : RunHistory
        stats : Stats
        num_points : int
            Total number of candidates requested; the random search is asked
            for ``num_points`` minus the number of local-search results.
        *args
            Accepted but not used here.

        Returns
        -------
        ChallengerList
            Wraps the ranked configurations together with the config space.
        """
        from_local_search = self.local_search._maximize(runhistory, stats, 10)

        # Random samples, sorted by their acquisition value.
        remaining = num_points - len(from_local_search)
        from_random_search = self.random_search._maximize(
            runhistory, stats, remaining, _sorted=True)

        # Having the configurations from random search, sorted by their
        # acquisition function value is important for the first few
        # iterations of SMAC. As long as the random forest predicts constant
        # value, we want to use only random configurations. Having them at
        # the beginning of the list ensures this (even after adding the
        # configurations by local search, and then sorting them).
        ranked = sorted(from_random_search + from_local_search,
                        key=lambda scored: scored[0],
                        reverse=True)

        head = [[scored[0], scored[1].origin] for scored in ranked[:10]]
        self.logger.debug(
            "First 10 acq func (origin) values of selected configurations: %s",
            str(head))

        # Remember the best acquisition value of this challenger generation.
        self.max_acq_value = ranked[0][0]

        configurations = [scored[1] for scored in ranked]
        return ChallengerList(configurations, self.config_space)

    def _maximize(self, runhistory: RunHistory, stats: Stats,
                  num_points: int) -> Iterable[Tuple[float, Configuration]]:
        """Not supported by this optimizer; use ``maximize`` instead."""
        raise NotImplementedError()
def __init__(self,
             scenario: Scenario,
             tae_runner: ExecuteTARun = None,
             runhistory: RunHistory = None,
             intensifier: Intensifier = None,
             initial_design: InitialDesign = None,
             initial_configurations: typing.List[Configuration] = None,
             stats: Stats = None,
             rng: np.random.RandomState = None,
             run_id: int = 1):
    """Set up the facade with random search as acquisition optimizer.

    Parameters
    ----------
    scenario: smac.scenario.scenario.Scenario
        Scenario object
    tae_runner: smac.tae.execute_ta_run.ExecuteTARun or callable
        Callable or implementation of
        :class:`~smac.tae.execute_ta_run.ExecuteTARun`. In case a
        callable is passed it will be wrapped by
        :class:`~smac.tae.execute_func.ExecuteTAFuncDict`.
        If not set, it will be initialized with the
        :class:`~smac.tae.execute_ta_run_old.ExecuteTARunOld`.
    runhistory: RunHistory
        Runhistory to store all algorithm runs
    intensifier: Intensifier
        intensification object to issue a racing to decide the current
        incumbent
    initial_design: InitialDesign
        initial sampling design
    initial_configurations: typing.List[Configuration]
        list of initial configurations for initial design --
        cannot be used together with initial_design
    stats: Stats
        optional stats object
    rng: np.random.RandomState
        Random number generator
    run_id: int, (default: 1)
        Run ID will be used as subfolder for output_dir.
    """
    logger_name = ".".join((self.__module__, self.__class__.__name__))
    self.logger = logging.getLogger(logger_name)

    # Initial random number generator; the first element of the returned
    # pair is not needed here.
    _, rng = get_rng(rng=rng, logger=self.logger)

    # ROAR does not really use the converted EPM data, so a cheap
    # RunHistory2EPM is sufficient for the initial conversion.
    runhistory2epm = RunHistory2EPM4Cost(
        scenario=scenario,
        num_params=len(scenario.cs.get_hyperparameters()),
        success_states=[StatusType.SUCCESS, ],
        impute_censored_data=False,
        impute_state=None,
    )

    aggregate_func = average_cost
    if runhistory is None:
        # No runhistory supplied: start from an empty one.
        runhistory = RunHistory(aggregate_func=aggregate_func)
    if runhistory.aggregate_func is None:
        # Inject the aggregation function if the runhistory lacks one.
        runhistory.aggregate_func = aggregate_func

    # NOTE(review): the ``stats`` argument, not this freshly created
    # object, is what is forwarded to the parent below -- confirm intended.
    self.stats = Stats(scenario)

    random_search = RandomSearch(
        acquisition_function=None,
        config_space=scenario.cs,
    )

    # Delegate the remaining setup to the parent (SMAC) facade.
    super().__init__(
        scenario=scenario,
        tae_runner=tae_runner,
        runhistory=runhistory,
        intensifier=intensifier,
        runhistory2epm=runhistory2epm,
        initial_design=initial_design,
        initial_configurations=initial_configurations,
        stats=stats,
        rng=rng,
        run_id=run_id,
        acquisition_function_optimizer=random_search,
    )
def __init__(
        self,
        scenario: Scenario,
        stats: Stats,
        initial_design: InitialDesign,
        runhistory: RunHistory,
        runhistory2epm: AbstractRunHistory2EPM,
        intensifier: Intensifier,
        aggregate_func: callable,
        num_run: int,
        model: AbstractEPM,
        acq_optimizer: AcquisitionFunctionMaximizer,
        acquisition_func: AbstractAcquisitionFunction,
        rng: np.random.RandomState,
        restore_incumbent: Configuration = None,
        # HOAG object injected directly into SMBO
        hoag: AbstractHOAG = None,
        # (disabled) parameter-server handle: ``server: Server = None``
        bayesian_optimization: bool = False):
    """Store all collaborators of the Bayesian optimization loop.

    Parameters
    ----------
    scenario: smac.scenario.scenario.Scenario
        Scenario object
    stats: Stats
        statistics object with configuration budgets
    initial_design: InitialDesign
        initial sampling design
    runhistory: RunHistory
        runhistory with all runs so far
    runhistory2epm : AbstractRunHistory2EPM
        Object that implements the AbstractRunHistory2EPM to convert
        runhistory data into EPM data
    intensifier: Intensifier
        intensification of new challengers against incumbent
        configuration (probably with some kind of racing on the
        instances)
    aggregate_func: callable
        how to aggregate the runs in the runhistory to get the
        performance of a configuration
    num_run: int
        id of this run (used for pSMAC)
    model: AbstractEPM
        empirical performance model (right now, we support only
        AbstractEPM)
    acq_optimizer: AcquisitionFunctionMaximizer
        Optimizer of acquisition function.
    acquisition_func : AbstractAcquisitionFunction
        Object that implements the AbstractAcquisitionFunction (i.e.,
        infill criterion for acq_optimizer)
    rng: np.random.RandomState
        Random number generator
    restore_incumbent: Configuration
        incumbent to be used from the start. ONLY used to restore
        states.
    hoag: AbstractHOAG
        optional HOAG object, stored as ``self.hoag``
    bayesian_optimization: bool
        stored as ``self.bayesian_optimization``
    """
    logger_name = ".".join((self.__module__, self.__class__.__name__))
    self.logger = logging.getLogger(logger_name)

    # Problem definition and bookkeeping
    self.scenario = scenario
    self.config_space = scenario.cs
    self.stats = stats
    self.runhistory = runhistory
    self.rh2EPM = runhistory2epm
    self.aggregate_func = aggregate_func
    self.num_run = num_run
    self.rng = rng

    # Optimization state and components
    self.incumbent = restore_incumbent
    self.initial_design = initial_design
    self.intensifier = intensifier
    self.model = model
    self.acq_optimizer = acq_optimizer
    self.acquisition_func = acquisition_func

    # HOAG object; its fit/predict methods are used directly.
    self.hoag = hoag
    # Handle of the server-side process. Passing it in through the
    # constructor is disabled, so it is always None here.
    self.server = None
    self.bayesian_optimization = bayesian_optimization

    self._random_search = RandomSearch(acquisition_func,
                                       self.config_space, rng)
class SMBO(object):
    """Interface that contains the main Bayesian optimization loop

    Attributes
    ----------
    logger
    incumbent
    scenario
    config_space
    stats
    initial_design
    runhistory
    rh2EPM
    intensifier
    aggregate_func
    num_run
    model
    acq_optimizer
    acquisition_func
    rng
    hoag
    server
    bayesian_optimization
    """

    def __init__(
            self,
            scenario: Scenario,
            stats: Stats,
            initial_design: InitialDesign,
            runhistory: RunHistory,
            runhistory2epm: AbstractRunHistory2EPM,
            intensifier: Intensifier,
            aggregate_func: callable,
            num_run: int,
            model: AbstractEPM,
            acq_optimizer: AcquisitionFunctionMaximizer,
            acquisition_func: AbstractAcquisitionFunction,
            rng: np.random.RandomState,
            restore_incumbent: Configuration = None,
            # HOAG object injected directly into SMBO
            hoag: AbstractHOAG = None,
            # (disabled) parameter-server handle: ``server: Server = None``
            bayesian_optimization: bool = False):
        """
        Interface that contains the main Bayesian optimization loop

        Parameters
        ----------
        scenario: smac.scenario.scenario.Scenario
            Scenario object
        stats: Stats
            statistics object with configuration budgets
        initial_design: InitialDesign
            initial sampling design
        runhistory: RunHistory
            runhistory with all runs so far
        runhistory2epm : AbstractRunHistory2EPM
            Object that implements the AbstractRunHistory2EPM to convert
            runhistory data into EPM data
        intensifier: Intensifier
            intensification of new challengers against incumbent
            configuration (probably with some kind of racing on the
            instances)
        aggregate_func: callable
            how to aggregate the runs in the runhistory to get the
            performance of a configuration
        num_run: int
            id of this run (used for pSMAC)
        model: AbstractEPM
            empirical performance model (right now, we support only
            AbstractEPM)
        acq_optimizer: AcquisitionFunctionMaximizer
            Optimizer of acquisition function.
        acquisition_func : AbstractAcquisitionFunction
            Object that implements the AbstractAcquisitionFunction (i.e.,
            infill criterion for acq_optimizer)
        rng: np.random.RandomState
            Random number generator
        restore_incumbent: Configuration
            incumbent to be used from the start. ONLY used to restore
            states.
        hoag: AbstractHOAG
            optional HOAG object; when given, choose_next() uses its
            ``fit`` / ``predict_gradient`` to train the model with gradients
        bayesian_optimization: bool
            when set (and no ``hoag`` is given), choose_next() delegates to
            ``BayesianOptimization`` instead of the model/acquisition pair
        """
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)
        self.incumbent = restore_incumbent

        self.scenario = scenario
        self.config_space = scenario.cs
        self.stats = stats
        self.initial_design = initial_design
        self.runhistory = runhistory
        self.rh2EPM = runhistory2epm
        self.intensifier = intensifier
        self.aggregate_func = aggregate_func
        self.num_run = num_run
        self.model = model
        self.acq_optimizer = acq_optimizer
        self.acquisition_func = acquisition_func
        self.rng = rng
        # HOAG object; its fit/predict methods are used directly.
        self.hoag = hoag
        # Handle of the server-side process. Passing it in through the
        # constructor is disabled, so it is always None here.
        self.server = None
        self.bayesian_optimization = bayesian_optimization

        # Used by choose_next() while there is no data to train the model on.
        self._random_search = RandomSearch(acquisition_func,
                                           self.config_space, rng)

    def start(self):
        """Starts the Bayesian Optimization loop.

        Detects whether the optimization is restored from a previous state.

        Raises
        ------
        ValueError
            If the stats object and the incumbent disagree about whether
            runs have already been performed.
        FirstRunCrashedException
            Re-raised when ``scenario.abort_on_first_run_crash`` is set.
        """
        self.stats.start_timing()

        # Initialization, depends on input
        if self.stats.ta_runs == 0 and self.incumbent is None:
            try:
                if self.server is None:
                    self.incumbent = self.initial_design.run()
                else:
                    # The workers produce the first incumbents themselves;
                    # the server receives one of them.
                    self.incumbent, new_runhistory = self.server.pull()
                    self.runhistory.update(new_runhistory)
            except FirstRunCrashedException:
                # Only abort if the scenario asks for it.
                if self.scenario.abort_on_first_run_crash:
                    raise
        elif self.stats.ta_runs > 0 and self.incumbent is None:
            raise ValueError(
                "According to stats there have been runs performed, "
                "but the optimizer cannot detect an incumbent. Did "
                "you set the incumbent (e.g. after restoring state)?")
        elif self.stats.ta_runs == 0 and self.incumbent is not None:
            raise ValueError(
                "An incumbent is specified, but there are no runs "
                "recorded in the Stats-object. If you're restoring "
                "a state, please provide the Stats-object.")
        else:
            # Restoring state!
            self.logger.info(
                "State Restored! Starting optimization with "
                "incumbent %s", self.incumbent)
            self.logger.info("State restored with following budget:")
            self.stats.print_stats()

    def run(self):
        """Runs the Bayesian optimization loop

        Returns
        ----------
        incumbent: np.array(1, H)
            The best found configuration
        """
        self.start()

        # Iteration counter, only used for the progress output below.
        counter = 0

        # Main BO loop
        while True:
            # Report the incumbent's cost at the start of every SMBO
            # iteration (including iteration 0).
            print('SMBO ' + str(counter) + ': ' +
                  str(self.runhistory.get_cost(self.incumbent)))
            counter += 1

            if self.scenario.shared_model:
                pSMAC.read(run_history=self.runhistory,
                           output_dirs=self.scenario.input_psmac_dirs,
                           configuration_space=self.config_space,
                           logger=self.logger)

            start_time = time.time()
            X, Y = self.rh2EPM.transform(self.runhistory)

            self.logger.debug("Search for next configuration")
            # get all found configurations sorted according to acq
            challengers = self.choose_next(X, Y)

            time_spent = time.time() - start_time
            time_left = self._get_timebound_for_intensification(time_spent)

            self.logger.debug("Intensify")

            if self.server is None:
                self.incumbent, inc_perf = self.intensifier.intensify(
                    challengers=challengers,
                    incumbent=self.incumbent,
                    run_history=self.runhistory,
                    aggregate_func=self.aggregate_func,
                    time_bound=max(self.intensifier._min_time, time_left))
            else:
                # Hand the current state and the new challengers to the
                # workers, then read their results back.
                self.logger.debug("Time left for intensification: %s",
                                  time_left)
                self.server.push(incumbent=self.incumbent,
                                 runhistory=self.runhistory,
                                 challengers=challengers.challengers,
                                 time_left=time_left)
                # Merge the workers' runhistory into self.runhistory.
                incumbent, new_runhistory = self.server.pull()
                self.runhistory.update(new_runhistory)

                # After the merge, check whether the workers' incumbent
                # beats ours; a worker does not see the complete runhistory.
                runhistory_old = self.runhistory.get_history_for_config(
                    self.incumbent)
                runhistory_new = self.runhistory.get_history_for_config(
                    incumbent)

                # Compare the lowest recorded cost of both configurations.
                lowest_cost_old = min(cost[0] for cost in runhistory_old)
                lowest_cost_new = min(cost[0] for cost in runhistory_new)

                if lowest_cost_new < lowest_cost_old:
                    # Switch to the new incumbent.
                    self.incumbent = incumbent
                # TODO: consider using the following helper instead:
                #   new_incumbent = self._compare_configs(
                #       incumbent=incumbent, challenger=challenger,
                #       run_history=run_history,
                #       aggregate_func=aggregate_func,
                #       log_traj=log_traj)

            if self.scenario.shared_model:
                pSMAC.write(
                    run_history=self.runhistory,
                    output_directory=self.scenario.output_dir_for_this_run)

            # Log through this object's logger (not the root logger) so the
            # message follows the same configuration as all other output.
            self.logger.debug(
                "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)",
                self.stats.get_remaing_time_budget(),
                self.stats.get_remaining_ta_budget(),
                self.stats.get_remaining_ta_runs())

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent

    def choose_next(self,
                    X: np.ndarray,
                    Y: np.ndarray,
                    incumbent_value: float = None):
        """Choose next candidate solution with Bayesian optimization. The
        suggested configurations depend on the argument ``acq_optimizer`` to
        the ``SMBO`` class.

        Parameters
        ----------
        X : (N, D) numpy array
            Each row contains a configuration and one set of
            instance features.
        Y : (N, O) numpy array
            The function values for each configuration instance pair.
        incumbent_value: float
            Cost value of incumbent configuration
            (required for acquisition function);
            if not given, it will be inferred from runhistory;
            if not given and runhistory is empty,
            it will raise a ValueError

        Returns
        -------
        Iterable

        Raises
        ------
        ValueError
            If ``incumbent_value`` is not given and the runhistory is empty.
        """
        if X.shape[0] == 0:
            # Only return a single point to avoid an overly high number of
            # random search iterations
            return self._random_search.maximize(runhistory=self.runhistory,
                                                stats=self.stats,
                                                num_points=1)

        # Drop rows that are exact duplicates.
        X, Y = remove_same_values(X, Y)
        self.logger.debug("Training data shape after de-duplication: %s",
                          X.shape)

        if self.hoag is not None:
            # A HOAG object was supplied: train the model with gradients.
            gradient = np.zeros(X.shape)
            # One fit per row (this involves a lot of repeated computation).
            for i in range(X.shape[0]):
                self.hoag.fit(X[i, :])
                gradient[i, :] = self.hoag.predict_gradient()
            # NOTE(review): flattening and sorting by X presumably assumes a
            # single hyperparameter (D == 1) -- confirm.
            X = X.flatten()
            ind = np.argsort(X)
            gradient = gradient.flatten()[ind].reshape(-1, 1)
            X = X[ind].reshape(-1, 1)
            Y = Y.flatten()[ind].reshape(-1, 1)
            self.model.train(X, Y, gradient=gradient)
        elif self.bayesian_optimization:
            # Parameters forwarded to the GP regressor.
            gp_params = {"alpha": 1e-5, "n_restarts_optimizer": 2}
            # Read the hyperparameter ranges from the configuration space;
            # only float-valued hyperparameters are handled.
            pbounds = {}
            for key, hyperparameter in \
                    self.scenario.cs._hyperparameters.items():
                if isinstance(hyperparameter.default_value, float):
                    pbounds[key] = (hyperparameter.lower,
                                    hyperparameter.upper)
            bo = BayesianOptimization(X, Y, pbounds=pbounds, verbose=False)
            # Predict the next point(s) according to EI.
            newX = bo.maximize(acq="ei", **gp_params)
            # Convert the hyperparameter arrays back into Configurations.
            challengers = [Configuration(self.scenario.cs, x) for x in newX]
            return challengers
        else:
            self.model.train(X, Y)

        if incumbent_value is None:
            if self.runhistory.empty():
                raise ValueError("Runhistory is empty and the cost value of "
                                 "the incumbent is unknown.")
            incumbent_value = self.runhistory.get_cost(self.incumbent)

        self.acquisition_func.update(model=self.model, eta=incumbent_value)

        # Originally 5000 points; lowered to 500 to speed things up.
        challengers = self.acq_optimizer.maximize(
            self.runhistory, self.stats, 500)
        return challengers

    def validate(self,
                 config_mode='inc',
                 instance_mode='train+test',
                 repetitions=1,
                 use_epm=False,
                 n_jobs=-1,
                 backend='threading'):
        """Create validator-object and run validation, using
        scenario-information, runhistory from smbo and tae_runner from
        intensify

        Parameters
        ----------
        config_mode: str or list<Configuration>
            string or directly a list of Configuration
            str from [def, inc, def+inc, wallclock_time, cpu_time, all]
            time evaluates at cpu- or wallclock-timesteps of:
            [max_time/2^0, max_time/2^1, max_time/2^3, ..., default]
            with max_time being the highest recorded time
        instance_mode: string
            what instances to use for validation, from [train, test, train+test]
        repetitions: int
            number of repetitions in nondeterministic algorithms (in
            deterministic will be fixed to 1)
        use_epm: bool
            whether to use an EPM instead of evaluating all runs with the TAE
        n_jobs: int
            number of parallel processes used by joblib
        backend: str
            passed through to ``Validator.validate`` (ignored when
            ``use_epm`` is set)

        Returns
        -------
        runhistory: RunHistory
            runhistory containing all specified runs
        """
        traj_fn = os.path.join(self.scenario.output_dir_for_this_run,
                               "traj_aclib2.json")
        trajectory = TrajLogger.read_traj_aclib_format(fn=traj_fn,
                                                       cs=self.scenario.cs)
        new_rh_path = os.path.join(self.scenario.output_dir_for_this_run,
                                   "validated_runhistory.json")

        validator = Validator(self.scenario, trajectory, self.rng)
        if use_epm:
            new_rh = validator.validate_epm(config_mode=config_mode,
                                            instance_mode=instance_mode,
                                            repetitions=repetitions,
                                            runhistory=self.runhistory,
                                            output=new_rh_path)
        else:
            new_rh = validator.validate(config_mode,
                                        instance_mode,
                                        repetitions,
                                        n_jobs,
                                        backend,
                                        self.runhistory,
                                        self.intensifier.tae_runner,
                                        output=new_rh_path)
        return new_rh

    def _get_timebound_for_intensification(self, time_spent):
        """Calculate time left for intensify from the time spent on
        choosing challengers using the fraction of time intended for
        intensification (which is specified in
        scenario.intensification_percentage).

        Parameters
        ----------
        time_spent : float

        Returns
        -------
        time_left : float

        Raises
        ------
        ValueError
            If ``scenario.intensification_percentage`` is not in (0, 1).
        """
        frac_intensify = self.scenario.intensification_percentage
        if frac_intensify <= 0 or frac_intensify >= 1:
            raise ValueError("The value for intensification_percentage-"
                             "option must lie in (0,1), instead: %.2f" %
                             (frac_intensify))
        # time_spent is the (1 - frac_intensify) share of one iteration;
        # scale it up to the whole iteration and take the intensify share.
        total_time = time_spent / (1 - frac_intensify)
        time_left = frac_intensify * total_time
        self.logger.debug("Total time: %.4f, time spent on choosing next "
                          "configurations: %.4f (%.2f), time left for "
                          "intensification: %.4f (%.2f)",
                          total_time, time_spent, (1 - frac_intensify),
                          time_left, frac_intensify)
        return time_left