def _aggregate(self, runs):
    """Merge several configurator runs into one combined ConfiguratorRun.

    Combines the original and validated runhistories as well as the
    trajectories of all given runs.

    Parameters
    ----------
    runs: List[ConfiguratorRun]
        individual configurator runs to aggregate

    Returns
    -------
    ConfiguratorRun
        aggregated run with combined runhistories and trajectory
    """
    orig_rh, vali_rh = RunHistory(average_cost), RunHistory(average_cost)
    for run in runs:
        orig_rh.update(run.original_runhistory, origin=DataOrigin.INTERNAL)
        # NOTE(review): vali_rh also receives the *original* data, making it
        # a superset (original + external validation data) — presumably
        # intended, confirm against callers.
        vali_rh.update(run.original_runhistory, origin=DataOrigin.INTERNAL)
        if run.validated_runhistory:
            vali_rh.update(run.validated_runhistory,
                           origin=DataOrigin.EXTERNAL_SAME_INSTANCES)
    for rh_name, rh in [("original", orig_rh),
                        ("validated", vali_rh),
                        ]:
        self.logger.debug('Combined number of %s RunHistory data points: %d '
                          '# Configurations: %d. # Configurator runs: %d',
                          rh_name, len(rh.data), len(rh.get_all_configs()), len(runs))
    traj = combine_trajectories([run.trajectory for run in runs], self.logger)
    # Only keep folder / budget if they are identical across all runs
    path_to_folder = runs[0].path_to_folder if len(set([r.path_to_folder for r in runs])) == 1 else None
    budget = runs[0].budget if len(set([r.budget for r in runs])) == 1 else None
    new_cr = ConfiguratorRun(runs[0].scenario,
                             orig_rh,
                             vali_rh,
                             traj,
                             self.analyzing_options,
                             output_dir=self.output_dir,
                             path_to_folder=path_to_folder,
                             budget=budget,
                             )
    return new_cr
def test_merge_foreign_data(self):
    ''' test smac.utils.merge_foreign_data '''
    scenario = Scenario(self.test_scenario_dict)
    scenario_2 = Scenario(self.test_scenario_dict)
    scenario_2.feature_dict = {"inst_new": [4]}

    # Configuration space with two integer hyperparameters
    cs = ConfigurationSpace()
    for hp_name in ('a', 'b'):
        cs.add_hyperparameter(
            UniformIntegerHyperparameter(name=hp_name, lower=0, upper=100))

    # Foreign runhistory: one run on a foreign instance, one on an
    # instance ("d") that also exists in <scenario>
    rh_merge = RunHistory()
    config = Configuration(cs, values={'a': 1, 'b': 2})
    for inst, inst_cost in (("inst_new", 10), ("d", 5)):
        rh_merge.add(config=config, instance_id=inst, cost=inst_cost,
                     time=20, status=StatusType.SUCCESS, seed=None,
                     additional_info=None)

    # Merge into an empty runhistory
    rh_base = RunHistory()
    merge_foreign_data(scenario=scenario, runhistory=rh_base,
                       in_scenario_list=[scenario_2],
                       in_runhistory_list=[rh_merge])

    # Both runs should be in the runhistory,
    # but the data must not update the cost of the config
    self.assertEqual(len(rh_base.data), 2)
    self.assertTrue(np.isnan(rh_base.get_cost(config)))

    # External run data must not be directly accessible
    runs = rh_base.get_runs_for_config(config, only_max_observed_budget=True)
    self.assertEqual(len(runs), 0)

    # An instance unknown to both scenarios must make the merge fail
    rh_merge.add(config=config, instance_id="inst_new_2", cost=10, time=20,
                 status=StatusType.SUCCESS, seed=None, additional_info=None)
    self.assertRaises(ValueError, merge_foreign_data,
                      scenario=scenario, runhistory=rh_base,
                      in_scenario_list=[scenario_2],
                      in_runhistory_list=[rh_merge])
def test_get_config_runs(self): ''' get some config runs from runhistory ''' # return max observed budget only rh = RunHistory() cs = get_config_space() config1 = Configuration(cs, values={'a': 1, 'b': 2}) config2 = Configuration(cs, values={'a': 1, 'b': 3}) rh.add(config=config1, cost=10, time=20, status=StatusType.SUCCESS, instance_id=1, seed=1, budget=1) rh.add(config=config1, cost=10, time=20, status=StatusType.SUCCESS, instance_id=1, seed=1, budget=2) with self.assertRaisesRegex(ValueError, 'This should not happen!'): rh.add(config=config1, cost=10, time=20, status=StatusType.SUCCESS, instance_id=2, seed=2, budget=1) rh.add(config=config2, cost=10, time=20, status=StatusType.SUCCESS, instance_id=1, seed=1, budget=1) ist = rh.get_runs_for_config(config=config1, only_max_observed_budget=True) self.assertEqual(len(ist), 2) self.assertEqual(ist[0].instance, 1) self.assertEqual(ist[1].instance, 2) self.assertEqual(ist[0].budget, 2) self.assertEqual(ist[1].budget, 1) # multiple budgets (only_max_observed_budget=False) rh = RunHistory() cs = get_config_space() config1 = Configuration(cs, values={'a': 1, 'b': 2}) config2 = Configuration(cs, values={'a': 1, 'b': 3}) rh.add(config=config1, cost=5, time=10, status=StatusType.SUCCESS, instance_id=1, seed=1, budget=1) rh.add(config=config1, cost=10, time=20, status=StatusType.SUCCESS, instance_id=1, seed=1, budget=2) rh.add(config=config2, cost=5, time=10, status=StatusType.SUCCESS, instance_id=1, seed=1, budget=1) rh.add(config=config2, cost=10, time=20, status=StatusType.SUCCESS, instance_id=1, seed=1, budget=2) ist = rh.get_runs_for_config(config=config1, only_max_observed_budget=False) self.assertEqual(len(ist), 2) self.assertEqual(ist[0].instance, 1) self.assertEqual(ist[0].budget, 1) self.assertEqual(ist[1].budget, 2)
def _aggregate(self, runs):
    """Aggregate multiple configurator runs into a single ConfiguratorRun,
    caching the result keyed on (path_to_folder, budgets).

    Parameters
    ----------
    runs: List[ConfiguratorRun]
        individual configurator runs to aggregate

    Returns
    -------
    ConfiguratorRun
        aggregated (and cached) run
    """
    # path_to_folder is the concatenation of all the paths of the individual runs
    path_to_folder = '-'.join(
        sorted(list(set([r.path_to_folder for r in runs]))))
    # budgets are the union of individual budgets. if they are not the same for all runs (no usecase atm),
    # they get an additional entry of the hash over the string of the combination to avoid false-positives
    budgets = [r.reduced_to_budgets for r in runs]
    budget_hash = ['budgetmix-%d' % (hash(str(budgets)))
                   ] if len(set([frozenset(b) for b in budgets])) != 1 else []
    budgets = [
        a for b in [x for x in budgets if x is not None] for a in b
    ] + budget_hash
    # Serve from cache if this exact combination was aggregated before
    if ConfiguratorRun.identify(path_to_folder, budgets) in self.cache:
        return self.cache[ConfiguratorRun.identify(path_to_folder, budgets)]
    orig_rh, vali_rh = RunHistory(), RunHistory()
    for run in runs:
        orig_rh.update(run.original_runhistory, origin=DataOrigin.INTERNAL)
        # vali_rh is a superset: original data plus external validation data
        vali_rh.update(run.original_runhistory, origin=DataOrigin.INTERNAL)
        if run.validated_runhistory:
            vali_rh.update(run.validated_runhistory,
                           origin=DataOrigin.EXTERNAL_SAME_INSTANCES)
    for rh_name, rh in [
        ("original", orig_rh),
        ("validated", vali_rh),
    ]:
        self.logger.debug(
            'Combined number of %s RunHistory data points: %d '
            '# Configurations: %d. # Configurator runs: %d',
            rh_name, len(rh.data), len(rh.get_all_configs()), len(runs))
    traj = combine_trajectories([run.trajectory for run in runs], self.logger)
    new_cr = ConfiguratorRun(
        runs[0].scenario,
        orig_rh,
        vali_rh,
        traj,
        self.analyzing_options,
        output_dir=self.output_dir,
        path_to_folder=path_to_folder,
        reduced_to_budgets=budgets,
    )
    self._cache(new_cr)
    return new_cr
def test_choose_next_2(self):
    # Mocked acquisition: the value of a point is the mean of its features
    def side_effect(X, derivative):
        return np.mean(X, axis=1).reshape((-1, 1))

    smbo = SMAC(self.scenario, rng=1).solver
    smbo.incumbent = self.scenario.cs.sample_configuration()
    smbo.runhistory = RunHistory(aggregate_func=average_cost)
    smbo.model = mock.Mock(spec=RandomForestWithInstances)
    smbo.acquisition_func._compute = mock.Mock(
        spec=RandomForestWithInstances)
    smbo.acquisition_func._compute.side_effect = side_effect
    X = smbo.rng.rand(10, 2)
    Y = smbo.rng.rand(10, 1)
    x = smbo.choose_next(X, Y)
    # The surrogate model must be trained exactly once on (X, Y)
    self.assertEqual(smbo.model.train.call_count, 1)
    self.assertEqual(len(x), 2020)
    num_random_search = 0
    num_local_search = 0
    # Even indices hold the acquisition-sorted challengers
    # (random-sorted or local search)
    for i in range(0, 2020, 2):
        self.assertIsInstance(x[i], Configuration)
        if 'Random Search (sorted)' in x[i].origin:
            num_random_search += 1
        elif 'Local Search' in x[i].origin:
            num_local_search += 1
    # number of local search configs has to be least 10
    # since x can have duplicates
    # which can be associated with the local search
    self.assertGreaterEqual(num_local_search, 10)
    # Odd indices hold the interleaved plain random configurations
    for i in range(1, 2020, 2):
        self.assertIsInstance(x[i], Configuration)
        self.assertEqual(x[i].origin, 'Random Search')
def test_multiple_budgets(self):
    history = RunHistory()
    space = get_config_space()
    conf = Configuration(space, values={'a': 1, 'b': 2})

    history.add(config=conf, cost=10, time=20, status=StatusType.SUCCESS,
                instance_id=1, seed=1, budget=1)
    self.assertEqual(history.get_cost(conf), 10)

    # only the higher budget gets included in the config cost,
    # while the minimum over all budgets stays available
    history.add(config=conf, cost=20, time=20, status=StatusType.SUCCESS,
                instance_id=1, seed=1, budget=2)
    self.assertEqual(history.get_cost(conf), 20)
    self.assertEqual(history.get_min_cost(conf), 10)
def test_get_configs_per_budget(self):
    history = RunHistory()
    space = get_config_space()

    # Three configs: the first two evaluated on budget 1, the third on 3
    configs = []
    for value, run_cost, run_budget in ((1, 10, 1), (2, 20, 1), (3, 30, 3)):
        cfg = Configuration(space, values={'a': value, 'b': value})
        configs.append(cfg)
        history.add(config=cfg, cost=run_cost, time=run_cost,
                    status=StatusType.SUCCESS, instance_id=1, seed=1,
                    budget=run_budget)

    # Filtering by budget 1 must return exactly the first two configs
    self.assertListEqual(history.get_all_configs_per_budget([1]), configs[:2])
def test_incremental_update(self):
    history = RunHistory()
    space = get_config_space()
    cfg = Configuration(space, values={"a": 1, "b": 2})

    # First run on instance 1 sets the initial cost
    history.add(config=cfg, cost=10, time=20, status=StatusType.SUCCESS,
                instance_id=1, seed=1)
    self.assertEqual(history.get_cost(cfg), 10)

    # A run on a second instance averages in: (10 + 20) / 2 == 15
    history.add(config=cfg, cost=20, time=20, status=StatusType.SUCCESS,
                instance_id=2, seed=1)
    self.assertEqual(history.get_cost(cfg), 15)
def _get_bohb_avg(self, validator, runs, rh):
    """Build the bohb-specific 'all_budgets' line from the incumbent
    trajectory of the stored bohb-result.

    Returns a Line (or None implicitly if preconditions are not met).
    """
    if len(runs) > 1 and self.bohb_result:
        # Add bohb-specific line
        # Get collective rh
        rh_bohb = RunHistory(average_cost)
        for run in runs:
            rh_bohb.update(run.combined_runhistory)
        # Get collective trajectory
        traj = HpBandSter2SMAC().get_trajectory({'': self.bohb_result}, '',
                                                self.scenario, rh_bohb)
        mean, time, configs = [], [], []
        traj_dict = self.bohb_result.get_incumbent_trajectory()
        # NOTE(review): the results of this call are immediately overwritten
        # below; presumably it is kept for its side effects (validation via
        # the validator) — confirm, otherwise it can be dropped.
        mean, _, time, configs = self._get_mean_var_time(
            validator, traj, False, rh_bohb)
        configs, time, budget, mean = traj_dict['config_ids'], traj_dict[
            'times_finished'], traj_dict['budgets'], traj_dict['losses']
        # Duplicate each entry (shifted by one) to draw step-style lines
        time_double = [t for sub in zip(time, time) for t in sub][1:]
        mean_double = [t for sub in zip(mean, mean) for t in sub][:-1]
        configs_double = [c for sub in zip(configs, configs) for c in sub][:-1]
        return Line('all_budgets', time_double, mean_double, mean_double,
                    mean_double, configs_double)
def test_multiple_budgets(self):
    history = RunHistory()
    space = get_config_space()
    cfg = Configuration(space, values={"a": 1, "b": 2})

    history.add(config=cfg, cost=[10, 50], time=20,
                status=StatusType.SUCCESS, instance_id=1, seed=1, budget=1)
    # A single multi-objective observation normalizes to 1.0
    self.assertEqual(history.get_cost(cfg), 1.0)

    # Only the higher budget gets included in the config cost,
    # but the normalization bounds are updated by the new values
    history.add(config=cfg, cost=[20, 25], time=25,
                status=StatusType.SUCCESS, instance_id=1, seed=1, budget=5)
    self.assertEqual(history.get_cost(cfg), 0.5)
def test_local_search_finds_minimum(self):
    class AcquisitionFunction:
        # Negated rosenbrock: maximizing the acquisition minimizes the
        # objective
        model = None

        def __call__(self, arrays):
            return np.array([[-rosenbrock_4d(a)] for a in arrays])

    ls = LocalSearch(
        acquisition_function=AcquisitionFunction(),
        config_space=self.cs,
        n_steps_plateau_walk=10,
        max_steps=np.inf,
    )

    runhistory = RunHistory()
    self.cs.seed(1)
    samples = self.cs.sample_configuration(size=100)
    sample_costs = [rosenbrock_4d(cfg) for cfg in samples]
    # All random samples must start far from the optimum
    self.assertGreater(np.min(sample_costs), 100)
    for cfg, sample_cost in zip(samples, sample_costs):
        runhistory.add(config=cfg, cost=sample_cost, time=0,
                       status=StatusType.SUCCESS)

    maximized = ls.maximize(runhistory, None, 10)
    minima = [-rosenbrock_4d(m) for m in maximized]
    # Best found point must be (close to) the minimum
    self.assertGreater(minima[0], -0.05)
def test_multi_config_design(self):
    stats = Stats(scenario=self.scenario)
    stats.start_timing()
    self.ta.stats = stats
    traj_logger = TrajLogger(output_dir=None, stats=stats)
    rh = RunHistory(aggregate_func=average_cost)
    self.ta.runhistory = rh
    rng = np.random.RandomState(seed=12345)

    intensifier = Intensifier(tae_runner=self.ta, stats=stats,
                              traj_logger=traj_logger, rng=rng,
                              instances=[None], run_obj_time=False)
    # Two fixed initial configurations
    configs = [Configuration(configuration_space=self.cs, values={"x1": x})
               for x in (4, 2)]
    design = MultiConfigInitialDesign(tae_runner=self.ta,
                                      scenario=self.scenario,
                                      stats=stats,
                                      traj_logger=traj_logger,
                                      runhistory=rh,
                                      rng=rng,
                                      configs=configs,
                                      intensifier=intensifier,
                                      aggregate_func=average_cost)
    incumbent = design.run()

    # Both configs were evaluated once; the incumbent's cost is 4
    self.assertEqual(stats.ta_runs, 2)
    self.assertEqual(len(rh.data), 2)
    self.assertEqual(rh.get_cost(incumbent), 4)
def test_full_update(self):
    history = RunHistory()
    space = get_config_space()
    cfg_a = Configuration(space, values={'a': 1, 'b': 2})
    cfg_b = Configuration(space, values={'a': 1, 'b': 3})

    history.add(config=cfg_a, cost=10, time=20, status=StatusType.SUCCESS,
                instance_id=1, seed=1)
    history.add(config=cfg_b, cost=10, time=20, status=StatusType.SUCCESS,
                instance_id=1, seed=1)
    history.add(config=cfg_b, cost=20, time=20, status=StatusType.SUCCESS,
                instance_id=2, seed=2)

    # Recomputing over all instances must not change the aggregated cost
    before = history.get_cost(cfg_b)
    history.compute_all_costs()
    self.assertEqual(before, history.get_cost(cfg_b))

    # Restricting to instance 2 must reduce the cost to that single run
    history.compute_all_costs(instances=[2])
    after = history.get_cost(cfg_b)
    self.assertNotEqual(before, after)
    self.assertEqual(after, 20)
def transform_smac(optimizer, variables, X, Y):
    """Run (X, Y) through SMAC's runhistory-to-EPM transformation.

    Returns the data unchanged unless *optimizer* is a
    SMACBayesianOptimizer.
    """
    from ..optimizers import SMACBayesianOptimizer
    from . import ContinuousVariable

    if not isinstance(optimizer, SMACBayesianOptimizer):
        return X, Y

    # Usual SMAC pipeline: create a runhistory, fill it with the observed
    # points and transform its data with rh2epm
    config_space = optimizer.get_config_space(variables=variables)
    scenario = optimizer.get_scenario(maxeval=None, config_space=config_space)
    rh2epm = optimizer.get_runhistory2epm(scenario=scenario)

    runhistory = RunHistory()
    config = config_space.sample_configuration(1)
    for point, point_cost in zip(X, Y):
        for var, value in zip(variables, point):
            # Continuous variables are coerced to float before assignment
            config[var.name] = (float(value)
                                if isinstance(var, ContinuousVariable)
                                else value)
        runhistory.add(config=copy.copy(config), cost=point_cost, time=0,
                       status=StatusType.SUCCESS)

    X, Y = rh2epm.transform(runhistory)
    return X, Y.flatten()
def test_passed_runhistory_deterministic(self):
    ''' test if passed runhistory is in resulting runhistory '''
    scen = Scenario(self.scen_fn,
                    cmd_options={'run_obj': 'quality',
                                 'train_insts': self.train_insts,
                                 'deterministic': True})
    scen.instance_specific = self.inst_specs
    validator = Validator(scen, self.trajectory, self.rng)

    # Seed a runhistory with the first half of the trajectory incumbents
    old_configs = [entry["incumbent"] for entry in self.trajectory]
    old_rh = RunHistory()
    half = int(len(old_configs) / 2)
    for config in old_configs[:half]:
        old_rh.add(config, 1, 1, StatusType.SUCCESS, instance_id='0')

    configs = validator._get_configs('all')
    insts = validator._get_instances('train')
    runs_w_rh = validator._get_runs(configs, insts, repetitions=2,
                                    runhistory=old_rh)
    runs_wo_rh = validator._get_runs(configs, insts, repetitions=2)
    # 4 of the runs are already covered by the passed runhistory
    self.assertEqual(len(runs_w_rh[0]), len(runs_wo_rh[0]) - 4)
    self.assertEqual(len(runs_w_rh[1].data), 4)
    self.assertEqual(len(runs_wo_rh[1].data), 0)
def __init__(self,
             scenario: typing.Type[Scenario],
             rng: typing.Optional[typing.Union[np.random.RandomState, int]] = None,
             run_id: int = 1,
             tae: typing.Type[ExecuteTARun] = ExecuteTARunOld,
             tae_kwargs: typing.Union[dict, None] = None,
             shared_model: bool = True,
             validate: bool = True,
             n_optimizers: int = 2,
             val_set: typing.Union[typing.List[str], None] = None,
             n_incs: int = 1,
             **kwargs):
    """Constructor

    Parameters
    ----------
    scenario : ~smac.scenario.scenario.Scenario
        Scenario object
    rng: int/np.random.RandomState
        The randomState/seed to pass to each smac run
    run_id: int
        run_id for this hydra run
    tae: ExecuteTARun
        Target Algorithm Runner (supports old and aclib format
        as well as AbstractTAFunc)
    tae_kwargs: Optional[dict]
        arguments passed to constructor of '~tae'
    shared_model: bool
        Flag to indicate whether information is shared between
        SMAC runs or not
    validate: bool / None
        Flag to indicate whether to validate the found configurations
        or to use the SMAC estimates; None => neither and return the
        full portfolio
    n_optimizers: int
        Number of optimizers to run in parallel per round
    val_set: typing.List[str]
        List of instance-ids to validate on
    n_incs: int
        Number of incumbents to return (n_incs <= 0 ==> all found
        configurations)
    """
    self.logger = logging.getLogger(
        self.__module__ + "." + self.__class__.__name__)
    self.scenario = scenario
    self.run_id, self.rng = get_rng(rng, run_id, logger=self.logger)
    self.kwargs = kwargs
    self.output_dir = None
    self.rh = RunHistory()
    self._tae = tae
    self._tae_kwargs = tae_kwargs
    # At least two optimizers are required for parallel rounds
    if n_optimizers <= 1:
        self.logger.warning('Invalid value in %s: %d. Setting to 2',
                            'n_optimizers', n_optimizers)
    self.n_optimizers = max(n_optimizers, 2)
    self.validate = validate
    self.shared_model = shared_model
    # Clamp the number of returned incumbents into [1, n_optimizers]
    self.n_incs = min(max(1, n_incs), self.n_optimizers)
    self.val_set = scenario.train_insts if val_set is None else val_set
def test_get_config_runs(self):
    ''' get some config runs from runhistory '''
    history = RunHistory()
    space = get_config_space()
    cfg_a = Configuration(space, values={'a': 1, 'b': 2})
    cfg_b = Configuration(space, values={'a': 1, 'b': 3})

    history.add(config=cfg_a, cost=10, time=20, status=StatusType.SUCCESS,
                instance_id=1, seed=1)
    history.add(config=cfg_b, cost=10, time=20, status=StatusType.SUCCESS,
                instance_id=1, seed=1)
    history.add(config=cfg_a, cost=10, time=20, status=StatusType.SUCCESS,
                instance_id=2, seed=2)

    # cfg_a ran on two instances; cfg_b's run must not appear here
    runs = history.get_runs_for_config(config=cfg_a)
    self.assertEqual(len(runs), 2)
    self.assertEqual(runs[0].instance, 1)
    self.assertEqual(runs[1].instance, 2)
def test_illegal_input(self):
    history = RunHistory()
    space = get_config_space()
    cfg = Configuration(space, values={"a": 1, "b": 2})
    self.assertTrue(history.empty())

    # Three objective values where two are expected -> rejected
    with pytest.raises(ValueError):
        history.add(config=cfg, cost=[4.5, 5.5, 6.5], time=20,
                    status=StatusType.SUCCESS, instance_id=1, seed=12354,
                    starttime=10, endtime=30,
                    additional_info={"start_time": 10})

    # Two objective values are accepted
    history.add(config=cfg, cost=[2.5, 5.5], time=20,
                status=StatusType.SUCCESS, instance_id=1, seed=12354,
                starttime=10, endtime=30,
                additional_info={"start_time": 10})
def setUp(self):
    unittest.TestCase.setUp(self)

    self.rh = RunHistory()
    self.cs = get_config_space()
    # Five fixed configurations used across the tests
    config_values = [{'a': 7, 'b': 11}, {'a': 13, 'b': 17}, {'a': 0, 'b': 7},
                     {'a': 29, 'b': 31}, {'a': 31, 'b': 33}]
    (self.config1, self.config2, self.config3,
     self.config4, self.config5) = (Configuration(self.cs, values=v)
                                    for v in config_values)

    self.scen = Scenario({"cutoff_time": 2, 'cs': self.cs,
                          "run_obj": 'runtime', "output_dir": ''})
    self.stats = Stats(scenario=self.scen)
    self.stats.start_timing()

    # Hyperband instance under test
    self.HB = Hyperband(
        stats=self.stats,
        traj_logger=TrajLogger(output_dir=None, stats=self.stats),
        rng=np.random.RandomState(12345),
        deterministic=False,
        run_obj_time=False,
        instances=[1, 2, 3, 4, 5],
        n_seeds=2,
        initial_budget=2,
        max_budget=5,
        eta=2,
    )
def __init__(self,
             scenario: typing.Type[Scenario],
             rng: typing.Optional[typing.Union[np.random.RandomState, int]] = None,
             run_id: int = 1,
             tae: typing.Type[BaseRunner] = ExecuteTARunOld,
             tae_kwargs: typing.Union[dict, None] = None,
             shared_model: bool = True,
             validate: bool = True,
             n_optimizers: int = 2,
             val_set: typing.Union[typing.List[str], None] = None,
             n_incs: int = 1,
             **kwargs):
    """Constructor

    Parameters
    ----------
    scenario : ~smac.scenario.scenario.Scenario
        Scenario object
    rng: int/np.random.RandomState
        The randomState/seed to pass to each smac run
    run_id: int
        run_id for this run
    tae: BaseRunner
        Target Algorithm Runner
    tae_kwargs: Optional[dict]
        arguments passed to constructor of '~tae'
    shared_model: bool
        Flag to indicate whether information is shared between runs or not
    validate: bool
        Flag to indicate whether to validate the found configurations
        or to use the estimates
    n_optimizers: int
        Number of optimizers to run in parallel per round
    val_set: typing.List[str]
        List of instance-ids to validate on
    n_incs: int
        Number of incumbents to return
    """
    self.logger = logging.getLogger(self.__module__ + "." + self.__class__.__name__)
    self.scenario = scenario
    self.run_id, self.rng = get_rng(rng, run_id, logger=self.logger)
    self.kwargs = kwargs
    self.output_dir = None
    self.rh = RunHistory()
    self._tae = tae
    self._tae_kwargs = tae_kwargs
    # At least two optimizers are required for parallel rounds
    if n_optimizers <= 1:
        self.logger.warning('Invalid value in %s: %d. Setting to 2',
                            'n_optimizers', n_optimizers)
    self.n_optimizers = max(n_optimizers, 2)
    self.validate = validate
    self.shared_model = shared_model
    # Clamp the number of returned incumbents into [1, n_optimizers]
    self.n_incs = min(max(1, n_incs), self.n_optimizers)
    if val_set is None:
        self.val_set = scenario.train_insts
    else:
        self.val_set = val_set
def test_inst_no_feat(self):
    ''' test if scenarios are treated correctly if no features are specified.'''
    scen = Scenario(self.scen_fn,
                    cmd_options={'run_obj': 'quality',
                                 'train_insts': self.train_insts,
                                 'test_insts': self.test_insts})
    # Without features neither array nor dict should be populated
    self.assertIsNone(scen.feature_array)
    self.assertEqual(len(scen.feature_dict), 0)

    scen.instance_specific = self.inst_specs
    validator = Validator(scen, self.trajectory, self.rng)

    # Seed a runhistory with the first half of the trajectory incumbents
    old_configs = [entry["incumbent"] for entry in self.trajectory]
    old_rh = RunHistory()
    half = int(len(old_configs) / 2)
    for config in old_configs[:half]:
        old_rh.add(config, 1, 1, StatusType.SUCCESS, instance_id='0',
                   seed=127)

    rh = validator.validate_epm('all', 'train+test', 1, old_rh)
    self.assertEqual(len(old_rh.get_all_configs()), 4)
    self.assertEqual(len(rh.get_all_configs()), 10)
def restore_state(self, args_, scen, root_logger):
    """Read in files for state-restoration: runhistory, stats and trajectory.

    Parameters
    ----------
    args_:
        parsed command-line arguments; ``args_.restore_state`` is the
        folder to restore from
    scen: Scenario
        scenario whose configuration space is used to load the runhistory
    root_logger: logging.Logger
        logger for debug output

    Returns
    -------
    (RunHistory, Stats, Configuration)
        restored runhistory, stats and the last incumbent of the trajectory

    Raises
    ------
    FileNotFoundError
        if the restore-folder does not exist
    """
    # Check for folder and files
    rh_path = os.path.join(args_.restore_state, "runhistory.json")
    stats_path = os.path.join(args_.restore_state, "stats.json")
    traj_path = os.path.join(args_.restore_state, "traj_aclib2.json")
    scen_path = os.path.join(args_.restore_state, "scenario.txt")
    if not os.path.isdir(args_.restore_state):
        raise FileNotFoundError(
            "Could not find folder from which to restore.")
    # Load runhistory and stats
    rh = RunHistory(aggregate_func=None)
    rh.load_json(rh_path, scen.cs)
    root_logger.debug("Restored runhistory from %s", rh_path)
    stats = Stats(scen)
    stats.load(stats_path)
    root_logger.debug("Restored stats from %s", stats_path)
    # The last trajectory entry holds the most recent incumbent
    trajectory = TrajLogger.read_traj_aclib_format(fn=traj_path, cs=scen.cs)
    incumbent = trajectory[-1]["incumbent"]
    root_logger.debug("Restored incumbent %s from %s", incumbent, traj_path)
    # Copy traj if output_dir of specified scenario-file is different than
    # the output_dir of the scenario-file in the folder from which to restore.
    if scen.output_dir != InputReader().read_scenario_file(
            scen_path)['output_dir']:
        new_traj_path = os.path.join(scen.output_dir, "traj_aclib2.json")
        shutil.copy(traj_path, new_traj_path)
        # BUGFIX: log the copied trajectory path, not the runhistory path
        root_logger.debug("Copied traj %s", new_traj_path)
    return rh, stats, incumbent
def setUp(self):
    unittest.TestCase.setUp(self)

    self.rh = RunHistory()
    self.cs = get_config_space()
    # Four fixed configurations used across the tests
    config_values = [{'a': 7, 'b': 11}, {'a': 13, 'b': 17},
                     {'a': 0, 'b': 7}, {'a': 29, 'b': 31}]
    (self.config1, self.config2,
     self.config3, self.config4) = (Configuration(self.cs, values=v)
                                    for v in config_values)

    self.scen = Scenario({"cutoff_time": 2, 'cs': self.cs,
                          "run_obj": 'runtime', "output_dir": ''})
    self.stats = Stats(scenario=self.scen)
    self.stats.start_timing()

    # Intensifier under test: deterministic, single instance
    self.intensifier = SimpleIntensifier(
        stats=self.stats,
        traj_logger=TrajLogger(output_dir=None, stats=self.stats),
        rng=np.random.RandomState(12345),
        deterministic=True,
        run_obj_time=False,
        instances=[1],
    )
def __init__(self, scenario, trajectory, output, rng=None):
    """ Construct Validator for given scenario and trajectory.

    Parameters
    ----------
    scenario: Scenario
        scenario object for cutoff, instances and specifics
    trajectory: Trajectory
        trajectory to take incumbent(s) from
    output: string
        path to runhistory to be saved
    rng: np.random.RandomState
        Random number generator
    """
    self.logger = logging.getLogger(self.__module__ + "." + self.__class__.__name__)
    self.scen = scenario
    self.traj = trajectory
    # Fall back to a default output file if none was given
    self.output = output if output else "validation_rh.json"
    # Accept a ready RandomState, an integer seed, or nothing (random seed)
    if isinstance(rng, np.random.RandomState):
        self.rng = rng
    elif isinstance(rng, int):
        self.rng = np.random.RandomState(seed=rng)
    else:
        self.rng = np.random.RandomState(seed=np.random.randint(MAXINT))
    # Validation runs are accumulated in this runhistory
    self.rh = RunHistory(average_cost)
def test_add_and_pickle(self):
    ''' simply adding some rundata to runhistory, then pickle it '''
    import os

    rh = RunHistory()
    cs = get_config_space()
    config = Configuration(cs, values={'a': 1, 'b': 2})

    self.assertTrue(rh.empty())

    # One run without instance/seed info, one fully specified run
    rh.add(config=config, cost=10, time=20, status=StatusType.SUCCESS,
           instance_id=None, seed=None, additional_info=None)
    rh.add(config=config, cost=10, time=20, status=StatusType.SUCCESS,
           instance_id=1, seed=12354, additional_info={"start_time": 10})
    self.assertFalse(rh.empty())

    # Round-trip through pickle. Context managers close the handles even
    # if an assertion fails, and the temp file is removed afterwards
    # (the original version leaked it).
    with tempfile.NamedTemporaryFile(mode='wb', delete=False) as tmpfile:
        pickle.dump(rh, tmpfile, -1)
        name = tmpfile.name
    try:
        with open(name, 'rb') as fh:
            loaded_rh = pickle.load(fh)
        self.assertEqual(loaded_rh.data, rh.data)
    finally:
        os.remove(name)
def test_passed_runhistory(self):
    ''' test if passed runhistory is in resulting runhistory '''
    scen = Scenario(self.scen_fn,
                    cmd_args={'run_obj': 'quality',
                              'instances': self.train_insts,
                              'test_instances': self.test_insts})
    scen.instance_specific = self.inst_specs
    validator = Validator(scen, self.trajectory, self.rng)

    # Seed a runhistory with the first half of the trajectory incumbents
    old_configs = [entry["incumbent"] for entry in self.trajectory]
    old_rh = RunHistory(average_cost)
    half = int(len(old_configs) / 2)
    seeds = [127 for i in range(half)]
    seeds[-1] = 126  # Test instance_seed-structure in validation
    # BUGFIX: iterate with enumerate instead of list.index(config) —
    # index() returns the *first* occurrence, which assigns the wrong seed
    # whenever the trajectory contains duplicate incumbents (and is O(n^2)).
    for idx, config in enumerate(old_configs[:half]):
        old_rh.add(config, 1, 1, StatusType.SUCCESS, instance_id='0',
                   seed=seeds[idx])

    configs = validator._get_configs('all')
    insts = validator._get_instances('train')
    runs_w_rh = validator.get_runs(configs, insts, repetitions=2,
                                   runhistory=old_rh)
    runs_wo_rh = validator.get_runs(configs, insts, repetitions=2)
    # 4 of the runs are already covered by the passed runhistory
    self.assertEqual(len(runs_w_rh[0]), len(runs_wo_rh[0]) - 4)
    self.assertEqual(len(runs_w_rh[1].data), 4)
    self.assertEqual(len(runs_wo_rh[1].data), 0)
def combine_runhistories(rhs, logger=None):
    """Combine list of given runhistories, interleaving the runs to best
    approximate the original execution order.

    Parameters
    ----------
    rhs: List[RunHistory]
        runhistories to combine
    logger: logging.Logger, optional
        if given, sizes of the individual and combined runhistories are
        logged at debug level

    Returns
    -------
    RunHistory
        combined runhistory
    """
    combi_rh = RunHistory()
    rh_to_runs = {rh: list(rh.data.items()) for rh in rhs}
    if logger:
        # BUGFIX: iterating the dict directly yields only the keys, so the
        # (k, v) unpack raised whenever a logger was passed — .items() is
        # required here.
        logger.debug("number of elements: " +
                     str({k: len(v) for k, v in rh_to_runs.items()}))
    idx = 0
    # Round-robin: take the idx-th run from every remaining runhistory,
    # dropping a runhistory once it is exhausted
    while len(rh_to_runs) > 0:
        for rh in list(rh_to_runs.keys()):
            try:
                k, v = rh_to_runs[rh][idx]
                combi_rh.add(
                    config=rh.ids_config[k.config_id],
                    cost=v.cost,
                    time=v.time,
                    status=v.status,
                    instance_id=k.instance_id,
                    # TODO budget option
                    seed=k.seed,
                    additional_info=v.additional_info)
            except IndexError:
                rh_to_runs.pop(rh)
        idx += 1
    if logger:
        logger.debug("number of elements in individual rhs: " +
                     str({k: len(v) for k, v in rh_to_runs.items()}))
        logger.debug("number of elements in combined rh: " +
                     str(len(combi_rh.data)))
    return combi_rh
def restore_state(scenario: typing.Union[Scenario, ScenarioProperties]): r"""Read in files for state-restoration: runhistory, stats, trajectory. :param scenario: Scenario whose state shall be loaded. :return: (RunHistory, Stats, dict)-tuple """ # Check for folder and files rh_path = os.path.join(scenario.output_dir_for_this_run, 'runhistory.json') stats_path = os.path.join(scenario.output_dir_for_this_run, 'stats.json') traj_path_aclib = os.path.join(scenario.output_dir_for_this_run, 'traj_aclib2.json') if not os.path.isdir(scenario.output_dir_for_this_run): raise FileNotFoundError('Could not find folder from which to restore.') # Load runhistory and stats rh = RunHistory(aggregate_func=None) rh.load_json(rh_path, scenario.cs) log.debug('Restored runhistory from %s', rh_path) stats = Stats(scenario) stats.load(stats_path) log.debug('Restored stats from %s', stats_path) trajectory = TrajLogger.read_traj_aclib_format(fn=traj_path_aclib, cs=scenario.cs) incumbent = trajectory[-1]['incumbent'] log.debug('Restored incumbent %s from %s', incumbent, traj_path_aclib) return rh, stats, incumbent
def test_get_initial_points_moo(self):
    class Model:
        # Identity model: predictions equal the inputs
        def predict_marginalized_over_instances(self, X):
            return X, X

    class AcquisitionFunction:
        model = Model()

        def __call__(self, X):
            return np.array([x.get_array().sum() for x in X]).reshape((-1, 1))

    ls = LocalSearch(
        acquisition_function=AcquisitionFunction(),
        config_space=self.cs,
        n_steps_plateau_walk=10,
        max_steps=np.inf,
    )

    runhistory = RunHistory()
    samples = self.cs.sample_configuration(size=100)
    for cfg, sample_cost in zip(samples,
                                (rosenbrock_4d(c) for c in samples)):
        runhistory.add(config=cfg, cost=sample_cost, time=0,
                       status=StatusType.SUCCESS)

    points = ls._get_initial_points(num_points=5, runhistory=runhistory,
                                    additional_start_points=None)
    self.assertEqual(len(points), 10)
def restore_state( self, scen: Scenario, restore_state: str, ) -> typing.Tuple[RunHistory, Stats, typing.List, typing.List]: """Read in files for state-restoration: runhistory, stats, trajectory. """ # Check for folder and files rh_path = os.path.join(restore_state, "runhistory.json") stats_path = os.path.join(restore_state, "stats.json") traj_path_aclib = os.path.join(restore_state, "traj_aclib2.json") traj_path_old = os.path.join(restore_state, "traj_old.csv") _ = os.path.join(restore_state, "scenario.txt") if not os.path.isdir(restore_state): raise FileNotFoundError( "Could not find folder from which to restore.") # Load runhistory and stats rh = RunHistory() rh.load_json(rh_path, scen.cs) # type: ignore[attr-defined] # noqa F821 self.logger.debug("Restored runhistory from %s", rh_path) stats = Stats(scen) stats.load(stats_path) self.logger.debug("Restored stats from %s", stats_path) with open(traj_path_aclib, 'r') as traj_fn: traj_list_aclib = traj_fn.readlines() with open(traj_path_old, 'r') as traj_fn: traj_list_old = traj_fn.readlines() return rh, stats, traj_list_aclib, traj_list_old