def restore_state(scenario: typing.Union[Scenario, ScenarioProperties]):
    r"""Restore a previous run's state from its output directory.

    Loads runhistory, stats and trajectory files and extracts the final
    incumbent from the trajectory.

    :param scenario: Scenario whose state shall be loaded.
    :return: (RunHistory, Stats, incumbent)-tuple
    """
    out_dir = scenario.output_dir_for_this_run
    if not os.path.isdir(out_dir):
        raise FileNotFoundError('Could not find folder from which to restore.')

    # Runhistory: every evaluated configuration with its cost.
    history = RunHistory(aggregate_func=None)
    history_file = os.path.join(out_dir, 'runhistory.json')
    history.load_json(history_file, scenario.cs)
    log.debug('Restored runhistory from %s', history_file)

    # Stats: bookkeeping counters of the previous run.
    run_stats = Stats(scenario)
    stats_file = os.path.join(out_dir, 'stats.json')
    run_stats.load(stats_file)
    log.debug('Restored stats from %s', stats_file)

    # Trajectory: the last entry holds the incumbent configuration.
    traj_file = os.path.join(out_dir, 'traj_aclib2.json')
    trajectory = TrajLogger.read_traj_aclib_format(fn=traj_file,
                                                   cs=scenario.cs)
    incumbent = trajectory[-1]['incumbent']
    log.debug('Restored incumbent %s from %s', incumbent, traj_file)

    return history, run_stats, incumbent
def main():
    """Run SMAC until its budget is exhausted, then restore and continue."""
    # Budget is given via runcount_limit; optimize until it is spent.
    orig_scen_dict = {
        'algo': 'python cmdline_wrapper.py',
        'paramfile': 'param_config_space.pcs',
        'run_obj': 'quality',
        'runcount_limit': 25,
        'deterministic': True,
        'output_dir': 'restore_me'
    }
    original_scenario = Scenario(orig_scen_dict)
    smac = SMAC(scenario=original_scenario)
    smac.optimize()

    print(
        "\n########## BUDGET EXHAUSTED! Restoring optimization: ##########\n")

    # The output now lives in the folder 'restore_me'.
    #
    # One option is to tweak the scenario-object stored in
    # 'smac.solver.scenario' and simply optimize again:
    #smac.solver.scenario.ta_run_limit = 50
    #smac.optimize()

    # To demonstrate a full recovery from the output directory instead,
    # build a fresh scenario with a larger budget:
    new_scenario = Scenario(orig_scen_dict,
                            cmd_args={'runcount_limit': 50,  # overwrite these args
                                      'output_dir': 'restored'})

    # Recover the runhistory ...
    history_fn = os.path.join(original_scenario.output_dir,
                              "runhistory.json")
    runhistory = RunHistory(aggregate_func=None)
    runhistory.load_json(history_fn, new_scenario.cs)

    # ... the stats ...
    stats_fn = os.path.join(original_scenario.output_dir, "stats.json")
    stats = Stats(new_scenario)
    stats.load(stats_fn)

    # ... and the trajectory, whose last entry is the incumbent.
    traj_fn = os.path.join(original_scenario.output_dir, "traj_aclib2.json")
    trajectory = TrajLogger.read_traj_aclib_format(fn=traj_fn,
                                                   cs=new_scenario.cs)
    incumbent = trajectory[-1]["incumbent"]

    # The output_dir changed, so carry the old trajectory-file over
    # (runhistory and stats will be complete).
    shutil.copy(traj_fn,
                os.path.join(new_scenario.output_dir, "traj_aclib2.json"))

    # Passing stats and restore_incumbent makes SMAC detect the intention
    # of restoring a state and continue where it left off.
    smac = SMAC(scenario=new_scenario,
                runhistory=runhistory,
                stats=stats,
                restore_incumbent=incumbent)
    smac.optimize()
def restore_state(self, args_, scen, root_logger):
    """Read in files for state-restoration: runhistory, stats, trajectory.

    Parameters
    ----------
    args_: argparse.Namespace
        parsed arguments; ``args_.restore_state`` is the folder to
        restore from.
    scen: Scenario
        scenario matching the stored run (provides the config space).
    root_logger: logging.Logger
        logger for debug output.

    Returns
    -------
    (RunHistory, Stats, incumbent)-tuple
    """
    # Check for folder and files
    rh_path = os.path.join(args_.restore_state, "runhistory.json")
    stats_path = os.path.join(args_.restore_state, "stats.json")
    traj_path = os.path.join(args_.restore_state, "traj_aclib2.json")
    scen_path = os.path.join(args_.restore_state, "scenario.txt")
    if not os.path.isdir(args_.restore_state):
        raise FileNotFoundError(
            "Could not find folder from which to restore.")
    # Load runhistory and stats
    rh = RunHistory(aggregate_func=None)
    rh.load_json(rh_path, scen.cs)
    root_logger.debug("Restored runhistory from %s", rh_path)
    stats = Stats(scen)
    stats.load(stats_path)
    root_logger.debug("Restored stats from %s", stats_path)
    trajectory = TrajLogger.read_traj_aclib_format(fn=traj_path, cs=scen.cs)
    incumbent = trajectory[-1]["incumbent"]
    root_logger.debug("Restored incumbent %s from %s", incumbent, traj_path)
    # Copy traj if output_dir of the specified scenario-file differs from
    # the output_dir of the scenario-file in the restore-folder.
    if scen.output_dir != InputReader().read_scenario_file(
            scen_path)['output_dir']:
        new_traj_path = os.path.join(scen.output_dir, "traj_aclib2.json")
        shutil.copy(traj_path, new_traj_path)
        # Fix: previously logged rh_path here instead of the copied file.
        root_logger.debug("Copied traj %s", new_traj_path)
    return rh, stats, incumbent
def restore_state_before_scen(self, args_):
    """Read in files for state-restoration: runhistory, stats, trajectory.

    Returns
    -------
    (RunHistory, Stats, traj_list_aclib, traj_list_old)-tuple
        trajectories are returned as raw file lines.
    """
    # Construct dummy-scenario for object-creation (mainly cs is needed)
    tmp_scen = InputReader().read_scenario_file(args_.scenario_file)
    tmp_scen = Scenario(tmp_scen, cmd_args={'output_dir': ''})
    # Check for folder and files
    # (removed an unused 'scenario.txt' path that was never read)
    rh_path = os.path.join(args_.restore_state, "runhistory.json")
    stats_path = os.path.join(args_.restore_state, "stats.json")
    traj_path_aclib = os.path.join(args_.restore_state, "traj_aclib2.json")
    traj_path_old = os.path.join(args_.restore_state, "traj_old.csv")
    if not os.path.isdir(args_.restore_state):
        raise FileNotFoundError(
            "Could not find folder from which to restore.")
    # Load runhistory and stats
    rh = RunHistory(aggregate_func=None)
    rh.load_json(rh_path, tmp_scen.cs)
    self.logger.debug("Restored runhistory from %s", rh_path)
    # Need to inject actual scenario later for output_dir!
    stats = Stats(tmp_scen)
    stats.load(stats_path)
    self.logger.debug("Restored stats from %s", stats_path)
    # Trajectories are kept as raw lines; they are re-logged later.
    with open(traj_path_aclib, 'r') as traj_fn:
        traj_list_aclib = traj_fn.readlines()
    with open(traj_path_old, 'r') as traj_fn:
        traj_list_old = traj_fn.readlines()
    return rh, stats, traj_list_aclib, traj_list_old
def restore_state(
    self,
    scen: Scenario,
    restore_state: str,
) -> typing.Tuple[RunHistory, Stats, typing.List, typing.List]:
    """Read in files for state-restoration: runhistory, stats, trajectory.

    Parameters
    ----------
    scen : Scenario
        Scenario matching the stored run (provides the config space).
    restore_state : str
        Folder from which to restore the state.

    Returns
    -------
    (RunHistory, Stats, traj_list_aclib, traj_list_old)-tuple
        trajectories are returned as raw file lines.
    """
    # Check for folder and files
    # (removed a dead '_ = os.path.join(..., "scenario.txt")' statement)
    rh_path = os.path.join(restore_state, "runhistory.json")
    stats_path = os.path.join(restore_state, "stats.json")
    traj_path_aclib = os.path.join(restore_state, "traj_aclib2.json")
    traj_path_old = os.path.join(restore_state, "traj_old.csv")
    if not os.path.isdir(restore_state):
        raise FileNotFoundError(
            "Could not find folder from which to restore.")
    # Load runhistory and stats
    rh = RunHistory()
    rh.load_json(rh_path, scen.cs)  # type: ignore[attr-defined] # noqa F821
    self.logger.debug("Restored runhistory from %s", rh_path)
    stats = Stats(scen)
    stats.load(stats_path)
    self.logger.debug("Restored stats from %s", stats_path)
    with open(traj_path_aclib, 'r') as traj_fn:
        traj_list_aclib = traj_fn.readlines()
    with open(traj_path_old, 'r') as traj_fn:
        traj_list_old = traj_fn.readlines()
    return rh, stats, traj_list_aclib, traj_list_old
def dummy_traditional_classification(self, time_left: int,
                                     func_eval_time_limit_secs: int) -> None:
    """Load a stored 'traditional' runhistory, merge it into this run's
    history and persist it in the backend's internals directory.

    Both parameters mirror the real implementation's signature and are
    not used by this dummy.
    """
    search_space = TraditionalTabularClassificationPipeline(
        dataset_properties={'numerical_columns': [10]}
    ).get_hyperparameter_search_space()
    loaded = RunHistory()
    loaded.load_json('./.tmp_api/traditional_run_history.json', search_space)
    # Merge the loaded runs into the instance-level history.
    self.run_history.update(loaded, DataOrigin.EXTERNAL_SAME_INSTANCES)
    loaded.save_json(
        os.path.join(self._backend.internals_directory,
                     'traditional_run_history.json'),
        save_external=True)
def main():
    """Scatter-plot incumbent cost against default cost from a runhistory.

    Reads scenario and runhistory files given on the command line and
    writes the plot to the given output file.
    """
    parser = argparse.ArgumentParser(description='test',
                                     fromfile_prefix_chars="@")
    parser.add_argument('-s', '--scenario_file', dest='scenario',
                        required=True)
    parser.add_argument('-rh', '--runhistory_file', dest='runhistory',
                        required=True)
    parser.add_argument('-o', '--output_file', dest='output', required=True)
    args = parser.parse_args()
    scenario = Scenario(args.scenario)

    # We load the runhistory, ...
    # (removed a pointless single-argument os.path.join and an unused
    # 'config' lookup)
    runhistory = RunHistory(aggregate_func=None)
    runhistory.load_json(args.runhistory, scenario.cs)

    cost_default = []
    cost_incumbent = []
    # iterate over data because it is an OrderedDict
    for entry, values in runhistory.data.items():
        z_ = values.cost  # get cost
        if z_ > 100:
            z_ = 150  # clip large costs to a fixed value for plotting
        if entry.config_id == 1:  # config-id 1 is the default config
            cost_default.append(z_)
        else:
            cost_incumbent.append(z_)

    fig1 = plt.figure()
    ax1 = fig1.add_subplot(111)
    ax1.plot(cost_incumbent, cost_default, linestyle='None', marker='o',
             color="black")
    # Diagonal and clipping guides.
    ax1.plot([0, 100], [0, 100], 'r-')
    ax1.plot([0, 100], [100, 100], linestyle='dashed', color="black")
    ax1.plot([100, 100], [100, 0], linestyle='dashed', color="black")
    plt.ylabel("Default Configuration")
    plt.xlabel("Incumbent")
    ax1.set_xlim([0, 175])
    ax1.set_ylim([0, 175])
    plt.gca().set_aspect('equal', adjustable='box')
    plt.title("Performance of Incumbent compared to Default Configuration")
    plt.savefig(args.output)
def merge_foreign_data_from_file(
        scenario: Scenario,
        runhistory: RunHistory,
        in_scenario_fn_list: typing.List[str],
        in_runhistory_fn_list: typing.List[str],
        cs: ConfigurationSpace,
        aggregate_func: typing.Callable = average_cost):
    """Extend <scenario> and <runhistory> with runhistory data read from
    files, assuming the same pcs and feature space but different instances.

    Parameters
    ----------
    scenario: Scenario
        original scenario -- feature dictionary will be extended
    runhistory: RunHistory
        original runhistory -- will be extended by further data points
    in_scenario_fn_list: typing.List[str]
        input scenario file names
    in_runhistory_fn_list: typing.List[str]
        list filenames of runhistory dumps
    cs: ConfigurationSpace
        parameter configuration space to read runhistory from file
    aggregate_func: typing.Callable
        function to aggregate performance of a configuration across instances

    Returns
    -------
    scenario: Scenario
    runhistory: RunHistory
    """
    if not in_scenario_fn_list:
        raise ValueError(
            "To read warmstart data from previous runhistories, the corresponding scenarios are required. Use option --warmstart_scenario"
        )

    # Instantiate each input scenario without producing output folders.
    in_scens = [Scenario(scenario=fn, cmd_args={"output_dir": ""})
                for fn in in_scenario_fn_list]

    # Deserialize each runhistory dump against the given config space.
    in_rhs = []
    for fn in in_runhistory_fn_list:
        loaded = RunHistory(aggregate_func)
        loaded.load_json(fn, cs)
        in_rhs.append(loaded)

    return merge_foreign_data(scenario, runhistory,
                              in_scenario_list=in_scens,
                              in_runhistory_list=in_rhs)
def main():
    """Plot incumbent costs per evaluation against the default's cost line."""
    parser = argparse.ArgumentParser(description='test',
                                     fromfile_prefix_chars="@")
    parser.add_argument('-s', '--scenario_file', dest='scenario',
                        required=True)
    parser.add_argument('-rh', '--runhistory_file', dest='runhistory',
                        required=True)
    parser.add_argument('-o', '--output_file', dest='output', required=True)
    args = parser.parse_args()
    scenario = Scenario(args.scenario)

    # We load the runhistory, ...
    # (removed a pointless single-argument os.path.join and an unused
    # 'config' lookup)
    runhistory = RunHistory(aggregate_func=None)
    runhistory.load_json(args.runhistory, scenario.cs)

    cost_default = 1  # fallback if no run of the default config is present
    cost_incumbent = []
    # iterate over data because it is an OrderedDict
    for entry, values in runhistory.data.items():
        z_ = values.cost  # get cost
        if entry.config_id == 1:  # default configuration
            cost_default = z_
        else:
            cost_incumbent.append(z_)

    fig1 = plt.figure()
    ax1 = fig1.add_subplot(111)
    x = range(len(cost_incumbent))
    ax1.plot(x, cost_incumbent, linestyle='None', marker='+', color="black",
             label='SMAC')
    # Horizontal reference line at the default configuration's cost.
    ax1.plot([0, len(x)], [cost_default, cost_default], 'r-',
             label='Default Configuration')
    plt.ylabel("Loss")
    plt.xlabel("SMAC")
    plt.title("Performance of Incumbents compared to Default Configuration")
    plt.legend()
    plt.savefig(args.output)
def get_validated_runhistory(self, cs):
    """Load the validated runhistory from this reader's folder.

    Returns
    -------
    validated_rh: RunHistory
        runhistory with validation-data, if available
    """
    path = os.path.join(self.folder, 'validated_runhistory.json')
    history = RunHistory()
    try:
        history.load_json(path, cs)
    except FileNotFoundError:
        # Give the user a hint about the likely fix, then propagate.
        self.logger.warning(
            "%s not found. trying to read SMAC3-validation-output, "
            "if that's not correct, change it with the "
            "--validation_format option!", path)
        raise
    return history
def load_autosklearn(base_dir: str):
    """Load auto-sklearn results from ``base_dir`` as (time, cost) pairs.

    Falls back to a flat worst-score curve when the expected files are
    missing.
    """
    try:
        with open(os.path.join(base_dir, 'configspace.pkl'), 'rb') as f:
            cs = pickle.load(f)
        with open(os.path.join(base_dir, 'start.txt'), 'r') as f:
            start = float(f.readline())

        rh = RunHistory(lambda x: x)
        rh.load_json(os.path.join(base_dir, 'runhistory_0.json'), cs)
        # NOTE(review): additional_info is added to the run's time as an
        # offset here -- confirm against the writer of runhistory_0.json.
        scores = [(v.additional_info + v.time - start, v.cost)
                  for v in rh.data.values()]
    except FileNotFoundError:
        # min_time/max_time/worst_score come from the enclosing module.
        scores = [(min_time, worst_score[0]), (max_time, worst_score[0])]
    return _format(scores)
def get_runhistory(self, cs):
    """Load the runhistory from this reader's folder.

    Returns
    -------
    rh: RunHistory
        runhistory
    """
    path = os.path.join(self.folder, 'runhistory.json')
    if not os.path.isfile(path):
        # Fall back to a glob-search within the folder.
        path = self.get_glob_file(self.folder, 'runhistory.json')
    history = RunHistory()
    try:
        history.load_json(path, cs)
    except FileNotFoundError:
        # Give the user a hint about the likely fix, then propagate.
        self.logger.warning(
            "%s not found. trying to read SMAC3-output, "
            "if that's not correct, change it with the "
            "--file_format option!", path)
        raise
    return history
def get_runhistory(self, cs):
    """Load the runhistory from this reader's folder.

    Returns:
    --------
    rh: RunHistory
        runhistory
    """
    # (removed an unused 'validated_runhistory.json' path variable)
    rh_fn = os.path.join(self.folder, 'runhistory.json')
    rh = RunHistory(average_cost)
    try:
        rh.load_json(rh_fn, cs)
    except FileNotFoundError:
        # Give the user a hint about the likely fix, then propagate.
        self.logger.warning(
            "%s not found. trying to read SMAC3-output, "
            "if that's not correct, change it with the "
            "--format option!", rh_fn)
        raise
    return rh
def restore_state(self, scen, args_):
    """Read in files for state-restoration: runhistory, stats, trajectory.

    Returns
    -------
    (RunHistory, Stats, traj_list_aclib, traj_list_old)-tuple
        trajectories are returned as raw file lines.
    """
    # Check for folder and files
    # (removed an unused 'scenario.txt' path that was never read)
    rh_path = os.path.join(args_.restore_state, "runhistory.json")
    stats_path = os.path.join(args_.restore_state, "stats.json")
    traj_path_aclib = os.path.join(args_.restore_state, "traj_aclib2.json")
    traj_path_old = os.path.join(args_.restore_state, "traj_old.csv")
    if not os.path.isdir(args_.restore_state):
        raise FileNotFoundError(
            "Could not find folder from which to restore.")
    # Load runhistory and stats
    rh = RunHistory(aggregate_func=None)
    rh.load_json(rh_path, scen.cs)
    self.logger.debug("Restored runhistory from %s", rh_path)
    stats = Stats(scen)
    stats.load(stats_path)
    self.logger.debug("Restored stats from %s", stats_path)
    # Trajectories are kept as raw lines; they are re-logged later.
    with open(traj_path_aclib, 'r') as traj_fn:
        traj_list_aclib = traj_fn.readlines()
    with open(traj_path_old, 'r') as traj_fn:
        traj_list_old = traj_fn.readlines()
    return rh, stats, traj_list_aclib, traj_list_old
def create_or_restore_smac(scenario_dict, rng, tae):
    """Build a SMAC4HPO instance: fresh run, or restored from a previous
    run's output directory (restore branch currently unreachable, see NOTE).
    """
    out_dir = path.join(scenario_dict['output_dir'], 'run_1')
    # NOTE(review): the 'True or' short-circuits the condition, so the
    # fresh-run branch is ALWAYS taken and the restore branch below is
    # dead code -- presumably a debugging leftover; confirm before removing.
    if True or not isfile(path.join(out_dir, "traj_aclib2.json")):
        # if some incomplete data lays arround, delete it completely
        shutil.rmtree(out_dir, ignore_errors=True)
        scenario = Scenario(scenario_dict)
        smac = SMAC4HPO(scenario=scenario,
                        rng=rng,
                        tae_runner=tae,
                        initial_design=SobolDesign,
                        run_id=1)
    else:
        new_scenario = Scenario(scenario_dict)
        rh_path = path.join(out_dir, "runhistory.json")
        runhistory = RunHistory(aggregate_func=None)
        runhistory.load_json(rh_path, new_scenario.cs)
        # ... stats, ...
        stats_path = path.join(out_dir, "stats.json")
        stats = Stats(new_scenario)
        stats.load(stats_path)
        # ... and trajectory.
        traj_path = path.join(out_dir, "traj_aclib2.json")
        trajectory = TrajLogger.read_traj_aclib_format(fn=traj_path,
                                                       cs=new_scenario.cs)
        incumbent = trajectory[-1]["incumbent"]
        # Now we can initialize SMAC with the recovered objects and restore the
        # state where we left off. By providing stats and a restore_incumbent, SMAC
        # automatically detects the intention of restoring a state.
        smac = SMAC4HPO(scenario=new_scenario,
                        runhistory=runhistory,
                        stats=stats,
                        restore_incumbent=incumbent,
                        run_id=1)
        print('restored smac from:', out_dir)
    return smac
def test_json_origin(self):
    """Round-trip a runhistory through json and check that a config's
    origin survives, both for a set origin and for None."""
    json_path = 'test/test_files/test_json_origin.json'
    for expected_origin in ['test_origin', None]:
        cs = get_config_space()
        history = RunHistory()
        cfg = Configuration(cs, values={'a': 1, 'b': 2},
                            origin=expected_origin)
        history.add(config=cfg, cost=10, time=20,
                    status=StatusType.SUCCESS,
                    instance_id=1, seed=1)
        history.save_json(json_path)
        _ = history.load_json(json_path, cs)
        self.assertEqual(history.get_all_configs()[0].origin,
                         expected_origin)
        os.remove(json_path)
# smac.optimize() # Or, to show the whole process of recovering a SMAC-run from the output # directory, create a new scenario with an extended budget: new_scenario = Scenario( original_scenario_dict, cmd_options={ "runcount_limit": 50, # overwrite these args "output_dir": "restored", }, ) # We load the runhistory rh_path = os.path.join(old_output_dir, "runhistory.json") runhistory = RunHistory() runhistory.load_json(rh_path, new_scenario.cs) # And the stats stats_path = os.path.join(old_output_dir, "stats.json") stats = Stats(new_scenario) stats.load(stats_path) # And the trajectory traj_path = os.path.join(old_output_dir, "traj_aclib2.json") trajectory = TrajLogger.read_traj_aclib_format(fn=traj_path, cs=new_scenario.cs) incumbent = trajectory[-1]["incumbent"] # Now we can initialize SMAC with the recovered objects and restore the # state where we left off. By providing stats and a restore_incumbent, SMAC # automatically detects the intention of restoring a state.
'data', help='File in json format that contains all validated runs') parser.add_argument('scenario', help='Scenario file') args, unkown = parser.parse_known_args() logging.basicConfig(level=logging.INFO) if unkown: logging.warning('Could not parse the following arguments: ') logging.warning(str(unkown)) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Create Runhistory object as well as scenario object runhist = RunHistory(average_cost) scenario = Scenario(args.scenario, cmd_args={'output_dir': ""}) cs = scenario.cs runhist.load_json(args.data, cs) # populate the runhistory with the validation data configs = runhist.get_all_configs() def_ = cs.get_default_configuration() def_dict = def_.get_dictionary() # Switch it around such that statistics about the default are gathered first if configs[0] != def_: tmp = configs[0] configs[0] = configs[1] configs[1] = tmp del tmp logging.info('Found %d configs' % len(configs)) logging.info('Cost per config:') # For each config for config in configs:
def test_load(self):
    # Purpose: loading/merging a serialized runhistory must keep config
    # ids stable and must NOT overwrite already-existing run entries.
    configuration_space = test_helpers.get_branin_config_space()
    other_runhistory = '{"data": [[[2, "branini", 1], [1, 1,' \
                       '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                       '[[1, "branin", 1], [1, 1,' \
                       '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                       '[[3, "branin-hoo", 1], [1, 1,' \
                       '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                       '[[2, null, 1], [1, 1,' \
                       '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                       '[[1, "branini", 1], [1, 1,' \
                       '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                       '[[4, null, 1], [1, 1,' \
                       '{"__enum__": "StatusType.SUCCESS"}, null]]], ' \
                       '"configs": {' \
                       '"4": {"x": -2.2060968293349363, "y": 5.183410905645716}, ' \
                       '"3": {"x": -2.7986616377433045, "y": 1.385078921531967}, ' \
                       '"1": {"x": 1.2553300705386103, "y": 10.804867401632372}, ' \
                       '"2": {"x": -4.998284377739827, "y": 4.534988589477597}}}'
    other_runhistory_filename = os.path.join(self.tmp_dir,
                                             'runhistory.json')
    with open(other_runhistory_filename, 'w') as fh:
        fh.write(other_runhistory)

    # load from an empty runhistory
    runhistory = RunHistory(aggregate_func=average_cost)
    runhistory.load_json(other_runhistory_filename, configuration_space)
    self.assertEqual(sorted(list(runhistory.ids_config.keys())),
                     [1, 2, 3, 4])
    self.assertEqual(len(runhistory.data), 6)

    # load from non-empty runhistory, in case of a duplicate the existing
    # result will be kept and the new one silently discarded
    runhistory = RunHistory(aggregate_func=average_cost)
    configuration_space.seed(1)
    config = configuration_space.sample_configuration()
    runhistory.add(config, 1, 1, StatusType.SUCCESS, seed=1,
                   instance_id='branin')
    # id() identity check: the existing entry object must survive the merge.
    id_before = id(runhistory.data[RunKey(1, 'branin', 1)])
    runhistory.update_from_json(other_runhistory_filename,
                                configuration_space)
    id_after = id(runhistory.data[RunKey(1, 'branin', 1)])
    self.assertEqual(len(runhistory.data), 6)
    self.assertEqual(id_before, id_after)

    # load from non-empty runhistory, in case of a duplicate the existing
    # result will be kept and the new one silently discarded
    runhistory = RunHistory(aggregate_func=average_cost)
    configuration_space.seed(1)
    config = configuration_space.sample_configuration()
    config = configuration_space.sample_configuration()
    # This is the former config_3
    config = configuration_space.sample_configuration()
    runhistory.add(config, 1, 1, StatusType.SUCCESS, seed=1,
                   instance_id='branin')
    id_before = id(runhistory.data[RunKey(1, 'branin', 1)])
    runhistory.update_from_json(other_runhistory_filename,
                                configuration_space)
    id_after = id(runhistory.data[RunKey(1, 'branin', 1)])
    # Non-duplicate entry was added (7 runs), existing object kept, and
    # config-ids were re-used rather than re-numbered.
    self.assertEqual(len(runhistory.data), 7)
    self.assertEqual(id_before, id_after)
    self.assertEqual(sorted(list(runhistory.ids_config.keys())),
                     [1, 2, 3, 4])
    self.assertEqual(
        [runhistory.external[run_key] for run_key in runhistory.data],
        [DataOrigin.INTERNAL] + [DataOrigin.EXTERNAL_SAME_INSTANCES] * 6)
def main():
    """Plot the first 20 SMAC costs against the first 20 ROAR costs."""
    parser = argparse.ArgumentParser(description='test',
                                     fromfile_prefix_chars="@")
    parser.add_argument('-s', '--scenario_SMAC', dest='scenario_SMAC',
                        required=True)
    parser.add_argument('-rh', '--runhistory_SMAC', dest='runhistory_SMAC',
                        required=True)
    parser.add_argument('-sR', '--scenario_ROAR', dest='scenario_ROAR',
                        required=True)
    parser.add_argument('-rhR', '--runhistory_ROAR', dest='runhistory_ROAR',
                        required=True)
    parser.add_argument('-o', '--output', dest='output', required=True)
    args = parser.parse_args()
    scenario_SMAC = Scenario(args.scenario_SMAC)
    scenario_ROAR = Scenario(args.scenario_ROAR)

    # We load the runhistory, ...
    # (removed pointless single-argument os.path.join calls and unused
    # 'config' lookups)
    runhistory_SMAC = RunHistory(aggregate_func=None)
    runhistory_SMAC.load_json(args.runhistory_SMAC, scenario_SMAC.cs)
    runhistory_ROAR = RunHistory(aggregate_func=None)
    runhistory_ROAR.load_json(args.runhistory_ROAR, scenario_ROAR.cs)

    # comparison is done only for 20 runs
    cost_SMAC = []
    # iterate over data because it is an OrderedDict
    for entry, values in runhistory_SMAC.data.items():
        if len(cost_SMAC) == 20:
            break
        cost_SMAC.append(values.cost)
    # Fix: previously ALL ROAR entries were collected while SMAC was
    # truncated to 20, making the plotted comparison asymmetric.
    cost_ROAR = []
    for entry, values in runhistory_ROAR.data.items():
        if len(cost_ROAR) == 20:
            break
        cost_ROAR.append(values.cost)

    fig1 = plt.figure()
    ax1 = fig1.add_subplot(111)
    #plot SMAC and ROAR performance
    ax1.plot(cost_SMAC, 'o', color='red', label='SMAC')
    ax1.plot(cost_ROAR, '+', color='blue', label='ROAR')
    plt.ylabel("Loss")
    plt.xlabel("#Evaluations")
    plt.title("Performance of SMAC compared to ROAR")
    plt.legend()
    plt.savefig(args.output)
def validate(self,
             config_mode: str = 'def',
             instance_mode: str = 'test',
             repetitions: int = 1,
             n_jobs: int = 1,
             backend: str = 'threading',
             runhistory: RunHistory = None,
             tae: ExecuteTARun = None):
    """Validate configs on instances and save result in runhistory.

    Parameters
    ----------
    config_mode: string
        what configurations to validate from [def, inc, def+inc, time, all],
        time means evaluation at timesteps 2^-4, 2^-3, 2^-2, 2^-1, 2^0,
        2^1, ...
    instance_mode: string
        what instances to use for validation, from [train, test, train+test]
    repetitions: int
        number of repetitions in nondeterministic algorithms
    n_jobs: int
        number of parallel processes used by joblib
    backend: string
        joblib backend used for the parallel validation runs
    runhistory: RunHistory or string or None
        runhistory to take data from (a string is treated as a path to a
        serialized runhistory)
    tae: ExecuteTARun
        tae to be used. if none, will initialize ExecuteTARunOld

    Returns
    -------
    runhistory: RunHistory
        runhistory with validated runs
    """
    self.logger.debug(
        "Validating configs '%s' on instances '%s', repeating %d times"
        " with %d parallel runs on backend '%s'.", config_mode,
        instance_mode, repetitions, n_jobs, backend)
    # Reset runhistory
    self.rh = RunHistory(average_cost)
    # Get relevant configurations and instances
    configs = self._get_configs(config_mode)
    instances = self._get_instances(instance_mode)
    # If runhistory is given as string, load into memory
    if isinstance(runhistory, str):
        fn = runhistory
        runhistory = RunHistory(average_cost)
        runhistory.load_json(fn, self.scen.cs)
    # Get all runs needed as list
    runs = self.get_runs(configs, instances, repetitions=repetitions,
                         runhistory=runhistory)
    # Create new Stats without limits, so validation runs are never cut
    # short by the original scenario's budget bookkeeping.
    inf_scen = Scenario({
        'run_obj': self.scen.run_obj,
        'cutoff_time': self.scen.cutoff,
        'output_dir': None
    })
    inf_stats = Stats(inf_scen)
    inf_stats.start_timing()
    # Create TAE
    if not tae:
        tae = ExecuteTARunOld(ta=self.scen.ta,
                              stats=inf_stats,
                              run_obj=self.scen.run_obj,
                              par_factor=self.scen.par_factor,
                              cost_for_crash=self.scen.cost_for_crash)
    else:
        # Inject endless-stats
        tae.stats = inf_stats
    # Validate!
    run_results = self._validate_parallel(tae, runs, n_jobs, backend)
    # tae returns (status, cost, runtime, additional_info)
    # Add runs to RunHistory; results are aligned with 'runs' by index.
    idx = 0
    for result in run_results:
        self.rh.add(config=runs[idx]['config'],
                    cost=result[1],
                    time=result[2],
                    status=result[0],
                    instance_id=runs[idx]['inst'],
                    seed=runs[idx]['seed'],
                    additional_info=result[3])
        idx += 1
    # Save runhistory; if self.output is a folder, append a default
    # filename first.
    if not self.output.endswith('.json'):
        old = self.output
        self.output = os.path.join(self.output,
                                   'validated_runhistory.json')
        self.logger.debug("Output is \"%s\", changing to \"%s\"!", old,
                          self.output)
    base = os.path.split(self.output)[0]
    if not os.path.exists(base):
        self.logger.debug("Folder (\"%s\") doesn't exist, creating.", base)
        os.makedirs(base)
    self.logger.info("Saving validation-results in %s", self.output)
    self.rh.save_json(self.output)
    return self.rh
def test_load(self):
    # Purpose: loading/merging a serialized runhistory; here a duplicate
    # run IS overridden on update, and config-ids are re-used.
    configuration_space = test_helpers.get_branin_config_space()
    other_runhistory = '{"data": [[[2, "branini", 1], [1, 1, 1, null]], ' \
                       '[[1, "branin", 1], [1, 1, 1, null]], ' \
                       '[[3, "branin-hoo", 1], [1, 1, 1, null]], ' \
                       '[[2, null, 1], [1, 1, 1, null]], ' \
                       '[[1, "branini", 1], [1, 1, 1, null]], ' \
                       '[[4, null, 1], [1, 1, 1, null]]], ' \
                       '"configs": {' \
                       '"4": {"x": -2.2060968293349363, "y": 5.183410905645716}, ' \
                       '"3": {"x": -2.7986616377433045, "y": 1.385078921531967}, ' \
                       '"1": {"x": 1.2553300705386103, "y": 10.804867401632372}, ' \
                       '"2": {"x": -4.998284377739827, "y": 4.534988589477597}}}'
    other_runhistory_filename = os.path.join(self.tmp_dir,
                                             '.runhistory_20.json')
    with open(other_runhistory_filename, 'w') as fh:
        fh.write(other_runhistory)

    # load from an empty runhistory
    runhistory = RunHistory()
    runhistory.load_json(other_runhistory_filename, configuration_space)
    self.assertEqual(sorted(list(runhistory.ids_config.keys())),
                     [1, 2, 3, 4])
    self.assertEqual(len(runhistory.data), 6)

    # load from non-empty runhistory, but existing run will be overridden
    # because it alread existed
    runhistory = RunHistory()
    configuration_space.seed(1)
    config = configuration_space.sample_configuration()
    runhistory.add(config, 1, 1, StatusType.SUCCESS, seed=1,
                   instance_id='branin')
    # id() identity check: the entry object must be replaced by the merge.
    id_before = id(runhistory.data[runhistory.RunKey(1, 'branin', 1)])
    runhistory.update_from_json(other_runhistory_filename,
                                configuration_space)
    id_after = id(runhistory.data[runhistory.RunKey(1, 'branin', 1)])
    self.assertEqual(len(runhistory.data), 6)
    self.assertNotEqual(id_before, id_after)

    # load from non-empty runhistory, but existing run will not be
    # overridden, but config_id will be re-used
    runhistory = RunHistory()
    configuration_space.seed(1)
    config = configuration_space.sample_configuration()
    config = configuration_space.sample_configuration()
    # This is the former config_3
    config = configuration_space.sample_configuration()
    runhistory.add(config, 1, 1, StatusType.SUCCESS,
                   seed=1, instance_id='branin')
    id_before = id(runhistory.data[runhistory.RunKey(1, 'branin', 1)])
    runhistory.update_from_json(other_runhistory_filename,
                                configuration_space)
    id_after = id(runhistory.data[runhistory.RunKey(1, 'branin', 1)])
    self.assertEqual(len(runhistory.data), 7)
    self.assertEqual(id_before, id_after)
    print(runhistory.config_ids)
    self.assertEqual(sorted(list(runhistory.ids_config.keys())),
                     [1, 2, 3, 4])
    print(list(runhistory.data.keys()))