def test_pass_tae(self):
    scen = Scenario(self.scen_fn, cmd_options={'run_obj': 'quality'})
    tae = ExecuteTARunOld(ta=scen.ta, stats=self.stats)
    validator = Validator(scen, self.trajectory)
    rh_mock = mock.Mock()
    with mock.patch.object(
        Validator,
        "_validate_parallel",
        return_value=[mock.MagicMock(), mock.MagicMock(), mock.MagicMock(), mock.MagicMock()],
    ) as validate_parallel_mock:
        with mock.patch.object(
            Validator,
            "_get_runs",
            return_value=[[mock.Mock(), mock.Mock(), mock.Mock(), mock.Mock()], rh_mock],
        ):
            validator.validate(tae=tae)
            self.assertIs(validate_parallel_mock.call_args[0][0], tae)
            self.assertEqual(rh_mock.add.call_count, 4)
def test_validate(self):
    ''' test validation '''
    scen = Scenario(self.scen_fn,
                    cmd_args={'run_obj': 'quality',
                              'instances': self.train_insts,
                              'test_instances': self.test_insts})
    scen.instance_specific = self.inst_specs
    validator = Validator(scen, self.trajectory, self.rng)
    # Test basic usage
    rh = validator.validate(config_mode='def', instance_mode='test', repetitions=3)
    self.assertEqual(len(rh.get_all_configs()), 1)
    self.assertEqual(len(rh.get_runs_for_config(rh.get_all_configs()[0])), 9)

    rh = validator.validate(config_mode='inc', instance_mode='train+test')
    self.assertEqual(len(rh.get_all_configs()), 1)
    self.assertEqual(len(rh.get_runs_for_config(rh.get_all_configs()[0])), 6)

    rh = validator.validate(config_mode='wallclock_time', instance_mode='train')
    self.assertEqual(len(rh.get_all_configs()), 7)
    self.assertEqual(sum([len(rh.get_runs_for_config(c)) for c in rh.get_all_configs()]), 21)

    # Test with backend multiprocessing
    rh = validator.validate(config_mode='def', instance_mode='test', repetitions=3,
                            backend='multiprocessing')
    self.assertEqual(len(rh.get_all_configs()), 1)
    self.assertEqual(len(rh.get_runs_for_config(rh.get_all_configs()[0])), 9)
def test_parallel(self):
    ''' test parallel '''
    validator = Validator(self.scen, self.trajectory, self.output_rh, self.rng)
    validator.validate(config_mode='all', instance_mode='train+test', n_jobs=-1)
def test_validate(self):
    ''' test validation '''
    self.scen.train_insts = self.train_insts
    self.scen.test_insts = self.test_insts
    validator = Validator(self.scen, self.trajectory, self.output_rh, self.rng)
    # Test basic usage
    rh = validator.validate(config_mode='def', instance_mode='test', repetitions=3)
    self.assertEqual(len(rh.get_all_configs()), 1)
    self.assertEqual(len(rh.get_runs_for_config(rh.get_all_configs()[0])), 9)

    rh = validator.validate(config_mode='inc', instance_mode='train+test')
    self.assertEqual(len(rh.get_all_configs()), 1)
    self.assertEqual(len(rh.get_runs_for_config(rh.get_all_configs()[0])), 6)

    rh = validator.validate(config_mode='time', instance_mode='train')
    self.assertEqual(len(rh.get_all_configs()), 9)
    self.assertEqual(
        sum([len(rh.get_runs_for_config(c)) for c in rh.get_all_configs()]), 27)

    # Test with backend multiprocessing
    rh = validator.validate(config_mode='def', instance_mode='test', repetitions=3,
                            backend='multiprocessing')
    self.assertEqual(len(rh.get_all_configs()), 1)
    self.assertEqual(len(rh.get_runs_for_config(rh.get_all_configs()[0])), 9)
def test_parallel(self):
    ''' test parallel '''
    scen = Scenario(self.scen_fn, cmd_options={'run_obj': 'quality'})
    validator = Validator(scen, self.trajectory, self.rng)
    validator.validate(config_mode='all', instance_mode='train+test', n_jobs=-1)
def validate(self, config_mode='inc', instance_mode='train+test',
             repetitions=1, n_jobs=-1, backend='threading'):
    """Create validator-object and run validation, using
    scenario-information, runhistory from smbo and tae_runner from intensify

    Parameters
    ----------
    config_mode: string
        what configurations to validate, from [def, inc, def+inc, time, all];
        time means evaluation at timesteps 2^-4, 2^-3, 2^-2, 2^-1, 2^0, 2^1, ...
    instance_mode: string
        what instances to use for validation, from [train, test, train+test]
    repetitions: int
        number of repetitions for nondeterministic algorithms
        (fixed to 1 for deterministic algorithms)
    n_jobs: int
        number of parallel processes used by joblib
    backend: string
        joblib backend to parallelize with, e.g. 'threading' or 'multiprocessing'

    Returns
    -------
    runhistory: RunHistory
        runhistory containing all specified runs
    """
    traj_fn = os.path.join(self.scenario.output_dir, "traj_aclib2.json")
    trajectory = TrajLogger.read_traj_aclib_format(fn=traj_fn, cs=self.scenario.cs)
    new_rh_path = os.path.join(self.scenario.output_dir, "validated_runhistory.json")

    validator = Validator(self.scenario, trajectory, new_rh_path, self.rng)
    new_rh = validator.validate(config_mode, instance_mode, repetitions, n_jobs,
                                backend, self.runhistory,
                                self.intensifier.tae_runner)
    return new_rh
def optimize(optimizer, scenario, trajectory=None):
    then = time.time()
    best_conf = optimizer.optimize()
    print(best_conf)
    print('training time:', time.time() - then)

    traj_logger = TrajLogger(None, Stats(scenario))
    trajectory = trajectory or traj_logger.read_traj_aclib_format(
        "smac-output/run_1/traj_aclib2.json", scenario.cs)
    validator = Validator(scenario, trajectory, rng=np.random.RandomState(42))

    # evaluate on test instances and calculate cpu time
    then = time.time()
    runhis_dev = validator.validate(config_mode="def", instance_mode="test")
    runhis_inc = validator.validate(config_mode="inc", instance_mode="test")
    print('validating time:', time.time() - then)

    default_conf = runhis_dev.ids_config[1]
    incumbent_conf = runhis_inc.ids_config[1]
    dev_vals = get_instance_costs_for_config(runhis_dev, default_conf)
    inc_vals = get_instance_costs_for_config(runhis_inc, incumbent_conf)

    # ###### Filter runs for plotting #######
    # Only keep instances that were evaluated for both configurations
    dev_x = []
    inc_x = []
    for key in set(dev_vals.keys()) & set(inc_vals.keys()):
        dev_x.append(dev_vals[key])
        inc_x.append(inc_vals[key])

    # print(dev_vals)
    # print(inc_vals)
    print(dev_x)
    print(inc_x)

    print('PAR10:', np.mean(inc_x), '/', np.mean(dev_x))
    # Timeouts are stored with the PAR10 penalty (max_x); dividing them by 10
    # recovers the PAR1 score.
    max_x = 1000.0
    par1er = lambda xx: np.mean([(x / 10 if x == max_x else x) for x in xx])
    print('PAR1 :', par1er(inc_x), '/', par1er(dev_x))
    to_counter = lambda xx: len([x for x in xx if x == max_x])
    print('TOs :', to_counter(inc_x), '/', to_counter(dev_x))
    print('wins :', len([i for i in range(len(dev_x)) if dev_x[i] > inc_x[i]]),
          '/', len(dev_x))

    fig, ax = plt.subplots()
    ax.scatter(dev_x, inc_x, marker="x")
    ax.set_xlabel("Default Configuration")
    ax.set_ylabel("Incumbent Configuration")
    lims = [
        np.min([ax.get_xlim(), ax.get_ylim()]),
        np.max([ax.get_xlim(), ax.get_ylim()]),
    ]
    ax.plot(lims, lims, 'k-', alpha=0.75, zorder=0)
    # ax.set_xlim(lims)
    # ax.set_ylim(lims)
    ax.set_xscale('log')
    ax.set_yscale('log')
    fig.savefig("fig-smac.png")
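To make the PAR scoring above concrete: timeouts enter the runhistory with the PAR10 penalty already applied, so converting to PAR1 only requires dividing the penalized values by 10. A tiny worked example, assuming (as the script does) a cutoff of 100 seconds so that timeouts are stored as max_x = 1000.0:

# Worked example of the PAR10 -> PAR1 conversion above (values illustrative):
# two solved instances (50s, 200s) and one timeout stored as 1000.0 (= 10 * cutoff)
xs = [50.0, 200.0, 1000.0]
# PAR10 simply averages the stored costs: (50 + 200 + 1000) / 3
assert abs(sum(xs) / 3 - 416.6666666666667) < 1e-9
# PAR1 counts the timeout at 1x cutoff (1000 / 10 = 100): (50 + 200 + 100) / 3
par1 = sum(x / 10 if x == 1000.0 else x for x in xs) / 3
assert abs(par1 - 116.66666666666667) < 1e-9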
def test_pass_tae(self):
    scen = Scenario(self.scen_fn, cmd_args={'run_obj': 'quality'})
    tae = ExecuteTARunOld(ta=scen.ta)
    validator = Validator(scen, self.trajectory)
    with mock.patch.object(Validator, "_validate_parallel",
                           return_value=[(1, 2, 3, 4)]):
        self.assertEqual(1, len(validator.validate(tae=tae).data))
def validate(self,
             config_mode: typing.Union[str, typing.List[Configuration]] = 'inc',
             instance_mode: typing.Union[str, typing.List[str]] = 'train+test',
             repetitions: int = 1,
             use_epm: bool = False,
             n_jobs: int = -1,
             backend: str = 'threading') -> RunHistory:
    """Create validator-object and run validation, using
    scenario-information, runhistory from smbo and tae_runner from intensify

    Parameters
    ----------
    config_mode: str or list<Configuration>
        string or directly a list of Configuration;
        str from [def, inc, def+inc, wallclock_time, cpu_time, all].
        time evaluates at cpu- or wallclock-timesteps of:
        [max_time/2^0, max_time/2^1, max_time/2^2, ..., default]
        with max_time being the highest recorded time
    instance_mode: string
        what instances to use for validation, from [train, test, train+test]
    repetitions: int
        number of repetitions for nondeterministic algorithms
        (fixed to 1 for deterministic algorithms)
    use_epm: bool
        whether to use an EPM instead of evaluating all runs with the TAE
    n_jobs: int
        number of parallel processes used by joblib
    backend: str
        joblib backend to parallelize with, e.g. 'threading' or 'multiprocessing'

    Returns
    -------
    runhistory: RunHistory
        runhistory containing all specified runs
    """
    if isinstance(config_mode, str):
        assert self.scenario.output_dir_for_this_run is not None  # Please mypy
        traj_fn = os.path.join(self.scenario.output_dir_for_this_run, "traj_aclib2.json")
        trajectory = (
            TrajLogger.read_traj_aclib_format(fn=traj_fn, cs=self.config_space)
        )  # type: typing.Optional[typing.List[typing.Dict[str, typing.Union[float, int, Configuration]]]]
    else:
        trajectory = None
    if self.scenario.output_dir_for_this_run:
        new_rh_path = os.path.join(self.scenario.output_dir_for_this_run,
                                   "validated_runhistory.json")  # type: typing.Optional[str]  # noqa E501
    else:
        new_rh_path = None

    validator = Validator(self.scenario, trajectory, self.rng)
    if use_epm:
        new_rh = validator.validate_epm(config_mode=config_mode,
                                        instance_mode=instance_mode,
                                        repetitions=repetitions,
                                        runhistory=self.runhistory,
                                        output_fn=new_rh_path)
    else:
        new_rh = validator.validate(config_mode, instance_mode, repetitions,
                                    n_jobs, backend, self.runhistory,
                                    self.intensifier.tae_runner,
                                    output_fn=new_rh_path)
    return new_rh
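A minimal usage sketch of this facade method. The import path assumes a recent SMAC3 release where the AC facade is exposed as SMAC4AC (older versions expose smac.facade.smac_facade.SMAC instead), and the scenario file is hypothetical:

# Hypothetical usage sketch; 'scenario.txt' must define train/test instances.
from smac.facade.smac_ac_facade import SMAC4AC
from smac.scenario.scenario import Scenario

scenario = Scenario('scenario.txt')
smac = SMAC4AC(scenario=scenario)
smac.optimize()
# Re-run default + incumbent on the test instances with the real target
# algorithm (use_epm=True would estimate the costs with an EPM instead):
validated_rh = smac.validate(config_mode='def+inc', instance_mode='test',
                             repetitions=3, use_epm=False, n_jobs=1)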
def test_validate_no_insts(self):
    ''' no instances '''
    scen = Scenario(self.scen_fn, cmd_args={'run_obj': 'quality'})
    validator = Validator(scen, self.trajectory, self.rng)
    rh = validator.validate(config_mode='def+inc', instance_mode='train',
                            repetitions=3, output_fn=self.output_rh)
    self.assertEqual(len(rh.get_all_configs()), 2)
    self.assertEqual(sum([len(rh.get_runs_for_config(c)) for c in rh.get_all_configs()]), 6)
def test_validate_no_insts(self):
    ''' no instances '''
    validator = Validator(self.scen, self.trajectory, self.output_rh, self.rng)
    rh = validator.validate(config_mode='def+inc', instance_mode='train', repetitions=3)
    self.assertEqual(len(rh.get_all_configs()), 2)
    self.assertEqual(
        sum([len(rh.get_runs_for_config(c)) for c in rh.get_all_configs()]), 6)
def test_validate_deterministic(self):
    ''' deterministic ta '''
    scen = Scenario(self.scen_fn,
                    cmd_args={'run_obj': 'quality',
                              'instances': self.train_insts,
                              'deterministic': True})
    scen.instance_specific = self.inst_specs
    validator = Validator(scen, self.trajectory, self.rng)
    rh = validator.validate(config_mode='def+inc', instance_mode='train', repetitions=3)
    self.assertEqual(len(rh.get_all_configs()), 2)
    self.assertEqual(sum([len(rh.get_runs_for_config(c)) for c in rh.get_all_configs()]), 6)
def test_validate_deterministic(self):
    ''' deterministic ta '''
    self.scen.deterministic = True
    self.scen.train_insts = self.train_insts
    validator = Validator(self.scen, self.trajectory, self.output_rh, self.rng)
    rh = validator.validate(config_mode='def+inc', instance_mode='train', repetitions=3)
    self.assertEqual(len(rh.get_all_configs()), 2)
    self.assertEqual(
        sum([len(rh.get_runs_for_config(c)) for c in rh.get_all_configs()]), 6)
def test_validate_no_insts(self):
    ''' no instances '''
    scen = Scenario(self.scen_fn, cmd_options={
        'run_obj': 'quality',
        'save-instantly': False,
        'deterministic': False,
    })
    validator = Validator(scen, self.trajectory, self.rng)
    rh = validator.validate(config_mode='def+inc', instance_mode='train',
                            repetitions=3, output_fn=self.output_rh)
    self.assertEqual(len(rh.get_all_configs()), 2)
    self.assertEqual(
        sum([
            len(rh.get_runs_for_config(c, only_max_observed_budget=True))
            for c in rh.get_all_configs()
        ]), 6)
if args_.tae == "aclib":
    tae = ExecuteTARunAClib(ta=scenario.ta,
                            run_obj=scenario.run_obj,
                            par_factor=scenario.par_factor,
                            cost_for_crash=scenario.cost_for_crash)

validator = Validator(scenario, trajectory, args_.seed)

# Load runhistory
if args_.runhistory:
    runhistory = RunHistory(average_cost)
    for rh_path in args_.runhistory:
        runhistory.update_from_json(rh_path, scenario.cs)
else:
    runhistory = None

if args_.epm:
    validator.validate_epm(config_mode=args_.configs,
                           instance_mode=args_.instances,
                           repetitions=args_.repetitions,
                           runhistory=runhistory,
                           output_fn=args_.output)
else:
    validator.validate(config_mode=args_.configs,
                       instance_mode=args_.instances,
                       repetitions=args_.repetitions,
                       n_jobs=args_.n_jobs,
                       runhistory=runhistory,
                       tae=tae,
                       output_fn=args_.output)
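For context, a sketch of the argument parser this fragment presumably runs under. All flag names are assumptions inferred from the args_ attributes accessed above, not the script's confirmed interface:

import argparse

# Hypothetical parser reconstructing the attributes this fragment reads;
# the flag spellings are inferred from the args_.* accesses above.
parser = argparse.ArgumentParser()
parser.add_argument('--tae', choices=['aclib', 'old'], default='old')
parser.add_argument('--seed', type=int, default=1)
parser.add_argument('--runhistory', nargs='*', default=None)
parser.add_argument('--epm', action='store_true')
parser.add_argument('--configs', default='def+inc')
parser.add_argument('--instances', default='test')
parser.add_argument('--repetitions', type=int, default=1)
parser.add_argument('--n_jobs', type=int, default=1)
parser.add_argument('--output', default=None)
args_ = parser.parse_args()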
def validate(self, scenario, i):
    traj_logger = TrajLogger(None, Stats(scenario))
    trajectory = traj_logger.read_traj_aclib_format(
        "es-output/run_1/traj_aclib2.json", scenario.cs)
    validator = Validator(scenario, trajectory, rng=np.random.RandomState(42))

    # evaluate on test instances and calculate cpu time
    runhis_dev = validator.validate(config_mode="def", instance_mode="test")
    runhis_inc = validator.validate(config_mode="inc", instance_mode="test")

    # Copied from the SMAC documentation; this helper is no longer part of RunHistory.
    def get_instance_costs_for_config(runhis: RunHistory, config: Configuration):
        """
        Returns the average cost per instance (across seeds) for a configuration

        Parameters
        ----------
        config : Configuration from ConfigSpace
            Parameter configuration

        Returns
        -------
        cost_per_inst: dict<instance name<str>, cost<float>>
        """
        config_id = runhis.config_ids.get(config)
        runs_ = runhis._configid_to_inst_seed.get(config_id, [])
        cost_per_inst = {}
        for inst, seed in runs_:
            cost_per_inst[inst] = cost_per_inst.get(inst, [])
            rkey = RunKey(config_id, inst, seed)
            vkey = runhis.data[rkey]
            cost_per_inst[inst].append(vkey.cost)
        cost_per_inst = dict([(inst, np.mean(costs))
                              for inst, costs in cost_per_inst.items()])
        return cost_per_inst

    default_conf = runhis_dev.ids_config[1]
    incumbent_conf = runhis_inc.ids_config[1]
    dev_vals = get_instance_costs_for_config(runhis_dev, default_conf)
    inc_vals = get_instance_costs_for_config(runhis_inc, incumbent_conf)

    # ###### Filter runs for plotting #######
    # Only keep instances that were evaluated for both configurations
    dev_x = []
    inc_x = []
    for key in set(dev_vals.keys()) & set(inc_vals.keys()):
        dev_x.append(dev_vals[key])
        inc_x.append(inc_vals[key])

    # print(dev_vals)
    # print(inc_vals)
    print(dev_x)
    print(inc_x)

    f = open("results-es" + str(i) + ".txt", "w")
    f.write("Dev Results: \n" + str(dev_x) + "\n")
    f.write("Inc Results: \n" + str(inc_x) + "\n")
    f.close()

    fig, ax = plt.subplots()
    ax.scatter(dev_x, inc_x, marker="x")
    ax.set_xlabel("Default Configuration")
    ax.set_ylabel("Incumbent Configuration")
    lims = [
        np.min([ax.get_xlim(), ax.get_ylim()]),
        np.max([ax.get_xlim(), ax.get_ylim()]),
    ]
    ax.plot(lims, lims, 'k-', alpha=0.75, zorder=0)
    # ax.set_xlim(lims)
    # ax.set_ylim(lims)
    ax.set_xscale('log')
    ax.set_yscale('log')
    fig.savefig("result.png")
def test_nonexisting_output(self):
    scen = Scenario(self.scen_fn, cmd_args={'run_obj': 'quality'})
    validator = Validator(scen, self.trajectory)
    path = "test/test_files/validation/test/nonexisting/output"
    validator.validate(output_fn=path)
    self.assertTrue(os.path.exists(path))
class ConfiguratorRun(object):
    """
    ConfiguratorRuns load and maintain information about individual
    configurator runs. There are different supported formats, like:
    BOHB, SMAC3, SMAC2 and CSV.
    This class is responsible for providing a scenario, a runhistory and a
    trajectory and handling original/validated data appropriately.

    To create a ConfiguratorRun from a folder, use ConfiguratorRun.from_folder()
    """

    def __init__(self,
                 scenario,
                 original_runhistory,
                 validated_runhistory,
                 trajectory,
                 options,
                 path_to_folder=None,
                 ta_exec_dir=None,
                 file_format=None,
                 validation_format=None,
                 budget=None,
                 output_dir=None,
                 ):
        """
        Parameters
        ----------
        scenario: Scenario
            scenario
        original_runhistory, validated_runhistory: RunHistory
            runhistories containing only the original evaluated data (during
            optimization process) or the validated data where points of
            interest are reevaluated after the optimization process
        trajectory: List[dict]
            a trajectory of the best performing configurations at each point in time
        options: dict
            options can define a number of custom settings
        path_to_folder: str
            path to the physical folder containing the data
        ta_exec_dir: str
            path to the target-algorithm-execution-directory. This is only
            important for SMAC-optimized data
        file_format, validation_format: str
            will be autodetected at some point soon; until then, specify the
            file-format (SMAC2, SMAC3, BOHB, etc...)
        budget: str or int or float
            a budget, with which this cr is associated
        output_dir: str
            where to save analysis-data for this cr
        """
        self.logger = logging.getLogger("cave.ConfiguratorRun.{}".format(path_to_folder))
        self.rng = np.random.RandomState(42)
        self.options = options

        self.path_to_folder = path_to_folder
        self.budget = budget

        self.scenario = scenario
        self.original_runhistory = original_runhistory
        self.validated_runhistory = validated_runhistory
        self.trajectory = trajectory

        self.ta_exec_dir = ta_exec_dir
        self.file_format = file_format
        self.validation_format = validation_format

        if not output_dir:
            self.logger.debug("New outputdir")
            output_dir = tempfile.mkdtemp()
        self.output_dir = os.path.join(output_dir, 'analysis_data', self.get_identifier())
        os.makedirs(self.output_dir, exist_ok=True)

        self.default = self.scenario.cs.get_default_configuration()
        self.incumbent = self.trajectory[-1]['incumbent'] if self.trajectory else None
        self.feature_names = self._get_feature_names()

        # Create combined runhistory to collect all "real" runs
        self.combined_runhistory = RunHistory(average_cost)
        self.combined_runhistory.update(self.original_runhistory,
                                        origin=DataOrigin.INTERNAL)
        if self.validated_runhistory is not None:
            self.combined_runhistory.update(self.validated_runhistory,
                                            origin=DataOrigin.EXTERNAL_SAME_INSTANCES)

        # Create runhistory with estimated runs (create Importance-object of
        # pimp and use epm-model for validation)
        self.epm_runhistory = RunHistory(average_cost)
        self.epm_runhistory.update(self.combined_runhistory)

        # Initialize importance and validator
        self._init_pimp_and_validator()
        self._validate_default_and_incumbents("epm", self.ta_exec_dir)

        # Set during execution, to share information between Analyzers
        self.share_information = {'parameter_importance': OrderedDict(),
                                  'feature_importance': OrderedDict(),
                                  'evaluators': OrderedDict(),
                                  'validator': None}

    def get_identifier(self):
        path = self.path_to_folder if self.path_to_folder is not None else ""
        budget = str(self.budget) if self.budget is not None else ""
        if path and budget:
            res = "_".join([path, budget])
        elif not (path or budget):
            res = 'aggregated'
        else:
            res = path if path else budget
        return res.replace('/', '_')

    @classmethod
    def from_folder(cls,
                    folder: str,
                    ta_exec_dir: str,
                    options,
                    file_format: str = 'SMAC3',
                    validation_format: str = 'NONE',
                    budget=None,
                    output_dir=None,
                    ):
        """Initialize scenario, runhistory and incumbent from folder

        Parameters
        ----------
        folder: string
            output-dir of this configurator-run -> this is also the 'id' for
            a single run in parallel optimization
        ta_exec_dir: string
            if the execution directory for the SMAC-run differs from the cwd,
            there might be problems loading instance-, feature- or PCS-files
            in the scenario-object. since instance- and PCS-files are
            necessary, specify the path to the execution-dir of SMAC here
        options: dict
            options can define a number of custom settings
        file_format: string
            from [SMAC2, SMAC3, BOHB, CSV]
        validation_format: string
            from [SMAC2, SMAC3, CSV, NONE], in which format to look for
            validated data
        """
        logger = logging.getLogger("cave.ConfiguratorRun.{}".format(folder))
        logger.debug("Loading from '%s' with ta_exec_dir '%s' with file-format '%s' and "
                     "validation-format %s. Budget (if present): %s",
                     folder, ta_exec_dir, file_format, validation_format, budget)

        if file_format == 'BOHB':
            logger.debug("File format is BOHB, assuming data was converted to SMAC3-format using "
                         "HpBandSter2SMAC from cave.reader.converter.hpbandster2smac.")

        validation_format = validation_format if validation_format != 'NONE' else None

        #### Read in data (scenario, runhistory & trajectory)
        reader = cls.get_reader(file_format, folder, ta_exec_dir)

        scenario = reader.get_scenario()
        scenario_sanity_check(scenario, logger)
        original_runhistory = reader.get_runhistory(scenario.cs)

        validated_runhistory = None
        if validation_format == "NONE" or validation_format is None:
            validation_format = None
        else:
            logger.debug('Using format %s for validation', validation_format)
            vali_reader = cls.get_reader(validation_format, folder, ta_exec_dir)
            vali_reader.scen = scenario
            validated_runhistory = vali_reader.get_validated_runhistory(scenario.cs)
            # self._check_rh_for_inc_and_def(self.validated_runhistory, 'validated runhistory')
            logger.info("Found validated runhistory for \"%s\" and using "
                        "it for evaluation. #configs in validated rh: %d",
                        folder, len(validated_runhistory.config_ids))

        trajectory = reader.get_trajectory(scenario.cs)

        return cls(scenario,
                   original_runhistory,
                   validated_runhistory,
                   trajectory,
                   options,
                   folder,
                   ta_exec_dir,
                   file_format,
                   validation_format,
                   budget=budget,
                   output_dir=output_dir,
                   )

    def get_incumbent(self):
        return self.incumbent

    def _init_pimp_and_validator(self, alternative_output_dir=None):
        """Create ParameterImportance-object and use its trained model for
        validation and further predictions. We pass a combined (original +
        validated) runhistory, so that the returned model will be based on as
        much information as possible

        Parameters
        ----------
        alternative_output_dir: str
            e.g. for budgets we want pimp to use an alternative output-dir
            (subfolders per budget)
        """
        self.logger.debug("Using '%s' as output for pimp",
                          alternative_output_dir if alternative_output_dir else self.output_dir)
        self.pimp = Importance(
            scenario=copy.deepcopy(self.scenario),
            runhistory=self.combined_runhistory,
            incumbent=self.incumbent if self.incumbent else self.default,
            save_folder=alternative_output_dir if alternative_output_dir is not None else self.output_dir,
            seed=self.rng.randint(1, 100000),
            max_sample_size=self.options['fANOVA'].getint("pimp_max_samples"),
            fANOVA_pairwise=self.options['fANOVA'].getboolean("fanova_pairwise"),
            preprocess=False,
            verbose=1,  # disable progressbars
        )
        # Validator (initialize without trajectory)
        self.validator = Validator(self.scenario, None, None)
        self.validator.epm = self.pimp.model

    @timing
    def _validate_default_and_incumbents(self, method, ta_exec_dir):
        """Validate default and incumbent configurations on all instances
        possible. Either use validation (physically execute the target
        algorithm) or EPM-estimate and update according runhistory
        (validation -> self.global_validated_rh; epm -> self.global_epm_rh).

        Parameters
        ----------
        method: str
            epm or validation
        ta_exec_dir: str
            path from where the target algorithm can be executed as found in
            scenario (only used for actual validation)
        """
        # TODO maybe just validate whole trajectory?
        self.logger.debug("Validating %s using %s!", self.get_identifier(), method)
        self.validator.traj = self.trajectory
        if method == "validation":
            with _changedir(ta_exec_dir):
                # TODO determine # repetitions
                new_rh = self.validator.validate('def+inc', 'train+test', 1, -1,
                                                 runhistory=self.combined_runhistory)
            self.validated_runhistory.update(new_rh)
            self.combined_runhistory.update(new_rh)
        elif method == "epm":
            # Only do test-instances if features for test-instances are available
            instance_mode = 'train+test'
            if (any([i not in self.scenario.feature_dict for i in self.scenario.test_insts]) and
                    any([i in self.scenario.feature_dict for i in self.scenario.train_insts])):  # noqa
                self.logger.debug("No features provided for test-instances (but for train!). "
                                  "Cannot validate on \"epm\".")
                self.logger.warning("Features detected for train-instances, but not for "
                                    "test-instances. This is unintended usage and may lead to "
                                    "errors for some analysis-methods.")
                instance_mode = 'train'
            new_rh = self.validator.validate_epm('def+inc', instance_mode, 1,
                                                 runhistory=self.combined_runhistory)
            self.epm_runhistory.update(new_rh)
        else:
            raise ValueError("Missing data method illegal (%s)" % method)
        self.validator.traj = None  # Avoid usage-mistakes

    def _get_feature_names(self):
        if not self.scenario.feature_dict:
            self.logger.info("No features available. Skipping feature analysis.")
            return
        feat_fn = self.scenario.feature_fn
        if not self.scenario.feature_names:
            self.logger.debug("`scenario.feature_names` is not set. Loading from '%s'", feat_fn)
            with _changedir(self.ta_exec_dir if self.ta_exec_dir else '.'):
                if not feat_fn or not os.path.exists(feat_fn):
                    self.logger.warning("Feature names are missing. Either provide valid "
                                        "feature_file in scenario (currently %s) or set "
                                        "`scenario.feature_names` manually." % feat_fn)
                    self.logger.error("Skipping Feature Analysis.")
                    return
                else:
                    # Feature names are contained in feature-file and retrieved
                    feat_names = InputReader().read_instance_features_file(feat_fn)[0]
        else:
            feat_names = copy.deepcopy(self.scenario.feature_names)
        return feat_names

    def _check_rh_for_inc_and_def(self, rh, name=''):
        """Check if default and incumbent are evaluated on all instances in this rh

        Parameters
        ----------
        rh: RunHistory
            runhistory to be checked
        name: str
            name for logging-purposes

        Returns
        -------
        return_value: bool
            False if either inc or def was not evaluated on all
            train/test-instances
        """
        return_value = True
        for c_name, c in [("default", self.default), ("inc", self.incumbent)]:
            runs = rh.get_runs_for_config(c)
            evaluated = set([inst for inst, seed in runs])
            for i_name, i in [("train", self.scenario.train_insts),
                              ("test", self.scenario.test_insts)]:
                not_evaluated = set(i) - evaluated
                if len(not_evaluated) > 0:
                    self.logger.debug("RunHistory %s only evaluated on %d/%d %s-insts "
                                      "for %s in folder %s",
                                      name, len(i) - len(not_evaluated), len(i),
                                      i_name, c_name, self.path_to_folder)
                    return_value = False
        return return_value

    @classmethod
    def get_reader(cls, name, folder, ta_exec_dir):
        """Returns an appropriate reader for the specified format."""
        # TODO make autodetect format (here? where?)
        if name == 'SMAC3':
            return SMAC3Reader(folder, ta_exec_dir)
        elif name == 'BOHB':
            return SMAC3Reader(folder, ta_exec_dir)
        elif name == 'SMAC2':
            return SMAC2Reader(folder, ta_exec_dir)
        elif name == 'CSV':
            return CSVReader(folder, ta_exec_dir)
        else:
            raise ValueError("%s not supported as file-format" % name)
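A brief usage sketch for loading a run via the classmethod above. The folder path is illustrative, and options mimics the configparser-style object whose 'fANOVA' section _init_pimp_and_validator reads:

import configparser

# Hypothetical usage; paths are illustrative.
options = configparser.ConfigParser()
options['fANOVA'] = {'pimp_max_samples': '-1', 'fanova_pairwise': 'True'}

cr = ConfiguratorRun.from_folder(folder='smac3-output/run_1',
                                 ta_exec_dir='.',
                                 options=options,
                                 file_format='SMAC3',
                                 validation_format='NONE')
print(cr.get_identifier(), cr.get_incumbent())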
class CAVE(object): """ """ def __init__(self, folders: typing.List[str], output: str, ta_exec_dir: Union[str, None] = None, missing_data_method: str = 'epm', max_pimp_samples: int = -1, fanova_pairwise=True): """ Initialize CAVE facade to handle analyzing, plotting and building the report-page easily. During initialization, the analysis-infrastructure is built and the data is validated, meaning the overall best incumbent is found and default+incumbent are evaluated for all instances for all runs, by default using an EPM. The class holds two runhistories: self.original_rh -> only contains runs from the actual data self.validated_rh -> contains original runs and epm-predictions for all incumbents The analyze()-method performs an analysis and outputs a report.html. Arguments --------- folders: list<strings> paths to relevant SMAC runs output: string output for cave to write results (figures + report) ta_exec_dir: string execution directory for target algorithm (to find instance.txt, ..) missing_data_method: string from [validation, epm], how to estimate missing runs """ self.logger = logging.getLogger("cave.cavefacade") self.logger.debug("Folders: %s", str(folders)) self.ta_exec_dir = ta_exec_dir # Create output if necessary self.output = output self.logger.info("Saving results to %s", self.output) if not os.path.exists(output): self.logger.debug("Output-dir %s does not exist, creating", self.output) os.makedirs(output) if not os.path.exists(os.path.join(self.output, "debug")): os.makedirs(os.path.join(self.output, "debug")) # Log to file logger = logging.getLogger() handler = logging.FileHandler( os.path.join(self.output, "debug/debug.log"), "w") handler.setLevel(logging.DEBUG) logger.addHandler(handler) # Global runhistory combines all actual runs of individual SMAC-runs # We save the combined (unvalidated) runhistory to disk, so we can use it later on. # We keep the validated runhistory (with as many runs as possible) in # memory. The distinction is made to avoid using runs that are # only estimated using an EPM for further EPMs or to handle runs # validated on different hardware (depending on validation-method). self.original_rh = RunHistory(average_cost) self.validated_rh = RunHistory(average_cost) # Save all relevant SMAC-runs in a list self.runs = [] for folder in folders: try: self.logger.debug("Collecting data from %s.", folder) self.runs.append(SMACrun(folder, ta_exec_dir)) except Exception as err: self.logger.warning( "Folder %s could not be loaded, failed " "with error message: %s", folder, err) continue if not len(self.runs): raise ValueError( "None of the specified SMAC-folders could be loaded.") # Use scenario of first run for general purposes (expecting they are all the same anyway!) self.scenario = self.runs[0].solver.scenario # Update global runhistory with all available runhistories self.logger.debug("Update original rh with all available rhs!") runhistory_fns = [ os.path.join(run.folder, "runhistory.json") for run in self.runs ] for rh_file in runhistory_fns: self.original_rh.update_from_json(rh_file, self.scenario.cs) self.logger.debug( 'Combined number of Runhistory data points: %d. ' '# Configurations: %d. 
# Runhistories: %d', len(self.original_rh.data), len(self.original_rh.get_all_configs()), len(runhistory_fns)) self.original_rh.save_json( os.path.join(self.output, "combined_rh.json")) # Validator for a) validating with epm, b) plot over time # Initialize without trajectory self.validator = Validator(self.scenario, None, None) # Estimate missing costs for [def, inc1, inc2, ...] self.complete_data(method=missing_data_method) self.best_run = min( self.runs, key=lambda run: self.validated_rh.get_cost(run.solver.incumbent)) self.default = self.scenario.cs.get_default_configuration() self.incumbent = self.best_run.solver.incumbent self.logger.debug("Overall best run: %s, with incumbent: %s", self.best_run.folder, self.incumbent) # Following variable determines whether a distinction is made # between train and test-instances (e.g. in plotting) self.train_test = bool(self.scenario.train_insts != [None] and self.scenario.test_insts != [None]) self.analyzer = Analyzer(self.original_rh, self.validated_rh, self.default, self.incumbent, self.train_test, self.scenario, self.validator, self.output, max_pimp_samples, fanova_pairwise) self.builder = HTMLBuilder(self.output, "CAVE") # Builder for html-website self.website = OrderedDict([]) def complete_data(self, method="epm"): """Complete missing data of runs to be analyzed. Either using validation or EPM. """ with changedir(self.ta_exec_dir if self.ta_exec_dir else '.'): self.logger.info("Completing data using %s.", method) path_for_validated_rhs = os.path.join(self.output, "validated_rhs") for run in self.runs: self.validator.traj = run.traj if method == "validation": # TODO determine # repetitions new_rh = self.validator.validate( 'def+inc', 'train+test', 1, -1, runhistory=self.original_rh) elif method == "epm": new_rh = self.validator.validate_epm( 'def+inc', 'train+test', 1, runhistory=self.original_rh) else: raise ValueError("Missing data method illegal (%s)", method) self.validator.traj = None # Avoid usage-mistakes self.validated_rh.update(new_rh) def analyze(self, performance=True, cdf=True, scatter=True, confviz=True, param_importance=['forward_selection', 'ablation', 'fanova'], feature_analysis=[ "box_violin", "correlation", "feat_importance", "clustering", "feature_cdf" ], parallel_coordinates=True, cost_over_time=True, algo_footprint=True): """Analyze the available data and build HTML-webpage as dict. Save webpage in 'self.output/CAVE/report.html'. 
Analyzing is performed with the analyzer-instance that is initialized in the __init__ Parameters ---------- performance: bool whether to calculate par10-values cdf: bool whether to plot cdf scatter: bool whether to plot scatter confviz: bool whether to perform configuration visualization param_importance: List[str] containing methods for parameter importance feature_analysis: List[str] containing methods for feature analysis parallel_coordinates: bool whether to plot parallel coordinates cost_over_time: bool whether to plot cost over time algo_footprint: bool whether to plot algorithm footprints """ # Check arguments for p in param_importance: if p not in [ 'forward_selection', 'ablation', 'fanova', 'incneighbor' ]: raise ValueError( "%s not a valid option for parameter " "importance!", p) for f in feature_analysis: if f not in [ "box_violin", "correlation", "importance", "clustering", "feature_cdf" ]: raise ValueError("%s not a valid option for feature analysis!", f) # Start analysis overview = self.analyzer.create_overview_table(self.best_run.folder) self.website["Meta Data"] = {"table": overview} compare_config = self.analyzer.config_to_html(self.default, self.incumbent) self.website["Best configuration"] = {"table": compare_config} ########## PERFORMANCE ANALYSIS self.website["Performance Analysis"] = OrderedDict() if performance: performance_table = self.analyzer.create_performance_table( self.default, self.incumbent) self.website["Performance Analysis"]["Performance Table"] = { "table": performance_table } if cdf: cdf_path = self.analyzer.plot_cdf() self.website["Performance Analysis"][ "empirical Cumulative Distribution Function (eCDF)"] = { "figure": cdf_path } if scatter and (self.scenario.train_insts != [[None]]): scatter_path = self.analyzer.plot_scatter() self.website["Performance Analysis"]["Scatterplot"] = { "figure": scatter_path } elif scatter: self.logger.info( "Scatter plot desired, but no instances available.") # Build report before time-consuming analysis self.build_website() if algo_footprint and self.scenario.feature_dict: algorithms = {self.default: "default", self.incumbent: "incumbent"} # Add all available incumbents to test portfolio strategy #for r in self.runs: # if not r.get_incumbent() in algorithms: # algorithms[r.get_incumbent()] = str(self.runs.index(r)) algo_footprint_plots = self.analyzer.plot_algorithm_footprint( algorithms) self.website["Performance Analysis"][ "Algorithm Footprints"] = OrderedDict() for p in algo_footprint_plots: header = os.path.splitext(os.path.split(p)[1])[0] # algo name self.website["Performance Analysis"]["Algorithm Footprints"][ header] = { "figure": p, "tooltip": get_tooltip("Algorithm Footprints") + ": " + header } self.build_website() ########### Configurator's behavior self.website["Configurator's behavior"] = OrderedDict() if confviz: if self.scenario.feature_array is None: self.scenario.feature_array = np.array([[]]) # Sort runhistories and incs wrt cost incumbents = [r.solver.incumbent for r in self.runs] trajectories = [r.traj for r in self.runs] runhistories = [r.runhistory for r in self.runs] costs = [self.validated_rh.get_cost(i) for i in incumbents] costs, incumbents, runhistories, trajectories = ( list(t) for t in zip( *sorted(zip(costs, incumbents, runhistories, trajectories), key=lambda x: x[0]))) incumbents = list(map(lambda x: x['incumbent'], trajectories[0])) confviz_script = self.analyzer.plot_confviz( incumbents, runhistories) self.website["Configurator's behavior"][ "Configurator Footprint"] = { "table": 
confviz_script } elif confviz: self.logger.info("Configuration visualization desired, but no " "instance-features available.") self.build_website() if cost_over_time: cost_over_time_path = self.analyzer.plot_cost_over_time( self.best_run.traj, self.validator) self.website["Configurator's behavior"]["Cost over time"] = { "figure": cost_over_time_path } self.build_website() self.parameter_importance(ablation='ablation' in param_importance, fanova='fanova' in param_importance, forward_selection='forward_selection' in param_importance, incneighbor='incneighbor' in param_importance) self.build_website() if parallel_coordinates: # Should be after parameter importance, if performed. n_params = 6 parallel_path = self.analyzer.plot_parallel_coordinates(n_params) self.website["Configurator's behavior"]["Parallel Coordinates"] = { "figure": parallel_path } self.build_website() if self.scenario.feature_dict: self.feature_analysis(box_violin='box_violin' in feature_analysis, correlation='correlation' in feature_analysis, clustering='clustering' in feature_analysis, importance='importance' in feature_analysis) else: self.logger.info('No feature analysis possible') self.logger.info("CAVE finished. Report is located in %s", os.path.join(self.output, 'report.html')) self.build_website() def parameter_importance(self, ablation=False, fanova=False, forward_selection=False, incneighbor=False): """Perform the specified parameter importance procedures. """ # PARAMETER IMPORTANCE if (ablation or forward_selection or fanova or incneighbor): self.website["Parameter Importance"] = OrderedDict() sum_ = 0 if fanova: sum_ += 1 table, plots, pair_plots = self.analyzer.fanova(self.incumbent) self.website["Parameter Importance"]["fANOVA"] = OrderedDict() self.website["Parameter Importance"]["fANOVA"]["Importance"] = { "table": table } # Insert plots (the received plots is a dict, mapping param -> path) self.website["Parameter Importance"]["fANOVA"][ "Marginals"] = OrderedDict([]) for param, plot in plots.items(): self.website["Parameter Importance"]["fANOVA"]["Marginals"][ param] = { "figure": plot } if pair_plots: self.website["Parameter Importance"]["fANOVA"][ "PairwiseMarginals"] = OrderedDict([]) for param, plot in pair_plots.items(): self.website["Parameter Importance"]["fANOVA"][ "PairwiseMarginals"][param] = { "figure": plot } if ablation: sum_ += 1 self.logger.info("Ablation...") self.analyzer.parameter_importance("ablation", self.incumbent, self.output) ablationpercentage_path = os.path.join(self.output, "ablationpercentage.png") ablationperformance_path = os.path.join(self.output, "ablationperformance.png") self.website["Parameter Importance"]["Ablation"] = { "figure": [ablationpercentage_path, ablationperformance_path] } if forward_selection: sum_ += 1 self.logger.info("Forward Selection...") self.analyzer.parameter_importance("forward-selection", self.incumbent, self.output) f_s_barplot_path = os.path.join(self.output, "forward selection-barplot.png") f_s_chng_path = os.path.join(self.output, "forward selection-chng.png") self.website["Parameter Importance"]["Forward Selection"] = { "figure": [f_s_barplot_path, f_s_chng_path] } if incneighbor: sum_ += 1 self.logger.info("Local EPM-predictions around incumbent...") plots = self.analyzer.local_epm_plots() self.website["Parameter Importance"][ "Local Parameter Importance (LPI)"] = OrderedDict([]) for param, plot in plots.items(): self.website["Parameter Importance"][ "Local Parameter Importance (LPI)"][param] = { "figure": plot } if sum_: of = 
os.path.join(self.output, 'pimp.tex') self.logger.info('Creating pimp latex table at %s' % of) self.analyzer.pimp.table_for_comparison(self.analyzer.evaluators, of, style='latex') def feature_analysis(self, box_violin=False, correlation=False, clustering=False, importance=False): if not (box_violin or correlation or clustering or importance): self.logger.debug("No feature analysis.") return # FEATURE ANALYSIS (ASAPY) # TODO make the following line prettier # TODO feat-names from scenario? in_reader = InputReader() feat_fn = self.scenario.feature_fn if not self.scenario.feature_names: with changedir(self.ta_exec_dir if self.ta_exec_dir else '.'): if not feat_fn or not os.path.exists(feat_fn): self.logger.warning( "Feature Analysis needs valid feature " "file! Either {} is not a valid " "filename or features are not saved in " "the scenario.") self.logger.error("Skipping Feature Analysis.") return else: feat_names = in_reader.read_instance_features_file( self.scenario.feature_fn)[0] else: feat_names = copy.deepcopy(self.scenario.feature_names) self.website["Feature Analysis"] = OrderedDict([]) # feature importance using forward selection if importance: self.website["Feature Analysis"][ "Feature Importance"] = OrderedDict() imp, plots = self.analyzer.feature_importance() imp = DataFrame(data=list(imp.values()), index=list(imp.keys()), columns=["Error"]) imp = imp.to_html() # this is a table with the values in html self.website["Feature Analysis"]["Feature Importance"]["Table"] = { "table": imp } for p in plots: name = os.path.splitext(os.path.basename(p))[0] self.website["Feature Analysis"]["Feature Importance"][ name] = { "figure": p } # box and violin plots if box_violin: name_plots = self.analyzer.feature_analysis( 'box_violin', feat_names) self.website["Feature Analysis"][ "Violin and Box Plots"] = OrderedDict() for plot_tuple in name_plots: key = "%s" % (plot_tuple[0]) self.website["Feature Analysis"]["Violin and Box Plots"][ key] = { "figure": plot_tuple[1] } # correlation plot if correlation: correlation_plot = self.analyzer.feature_analysis( 'correlation', feat_names) if correlation_plot: self.website["Feature Analysis"]["Correlation"] = { "figure": correlation_plot } # cluster instances in feature space if clustering: cluster_plot = self.analyzer.feature_analysis( 'clustering', feat_names) self.website["Feature Analysis"]["Clustering"] = { "figure": cluster_plot } self.build_website() def build_website(self): self.builder.generate_html(self.website)
class CAVE(object): def __init__(self, folders: typing.List[str], output_dir: str, ta_exec_dir: typing.List[str], file_format: str = 'SMAC3', validation_format='NONE', validation_method: str = 'epm', pimp_max_samples: int = -1, fanova_pairwise: bool = True, use_budgets: bool = False, seed: int = 42): """ Initialize CAVE facade to handle analyzing, plotting and building the report-page easily. During initialization, the analysis-infrastructure is built and the data is validated, the overall best incumbent is found and default+incumbent are evaluated for all instances for all runs, by default using an EPM. In the internal data-management the we have three types of runhistories: *original*, *validated* and *epm*. - *original* contain only runs that have been gathered during the optimization-process. - *validated* may contain original runs, but also data that was not gathered iteratively during the optimization, but systematically through external validation of interesting configurations. Important: NO ESTIMATED RUNS IN `validated` RUNHISTORIES! - *epm* contain runs that are gathered through empirical performance models. Runhistories are organized as follows: - each ConfiguratorRun has an *original_runhistory*- and a *combined_runhistory*-attribute - if available, each ConfiguratorRun's *validated_runhistory* contains a runhistory with validation-data gathered after the optimization - *combined_runhistory* always contains as many real runs as possible CaveFacade contains three runhistories: - *original_rh*: original runs that have been performed **during optimization**! - *validated_rh*: runs that have been validated, so they were not part of the original optimization - *epm_rh*: contains epm-predictions for all incumbents The analyze()-method performs an analysis and output a report.html. Arguments --------- folders: list<strings> paths to relevant SMAC runs output_dir: string output for cave to write results (figures + report) ta_exec_dir: string execution directory for target algorithm (to find instance.txt specified in scenario, ..) file_format: str what format the rundata is in, options are [SMAC3, SMAC2 and CSV] validation_method: string from [validation, epm], how to estimate missing runs pimp_max_samples: int passed to PIMP for configuration fanova_pairwise: bool whether to calculate pairwise marginals for fanova use_budgets: bool if true, individual runs are treated as different budgets. they are not evaluated together, but compared against each other. runs are expected in ascending budget-size. seed: int random seed for analysis (e.g. the random forests) """ self.logger = logging.getLogger(self.__module__ + '.' 
+ self.__class__.__name__) self.output_dir = output_dir self.rng = np.random.RandomState(seed) self.use_budgets = use_budgets self.ta_exec_dir = ta_exec_dir self.file_format = file_format self.validation_format = validation_format self.validation_method = validation_method self.pimp_max_samples = pimp_max_samples self.fanova_pairwise = fanova_pairwise self.bohb_result = None # only relevant for bohb_result # Create output_dir if necessary self.logger.info("Saving results to '%s'", self.output_dir) if not os.path.exists(output_dir): self.logger.debug("Output-dir '%s' does not exist, creating", self.output_dir) os.makedirs(output_dir) if file_format == 'BOHB': if len(folders) != 1: raise ValueError( "For file format BOHB you can only specify one folder.") self.bohb_result, folders = HpBandSter2SMAC().convert(folders[0]) # Save all relevant configurator-runs in a list self.logger.debug("Folders: %s; ta-exec-dirs: %s", str(folders), str(ta_exec_dir)) self.runs = [] if len(ta_exec_dir) < len(folders): for i in range(len(folders) - len(ta_exec_dir)): ta_exec_dir.append(ta_exec_dir[0]) for ta_exec_dir, folder in zip(ta_exec_dir, folders): try: self.logger.debug("Collecting data from %s.", folder) self.runs.append( ConfiguratorRun(folder, ta_exec_dir, file_format=file_format, validation_format=validation_format)) except Exception as err: self.logger.warning( "Folder %s could with ta_exec_dir %s not be loaded, failed with error message: %s", folder, ta_exec_dir, err) self.logger.exception(err) continue if not self.runs: raise ValueError("None of the specified folders could be loaded.") # Use scenario of first run for general purposes (expecting they are all the same anyway! self.scenario = self.runs[0].solver.scenario scenario_sanity_check(self.scenario, self.logger) self.default = self.scenario.cs.get_default_configuration() # All runs that have been actually explored during optimization self.global_original_rh = None # All original runs + validated runs if available self.global_validated_rh = None # All validated runs + EPM-estimated for def and inc on all insts self.global_epm_rh = None self.pimp = None self.model = None if use_budgets: self._init_helper_budgets() else: self._init_helper_no_budgets() self.analyzer = Analyzer(self.default, self.incumbent, self.scenario, self.output_dir, pimp_max_samples, fanova_pairwise, rng=self.rng) # Builder for html-website custom_logo = './custom_logo.png' if file_format.startswith('SMAC'): logo_fn = 'SMAC_logo.png' elif file_format == 'BOHB': logo_fn = 'BOHB_logo.png' elif os.path.exists(custom_logo): logo_fn = custom_logo else: logo_fn = 'ml4aad.png' self.logger.info( "No suitable logo found. You can use a custom logo simply by having a file called '%s' " "in the directory from which you run CAVE.", custom_logo) self.builder = HTMLBuilder(self.output_dir, "CAVE", logo_fn=logo_fn, logo_custom=custom_logo == logo_fn) self.website = OrderedDict([]) def _init_helper_budgets(self): self.best_run = self.runs[-1] self.incumbent = self.best_run.solver.incumbent def _init_helper_no_budgets(self): """No budgets means using global, aggregated runhistories to analyze the Configurator's behaviour. Also it creates an EPM using all available information, since all runs are "equal". 
""" self.global_original_rh = RunHistory(average_cost) self.global_validated_rh = RunHistory(average_cost) self.global_epm_rh = RunHistory( average_cost) # Save all relevant SMAC-runs in a list self.logger.debug("Update original rh with all available rhs!") for run in self.runs: self.global_original_rh.update(run.original_runhistory, origin=DataOrigin.INTERNAL) self.global_validated_rh.update(run.original_runhistory, origin=DataOrigin.INTERNAL) if run.validated_runhistory: self.global_validated_rh.update( run.validated_runhistory, origin=DataOrigin.EXTERNAL_SAME_INSTANCES) self._init_pimp_and_validator(self.global_validated_rh) # Estimate missing costs for [def, inc1, inc2, ...] self.validate_default_and_incumbents(self.validation_method, self.ta_exec_dir) self.global_epm_rh.update(self.global_validated_rh) for rh_name, rh in [("original", self.global_original_rh), ("validated", self.global_validated_rh), ("epm", self.global_epm_rh)]: self.logger.debug( 'Combined number of RunHistory data points for %s runhistory: %d ' '# Configurations: %d. # Configurator runs: %d', rh_name, len(rh.data), len(rh.get_all_configs()), len(self.runs)) # Sort runs (best first) self.runs = sorted( self.runs, key=lambda run: self.global_epm_rh.get_cost(run.solver.incumbent)) self.best_run = self.runs[0] self.incumbent = self.pimp.incumbent = self.best_run.solver.incumbent self.logger.debug("Overall best run: %s, with incumbent: %s", self.best_run.folder, self.incumbent) def _init_pimp_and_validator(self, rh, alternative_output_dir=None): """Create ParameterImportance-object and use it's trained model for validation and further predictions We pass validated runhistory, so that the returned model will be based on as much information as possible Parameters ---------- rh: RunHistory runhistory used to build EPM alternative_output_dir: str e.g. for budgets we want pimp to use an alternative output-dir (subfolders per budget) """ self.logger.debug( "Using '%s' as output for pimp", alternative_output_dir if alternative_output_dir else self.output_dir) self.pimp = Importance( scenario=copy.deepcopy(self.scenario), runhistory=rh, incumbent=self.default, # Inject correct incumbent later parameters_to_evaluate=4, save_folder=alternative_output_dir if alternative_output_dir else self.output_dir, seed=self.rng.randint(1, 100000), max_sample_size=self.pimp_max_samples, fANOVA_pairwise=self.fanova_pairwise, preprocess=False) self.model = self.pimp.model # Validator (initialize without trajectory) self.validator = Validator(self.scenario, None, None) self.validator.epm = self.model @timing def validate_default_and_incumbents(self, method, ta_exec_dir): """Validate default and incumbent configurations on all instances possible. Either use validation (physically execute the target algorithm) or EPM-estimate and update according runhistory (validation -> self.global_validated_rh; epm -> self.global_epm_rh). 
Parameters ---------- method: str epm or validation ta_exec_dir: str path from where the target algorithm can be executed as found in scenario (only used for actual validation) """ for run in self.runs: self.logger.debug("Validating %s using %s!", run.folder, method) self.validator.traj = run.traj if method == "validation": with changedir(ta_exec_dir): # TODO determine # repetitions new_rh = self.validator.validate( 'def+inc', 'train+test', 1, -1, runhistory=self.global_validated_rh) self.global_validated_rh.update(new_rh) elif method == "epm": # Only do test-instances if features for test-instances are available instance_mode = 'train+test' if (any([ i not in self.scenario.feature_dict for i in self.scenario.test_insts ]) and any([ i in self.scenario.feature_dict for i in self.scenario.train_insts ])): # noqa self.logger.debug( "No features provided for test-instances (but for train!). " "Cannot validate on \"epm\".") self.logger.warning( "Features detected for train-instances, but not for test-instances. This is " "unintended usage and may lead to errors for some analysis-methods." ) instance_mode = 'train' new_rh = self.validator.validate_epm( 'def+inc', instance_mode, 1, runhistory=self.global_validated_rh) self.global_epm_rh.update(new_rh) else: raise ValueError("Missing data method illegal (%s)", method) self.validator.traj = None # Avoid usage-mistakes @timing def analyze(self, performance=True, cdf=True, scatter=True, cfp=True, cfp_time_slider=False, cfp_max_plot=-1, cfp_number_quantiles=10, param_importance=['forward_selection', 'ablation', 'fanova'], pimp_sort_table_by: str = "average", feature_analysis=[ "box_violin", "correlation", "importance", "clustering", "feature_cdf" ], parallel_coordinates=True, cost_over_time=True, algo_footprint=True): """Analyze the available data and build HTML-webpage as dict. Save webpage in 'self.output_dir/CAVE/report.html'. Analyzing is performed with the analyzer-instance that is initialized in the __init__ Parameters ---------- performance: bool whether to calculate par10-values cdf: bool whether to plot cdf scatter: bool whether to plot scatter cfp: bool whether to perform configuration visualization cfp_time_slider: bool whether to include an interactive time-slider in configuration footprint cfp_max_plot: int limit number of configurations considered for configuration footprint (-1 -> all configs) cfp_number_quantiles: int number of steps over time generated in configuration footprint param_importance: List[str] containing methods for parameter importance pimp_sort_table: str in what order the parameter-importance overview should be organized feature_analysis: List[str] containing methods for feature analysis parallel_coordinates: bool whether to plot parallel coordinates cost_over_time: bool whether to plot cost over time algo_footprint: bool whether to plot algorithm footprints """ # Check arguments for p in param_importance: if p not in ['forward_selection', 'ablation', 'fanova', 'lpi']: raise ValueError( "%s not a valid option for parameter importance!" % p) for f in feature_analysis: if f not in [ "box_violin", "correlation", "importance", "clustering", "feature_cdf" ]: raise ValueError( "%s not a valid option for feature analysis!" 
% f) # Start analysis headings = [ "Meta Data", "Best Configuration", "Performance Analysis", "Configurator's Behavior", "Parameter Importance", "Feature Analysis" ] for h in headings: self.website[h] = OrderedDict() if self.use_budgets: # The individual configurator runs are not directory comparable and cannot be aggregated. # Nevertheless they need to be combined in one comprehensive report and some metrics are to be compared over # the individual runs. # if self.file_format == 'BOHB': # self.website["BOHB Visualization"] = {"figure" : [self.analyzer.bohb_plot(self.bohb_result)]} # Perform analysis for each run for run in self.runs: sub_sec = os.path.basename(run.folder) # Set paths for each budget individual to avoid path-conflicts sub_output_dir = os.path.join(self.output_dir, 'content', sub_sec) os.makedirs(sub_output_dir, exist_ok=True) self.analyzer = Analyzer(run.default, run.incumbent, self.scenario, sub_output_dir, self.pimp_max_samples, self.fanova_pairwise, rng=self.rng) # Set runhistories self.global_original_rh = run.original_runhistory self.global_validated_rh = run.combined_runhistory self.global_epm_rh = RunHistory(average_cost) # Train epm and stuff self._init_pimp_and_validator( run.combined_runhistory, alternative_output_dir=sub_output_dir) self.validate_default_and_incumbents(self.validation_method, run.ta_exec_dir) self.pimp.incumbent = run.incumbent self.incumbent = run.incumbent run.epm_rh = self.global_epm_rh self.best_run = run # Perform analysis overview = self.analyzer.create_overview_table( self.global_original_rh, run, len(self.runs), self.default, self.incumbent) self.website["Meta Data"][sub_sec] = {"table": overview} compare_config_html = compare_configs_to_html( self.default, self.incumbent) self.website["Best Configuration"][sub_sec] = { "table": compare_config_html } d = self.website["Performance Analysis"][ sub_sec] = OrderedDict() self.performance_analysis(d, performance, cdf, scatter, algo_footprint) d = self.website["Parameter Importance"][ sub_sec] = OrderedDict() self.parameter_importance( d, ablation='ablation' in param_importance, fanova='fanova' in param_importance, forward_selection='forward_selection' in param_importance, lpi='lpi' in param_importance, pimp_sort_table_by=pimp_sort_table_by) d = self.website["Configurator's Behavior"][ sub_sec] = OrderedDict() self.configurators_behavior(d, cost_over_time, cfp, cfp_max_plot, cfp_time_slider, cfp_number_quantiles, parallel_coordinates) d = self.website["Feature Analysis"][sub_sec] = OrderedDict() self.feature_analysis(d, box_violin='box_violin' in feature_analysis, correlation='correlation' in feature_analysis, clustering='clustering' in feature_analysis, importance='importance' in feature_analysis) self.original_runhistory = self.validated_runhistory = self.epm_runhistory = None else: overview = self.analyzer.create_overview_table( self.global_original_rh, self.runs[0], len(self.runs), self.default, self.incumbent) self.website["Meta Data"] = {"table": overview} compare_config_html = compare_configs_to_html( self.default, self.incumbent) self.website["Best Configuration"] = {"table": compare_config_html} self.performance_analysis(self.website["Performance Analysis"], performance, cdf, scatter, algo_footprint) self.parameter_importance(self.website["Parameter Importance"], ablation='ablation' in param_importance, fanova='fanova' in param_importance, forward_selection='forward_selection' in param_importance, lpi='lpi' in param_importance, pimp_sort_table_by=pimp_sort_table_by) 
self.configurators_behavior( self.website["Configurator's Behavior"], cost_over_time, cfp, cfp_max_plot, cfp_time_slider, cfp_number_quantiles, parallel_coordinates) self.feature_analysis(self.website["Feature Analysis"], box_violin='box_violin' in feature_analysis, correlation='correlation' in feature_analysis, clustering='clustering' in feature_analysis, importance='importance' in feature_analysis) self.build_website() self.logger.info("CAVE finished. Report is located in %s", os.path.join(self.output_dir, 'report.html')) def performance_analysis(self, d, performance, cdf, scatter, algo_footprint): """Generate performance analysis. Parameters ---------- d: dictionary dictionary to add entries to performance, cdf, scatter, algo_footprint: bool what analysis-methods to perform """ if performance: instances = [ i for i in self.scenario.train_insts + self.scenario.test_insts if i ] oracle = self.analyzer.get_oracle(instances, self.global_validated_rh) performance_table = self.analyzer.create_performance_table( self.default, self.incumbent, self.global_epm_rh, oracle) d["Performance Table"] = {"table": performance_table} if cdf: cdf_paths = self.analyzer.plot_cdf_compare(self.default, self.incumbent, self.global_epm_rh) if cdf_paths: d["empirical Cumulative Distribution Function (eCDF)"] = { "figure": cdf_paths } if scatter: scatter_paths = self.analyzer.plot_scatter(self.default, self.incumbent, self.global_epm_rh) if scatter_paths: d["Scatterplot"] = {"figure": scatter_paths} self.build_website() if algo_footprint and self.scenario.feature_dict: algorithms = [(self.default, "default"), (self.incumbent, "incumbent")] algo_footprint_plots = self.analyzer.plot_algorithm_footprint( self.global_epm_rh, algorithms) d["Algorithm Footprints"] = OrderedDict() # Interactive bokeh-plot script, div = algo_footprint_plots[0] d["Algorithm Footprints"]["Interactive Algorithm Footprint"] = { "bokeh": (script, div) } p_3d = algo_footprint_plots[1] for plots in p_3d: header = os.path.splitext(os.path.split(plots[0])[1])[0][10:-2] header = header[0].upper() + header[1:].replace('_', ' ') d["Algorithm Footprints"][header] = {"figure_x2": plots} self.build_website() def configurators_behavior(self, d, cost_over_time=False, cfp=False, cfp_max_plot=-1, cfp_time_slider=False, cfp_number_quantiles=1, parallel_coordinates=False): if cost_over_time: cost_over_time_script = self.analyzer.plot_cost_over_time( self.global_validated_rh, self.runs, self.validator) d["Cost Over Time"] = {"bokeh": cost_over_time_script} self.build_website() if cfp: # Configurator Footprint runs = [self.best_run] if self.use_budgets else self.runs res = self.analyzer.plot_configurator_footprint( self.scenario, runs, self.global_original_rh, max_confs=cfp_max_plot, time_slider=(cfp_time_slider and (cfp_number_quantiles > 1)), num_quantiles=cfp_number_quantiles) bokeh_components, cfp_paths = res if cfp_number_quantiles == 1: # Only one plot, no need for "Static"-field d["Configurator Footprint"] = {"bokeh": (bokeh_components)} else: d["Configurator Footprint"] = OrderedDict() d["Configurator Footprint"]["Interactive"] = { "bokeh": (bokeh_components) } if all([True for p in cfp_paths if os.path.exists(p) ]): # If the plots were actually generated d["Configurator Footprint"]["Static"] = { "figure": cfp_paths } else: d["Configurator Footprint"]["Static"] = { "else": "This plot is missing. Maybe it was not generated? " "Check if you installed selenium and phantomjs " "correctly to activate bokeh-exports. 
" "(https://automl.github.io/CAVE/stable/faq.html)" } self.build_website() if parallel_coordinates: # Should be after parameter importance, if performed. n_params = 6 parallel_path = self.analyzer.plot_parallel_coordinates( self.global_original_rh, self.global_validated_rh, self.validator, n_params) if parallel_path: d["Parallel Coordinates"] = {"figure": parallel_path} self.build_website() def parameter_importance(self, d, ablation=False, fanova=False, forward_selection=False, lpi=False, pimp_sort_table_by='average'): """Perform the specified parameter importance procedures. """ sum_ = 0 if fanova: sum_ += 1 self.logger.info("fANOVA...") d["fANOVA"] = OrderedDict() try: table, plots, pair_plots = self.analyzer.fanova( self.pimp, self.incumbent) d["fANOVA"]["Importance"] = {"table": table} # Insert plots (the received plots is a dict, mapping param -> path) d["fANOVA"]["Marginals"] = OrderedDict() for param, plot in plots.items(): d["fANOVA"]["Marginals"][param] = {"figure": plot} if pair_plots: d["fANOVA"]["Pairwise Marginals"] = OrderedDict() for param, plot in pair_plots.items(): d["fANOVA"]["Pairwise Marginals"][param] = { "figure": plot } except RuntimeError as e: err = "Encountered error '%s' in fANOVA, this can e.g. happen with too few data-points." % e self.logger.exception(err) d["fANOVA"] = { "else": err + " Check 'debug/debug.log' for more information." } self.build_website() if ablation: sum_ += 1 self.logger.info("Ablation...") self.analyzer.parameter_importance(self.pimp, "ablation", self.incumbent, self.analyzer.output_dir) ablationpercentage_path = os.path.join(self.analyzer.output_dir, "ablationpercentage.png") ablationperformance_path = os.path.join(self.analyzer.output_dir, "ablationperformance.png") d["Ablation"] = { "figure": [ablationpercentage_path, ablationperformance_path] } self.build_website() if forward_selection: sum_ += 1 self.logger.info("Forward Selection...") self.analyzer.parameter_importance(self.pimp, "forward-selection", self.incumbent, self.analyzer.output_dir) f_s_barplot_path = os.path.join(self.analyzer.output_dir, "forward-selection-barplot.png") f_s_chng_path = os.path.join(self.analyzer.output_dir, "forward-selection-chng.png") d["Forward Selection"] = { "figure": [f_s_barplot_path, f_s_chng_path] } self.build_website() if lpi: sum_ += 1 self.logger.info("Local EPM-predictions around incumbent...") plots = self.analyzer.local_epm_plots(self.pimp) d["Local Parameter Importance (LPI)"] = OrderedDict() for param, plot in plots.items(): d["Local Parameter Importance (LPI)"][param] = {"figure": plot} self.build_website() if sum_ >= 2: out_fn = os.path.join(self.output_dir, 'pimp.tex') self.logger.info('Creating pimp latex table at %s' % out_fn) self.pimp.table_for_comparison(self.analyzer.evaluators, out_fn, style='latex') table = self.analyzer.importance_table(pimp_sort_table_by) d["Importance Table"] = { "table": table, "tooltip": "Parameters are sorted by {}. Note, that the values are not " "directly comparable, since the different techniques " "provide different metrics (see respective tooltips " "for details on the differences).".format(pimp_sort_table_by) } d.move_to_end("Importance Table", last=False) self.build_website() def feature_analysis(self, d, box_violin=False, correlation=False, clustering=False, importance=False): if not self.scenario.feature_dict: self.logger.error( "No features available. 
    def feature_analysis(self, d, box_violin=False, correlation=False, clustering=False, importance=False):
        if not self.scenario.feature_dict:
            self.logger.error("No features available. Skipping feature analysis.")
            return
        feat_fn = self.scenario.feature_fn
        if not self.scenario.feature_names:
            self.logger.debug("`scenario.feature_names` is not set. Loading from '%s'", feat_fn)
            with changedir(self.ta_exec_dir if self.ta_exec_dir else '.'):
                if not feat_fn or not os.path.exists(feat_fn):
                    self.logger.warning("Feature names are missing. Either provide a valid feature_file "
                                        "in the scenario (currently %s) or set `scenario.feature_names` "
                                        "manually." % feat_fn)
                    self.logger.error("Skipping feature analysis.")
                    return
                else:
                    # Feature names are contained in the feature-file and retrieved from there
                    feat_names = InputReader().read_instance_features_file(feat_fn)[0]
        else:
            feat_names = copy.deepcopy(self.scenario.feature_names)

        # Feature importance using forward selection
        if importance:
            d["Feature Importance"] = OrderedDict()
            imp, plots = self.analyzer.feature_importance(self.pimp)
            imp = DataFrame(data=list(imp.values()), index=list(imp.keys()), columns=["Error"])
            imp = imp.to_html()  # this is a table with the values in html
            d["Feature Importance"]["Table"] = {"table": imp}
            for p in plots:
                name = os.path.splitext(os.path.basename(p))[0]
                d["Feature Importance"][name] = {"figure": p}

        # Box- and violin-plots
        if box_violin:
            name_plots = self.analyzer.feature_analysis('box_violin', feat_names)
            d["Violin and Box Plots"] = OrderedDict()
            for plot_tuple in name_plots:
                key = "%s" % plot_tuple[0]
                d["Violin and Box Plots"][key] = {"figure": plot_tuple[1]}

        # Correlation plot
        if correlation:
            correlation_plot = self.analyzer.feature_analysis('correlation', feat_names)
            if correlation_plot:
                d["Correlation"] = {"figure": correlation_plot}

        # Cluster instances in feature space
        if clustering:
            cluster_plot = self.analyzer.feature_analysis('clustering', feat_names)
            d["Clustering"] = {"figure": cluster_plot}
        self.build_website()

    def build_website(self):
        self.builder.generate_html(self.website)
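# For reference, a minimal sketch of the feature-file reading that
# `feature_analysis` falls back on. The file name below is hypothetical, and
# the `[0]`-indexing mirrors the call above, assuming that
# `read_instance_features_file` returns the feature names first:

from smac.utils.io.input_reader import InputReader

feat_fn = "path/to/features.csv"  # hypothetical feature file
feat_names = InputReader().read_instance_features_file(feat_fn)[0]
print(feat_names)  # e.g. ['n_vars', 'n_clauses']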
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42))

# Optimize with SMAC
best_conf = smac.optimize()
print("********************* Incumbent found:")
print(best_conf)

# ###### Validation part ##############
traj_logger = TrajLogger(None, Stats(scenario))
trajectory = traj_logger.read_traj_aclib_format("smac-output/run_1/traj_aclib2.json", scenario.cs)
# print(trajectory)
validator = Validator(scenario, trajectory, rng=np.random.RandomState(42))

# Evaluate default and incumbent on the test instances and calculate the cpu time
runhis_dev = validator.validate(config_mode="def", instance_mode="test")
runhis_inc = validator.validate(config_mode="inc", instance_mode="test")
# print("********************* Runhistory (dev) of the validation part:")
# print(runhis_dev.data)
# print("********************* Runhistory (inc) of the validation part:")
# print(runhis_inc.data)


# Copied from the smac documentation; this is not included in RunHistory anymore
def get_instance_costs_for_config(runhis: RunHistory, config: Configuration):
    """
    Returns the average cost per instance (across seeds) for a configuration

    Parameters
    ----------