def _aggregate(self, runs):
    """Combine several configurator runs into a single ConfiguratorRun.

    The original runhistories of all runs are merged into one "original"
    runhistory. The "validated" runhistory receives the same original data
    plus every run's validated runhistory (where one exists). Trajectories
    are merged through ``combine_trajectories``.

    Parameters
    ----------
    runs: list
        runs to be aggregated; each is read for ``original_runhistory``,
        ``validated_runhistory``, ``trajectory``, ``path_to_folder``,
        ``budget`` and (first run only) ``scenario``

    Returns
    -------
    new_cr: ConfiguratorRun
        aggregated run; ``path_to_folder`` / ``budget`` are only carried over
        if all runs agree on them, otherwise they are None
    """
    original_history = RunHistory(average_cost)
    validated_history = RunHistory(average_cost)
    for single_run in runs:
        # Data gathered during optimization goes into both runhistories
        for target in (original_history, validated_history):
            target.update(single_run.original_runhistory, origin=DataOrigin.INTERNAL)
        if single_run.validated_runhistory:
            validated_history.update(single_run.validated_runhistory,
                                     origin=DataOrigin.EXTERNAL_SAME_INSTANCES)

    for label, history in (("original", original_history), ("validated", validated_history)):
        self.logger.debug('Combined number of %s RunHistory data points: %d '
                          '# Configurations: %d. # Configurator runs: %d',
                          label, len(history.data), len(history.get_all_configs()), len(runs))

    merged_trajectory = combine_trajectories([single_run.trajectory for single_run in runs],
                                             self.logger)

    # Folder and budget are only meaningful if they are shared by all runs
    distinct_folders = {single_run.path_to_folder for single_run in runs}
    shared_folder = runs[0].path_to_folder if len(distinct_folders) == 1 else None
    distinct_budgets = {single_run.budget for single_run in runs}
    shared_budget = runs[0].budget if len(distinct_budgets) == 1 else None

    return ConfiguratorRun(
        runs[0].scenario,
        original_history,
        validated_history,
        merged_trajectory,
        self.analyzing_options,
        output_dir=self.output_dir,
        path_to_folder=shared_folder,
        budget=shared_budget,
    )
def _get_bohb_avg(self, validator, runs, rh):
    """Build the 'all_budgets' line from the BOHB result's incumbent trajectory.

    Parameters
    ----------
    validator: Validator
        passed through to ``self._get_mean_var_time``
    runs: list
        configurator runs; their ``combined_runhistory`` is merged into one
        runhistory. The line is only built if there is more than one run.
    rh: RunHistory
        not used by this method (kept for interface compatibility)

    Returns
    -------
    line: Line or None
        None if there is at most one run or ``self.bohb_result`` is not set.
        Otherwise a Line named 'all_budgets' whose mean, upper and lower
        values are all the incumbent losses.
    """
    if len(runs) <= 1 or not self.bohb_result:
        return None

    # Collective runhistory over all runs
    rh_bohb = RunHistory(average_cost)
    for run in runs:
        rh_bohb.update(run.combined_runhistory)

    # Collective trajectory
    traj = HpBandSter2SMAC().get_trajectory({'': self.bohb_result}, '', self.scenario, rh_bohb)
    traj_dict = self.bohb_result.get_incumbent_trajectory()

    # NOTE(review): the values returned here were always overwritten by the
    # incumbent-trajectory data below, so they are no longer bound. The call
    # itself is kept in case it has side effects -- confirm and remove it
    # (together with `traj`) if it has none.
    self._get_mean_var_time(validator, traj, False, rh_bohb)

    configs = traj_dict['config_ids']
    time = traj_dict['times_finished']
    mean = traj_dict['losses']

    # Duplicate every point and shift times by one position so that the
    # resulting line is a step function (value holds until the next time).
    time_double = [t for pair in zip(time, time) for t in pair][1:]
    mean_double = [m for pair in zip(mean, mean) for m in pair][:-1]
    configs_double = [c for pair in zip(configs, configs) for c in pair][:-1]
    return Line('all_budgets', time_double, mean_double, mean_double, mean_double, configs_double)
def merge_foreign_data(
    scenario: Scenario,
    runhistory: RunHistory,
    in_scenario_list: typing.List[Scenario],
    in_runhistory_list: typing.List[RunHistory],
) -> typing.Tuple[Scenario, RunHistory]:
    """Extend <scenario> and <runhistory> with data from other scenarios.

    The foreign scenarios are expected to share the pcs, the feature space
    and the cutoff with <scenario>, but to use different instances. Both
    <scenario> (its feature dictionary) and <runhistory> are modified in
    place and returned.

    Parameters
    ----------
    scenario: Scenario
        original scenario -- feature dictionary will be extended
    runhistory: RunHistory
        original runhistory -- will be extended by further data points
    in_scenario_list: typing.List[Scenario]
        foreign scenarios whose instance features are merged in
    in_runhistory_list: typing.List[RunHistory]
        foreign runhistories whose data points are merged in

    Returns
    -------
    scenario: Scenario
    runhistory: Runhistory

    Raises
    ------
    ValueError
        if a foreign scenario differs in number of features, pcs or cutoff,
        or if a run refers to an instance without features after merging
    """
    # Collect instance features of every compatible foreign scenario
    for foreign in in_scenario_list:
        if scenario.n_features != foreign.n_features:
            message = "Feature Space has to be the same for both scenarios (%d vs %d)." % (
                scenario.n_features, foreign.n_features)
            raise ValueError(message)
        if scenario.cs != foreign.cs:  # type: ignore[attr-defined] # noqa F821
            raise ValueError("PCS of both scenarios have to be identical.")
        if scenario.cutoff != foreign.cutoff:  # type: ignore[attr-defined] # noqa F821
            raise ValueError("Cutoffs of both scenarios have to be identical.")
        scenario.feature_dict.update(foreign.feature_dict)

    # Add the foreign data points
    for foreign_rh in in_runhistory_list:
        runhistory.update(foreign_rh, origin=DataOrigin.EXTERNAL_DIFFERENT_INSTANCES)

    # Every run must now refer to an instance with known features
    known_features = scenario.feature_dict
    for run_key in runhistory.data:
        if known_features.get(run_key.instance_id) is None:
            raise ValueError(
                'Instance feature for "%s" was not found in scenario data.' % (run_key.instance_id))

    runhistory.compute_all_costs(instances=scenario.train_insts)
    return scenario, runhistory
def merge_foreign_data(scenario: Scenario,
                       runhistory: RunHistory,
                       in_scenario_list: typing.List[Scenario],
                       in_runhistory_list: typing.List[RunHistory]):
    '''
    Extend <scenario> and <runhistory> with data from other scenarios,
    assuming the same pcs, feature space and cutoff, but different instances.
    Both <scenario> (its feature dictionary) and <runhistory> are modified
    in place.

    Arguments
    ---------
    scenario: Scenario
        original scenario -- feature dictionary will be extended
    runhistory: RunHistory
        original runhistory -- will be extended by further data points
    in_scenario_list: typing.List[Scenario]
        foreign scenarios whose instance features are merged in
    in_runhistory_list: typing.List[RunHistory]
        foreign runhistories whose data points are merged in

    Returns
    -------
    scenario, runhistory

    Raises
    ------
    ValueError
        if a foreign scenario differs in number of features, pcs or cutoff,
        or if a run refers to an instance without features after merging
    '''
    # Collect instance features of every compatible foreign scenario
    for foreign in in_scenario_list:
        if scenario.n_features != foreign.n_features:
            message = "Feature Space has to be the same for both scenarios (%d vs %d)." % (
                scenario.n_features, foreign.n_features)
            raise ValueError(message)
        if scenario.cs != foreign.cs:
            raise ValueError("PCS of both scenarios have to be identical.")
        if scenario.cutoff != foreign.cutoff:
            raise ValueError("Cutoffs of both scenarios have to be identical.")
        scenario.feature_dict.update(foreign.feature_dict)

    # Add the foreign data points, marked as external data
    for foreign_rh in in_runhistory_list:
        runhistory.update(foreign_rh, external_data=True)

    # Every run must now refer to an instance with known features
    known_features = scenario.feature_dict
    for run_key in runhistory.data:
        if known_features.get(run_key.instance_id) is None:
            raise ValueError(
                'Instance feature for "%s" was not found in scenario data.' % (run_key.instance_id))

    runhistory.compute_all_costs(instances=scenario.train_insts)
    return scenario, runhistory
def _aggregate(self, runs):
    """Combine several configurator runs into a single, cached ConfiguratorRun.

    The aggregated run is identified by the joined folder paths and the
    union of budgets of the individual runs. If a run with that identifier
    is already in ``self.cache`` it is returned directly; otherwise the
    runhistories and trajectories are merged, the new run is cached via
    ``self._cache`` and returned.

    Parameters
    ----------
    runs: list
        runs to be aggregated; each is read for ``path_to_folder``,
        ``reduced_to_budgets``, ``original_runhistory``,
        ``validated_runhistory``, ``trajectory`` and (first run only)
        ``scenario``

    Returns
    -------
    new_cr: ConfiguratorRun
        the aggregated (possibly cached) run
    """
    # path_to_folder is the concatenation of all the paths of the individual runs
    path_to_folder = '-'.join(sorted({r.path_to_folder for r in runs}))

    # budgets are the union of individual budgets. if they are not the same for all runs (no usecase atm),
    # they get an additional entry of the hash over the string of the combination to avoid false-positives
    per_run_budgets = [r.reduced_to_budgets for r in runs]
    # A run without budgets (None) is kept as its own category instead of
    # being passed to frozenset(), which would raise a TypeError.
    distinct_budget_sets = {frozenset(b) if b is not None else None for b in per_run_budgets}
    budget_hash = (['budgetmix-%d' % (hash(str(per_run_budgets)))]
                   if len(distinct_budget_sets) != 1 else [])
    budgets = [budget
               for run_budgets in per_run_budgets if run_budgets is not None
               for budget in run_budgets] + budget_hash

    identifier = ConfiguratorRun.identify(path_to_folder, budgets)
    if identifier in self.cache:
        return self.cache[identifier]

    orig_rh, vali_rh = RunHistory(), RunHistory()
    for run in runs:
        # Data gathered during optimization goes into both runhistories
        orig_rh.update(run.original_runhistory, origin=DataOrigin.INTERNAL)
        vali_rh.update(run.original_runhistory, origin=DataOrigin.INTERNAL)
        if run.validated_runhistory:
            vali_rh.update(run.validated_runhistory, origin=DataOrigin.EXTERNAL_SAME_INSTANCES)

    for rh_name, rh in [("original", orig_rh), ("validated", vali_rh)]:
        self.logger.debug('Combined number of %s RunHistory data points: %d '
                          '# Configurations: %d. # Configurator runs: %d',
                          rh_name, len(rh.data), len(rh.get_all_configs()), len(runs))

    traj = combine_trajectories([run.trajectory for run in runs], self.logger)

    new_cr = ConfiguratorRun(
        runs[0].scenario,
        orig_rh,
        vali_rh,
        traj,
        self.analyzing_options,
        output_dir=self.output_dir,
        path_to_folder=path_to_folder,
        reduced_to_budgets=budgets,
    )

    self._cache(new_cr)
    return new_cr
def __init__(
    self,
    original_rh: RunHistory,
    validated_rh: RunHistory,
    validator: Validator,
    scenario: Scenario,
    default: Configuration,
    incumbent: Configuration,
    param_imp: Union[None, Dict[str, float]],
    params: Union[int, List[str]],
    n_configs: int,
    pc_sort_by: str,
    output_dir: str,
    cs: ConfigurationSpace,
    runtime: bool = False,
    max_runs_epm: int = 3000000,
):
    """This function prepares the data from a SMAC-related
    format (using runhistories and parameters) to a more general format
    (using a dataframe). The resulting dataframe is passed to the
    parallel_coordinates-routine

    Parameters
    ----------
    original_rh: RunHistory
        runhistory that should contain only runs that were executed during search
    validated_rh: RunHistory
        runhistory that may contain as many runs as possible, also external runs.
        this runhistory will be used to build the EPM
    validator: Validator
        validator to be used to estimate costs for configurations
    scenario: Scenario
        scenario object to take instances from
    default, incumbent: Configuration
        default and incumbent, they will surely be displayed
    param_imp: Union[None, Dict[str->float]
        if given, maps parameter-names to importance; None is treated like
        an empty mapping (no importance information available)
    params: Union[int, List[str]]
        either directly the parameters to be displayed or the number of
        parameters (will try to define the most important ones)
    n_configs: int
        number of configs to be plotted
    pc_sort_by: str
        defines the pimp-method by which to choose the plotted parameters
    max_runs_epm: int
        maximum number of runs to train the epm with. this should prevent MemoryErrors
    output_dir: str
        output directory for plots
    cs: ConfigurationSpace
        parameter configuration space to be visualized
    runtime: boolean
        runtime will be on logscale
    """
    self.logger = logging.getLogger(self.__module__ + "." + self.__class__.__name__)
    self.error = None

    self.default = default
    self.param_imp = param_imp
    self.cs = cs

    # param_imp is optional; fall back to "no importance known" instead of
    # failing on None.
    importance_by_method = param_imp if param_imp is not None else {}

    # Sorting by importance, if possible (choose first executed parameter-importance)
    self.method, self.importance = "", {}
    if pc_sort_by == 'all':
        self.logger.debug("Sorting by average importance")
        self.method = 'average'
        collected = {}
        for scores in importance_by_method.values():
            if scores:
                for name, value in scores.items():
                    collected.setdefault(name, []).append(value)
        self.importance = {name: sum(values) / len(values) for name, values in collected.items()}
    elif pc_sort_by in importance_by_method:
        self.method, self.importance = pc_sort_by, importance_by_method[pc_sort_by]
    else:
        self.logger.debug("%s not evaluated.. choosing at random from: %s", pc_sort_by,
                          str(list(importance_by_method.keys())))
        for method_name, scores in importance_by_method.items():
            if scores:
                self.method, self.importance = method_name, scores
                break

    self.hp_names = sorted(self.cs.get_hyperparameter_names(),
                           key=lambda x: self.importance.get(x, 0), reverse=True)
    self.logger.debug("Sorted hp's by method '%s': %s", self.method, str(self.hp_names))

    # To be set
    self.plots = []

    # Define set of configurations (limiting to max and choosing most interesting ones)
    all_configs = original_rh.get_all_configs()
    # max_runs_epm is the maximum total number of runs considered for the epm;
    # it limits the number of configs. The caller's value is honoured (it was
    # previously shadowed by a hard-coded 300000).
    max_configs = int(max_runs_epm / (len(scenario.train_insts) + len(scenario.test_insts)))
    if len(all_configs) > max_configs:
        self.logger.debug("Limiting number of configs to train epm from %d to %d (based on max runs %d) and choosing "
                          "the ones with the most runs (for parallel coordinates)",
                          len(all_configs), max_configs, max_runs_epm)
        # Descending by number of runs, so the head of the list really holds
        # the configs with the most runs.
        all_configs = sorted(all_configs,
                             key=lambda c: len(original_rh.get_runs_for_config(c)),
                             reverse=True)[:max_configs]
        if default not in all_configs:
            all_configs = [default] + all_configs
        if incumbent not in all_configs:
            all_configs.append(incumbent)

    # Get costs for those configurations
    epm_rh = RunHistory(average_cost)
    epm_rh.update(validated_rh)
    if scenario.feature_dict:  # if instances are available
        epm_rh.update(timing(validator.validate_epm)(all_configs, 'train+test', 1, runhistory=validated_rh))
    self.config_to_cost = {c: epm_rh.get_cost(c) for c in all_configs}

    self.params = self.get_params(params)
    self.n_configs = n_configs

    self.pcp = ParallelCoordinatesPlotter(self.config_to_cost, output_dir, cs, runtime)
costvalue.append(abs(dists[i][j] - low_dists[i][j])) costvalue = sum(costvalue) / len(costvalue) return costvalue runs = [(ConfiguratorRun('../../Branin/Smac3/smac3-output/run_1/', '../../Branin/Smac3/', file_format='SMAC3', validation_format='NONE'))] global_original_rh = RunHistory(average_cost) global_validated_rh = RunHistory(average_cost) global_epm_rh = RunHistory(average_cost) for run in runs: global_original_rh.update(run.original_runhistory, origin=DataOrigin.INTERNAL) global_validated_rh.update(run.original_runhistory, origin=DataOrigin.INTERNAL) if run.validated_runhistory: global_validated_rh.update(run.validated_runhistory, origin=DataOrigin.EXTERNAL_SAME_INSTANCES) global_epm_rh.update(global_validated_rh) runs = sorted(runs, key=lambda run: global_epm_rh.get_cost(run.solver.incumbent)) class TestRunhistory(unittest.TestCase): def test_classification(self): """Function to test, if random and local runhistory created correctly""" # combined = help.combine_runhistories(runs)
class ConfiguratorRun(SMAC):
    """
    A ConfiguratorRun loads and maintains the information of one individual
    configurator run. Data is read through format-specific readers (SMAC3,
    SMAC2, CSV; BOHB data is read as SMAC3).
    The class provides a scenario, a runhistory and a trajectory and keeps
    original and validated data apart.
    """

    def __init__(self,
                 folder: str,
                 ta_exec_dir: str,
                 file_format: str = 'SMAC3',
                 validation_format: str = 'NONE'):
        """Initialize scenario, runhistory and incumbent from folder, execute
        init-method of SMAC facade (so you could simply use SMAC-instances instead)

        Parameters
        ----------
        folder: string
            output-dir of this run
        ta_exec_dir: string
            if the execution directory for the SMAC-run differs from the cwd,
            there might be problems loading instance-, feature- or PCS-files
            in the scenario-object. since instance- and PCS-files are necessary,
            specify the path to the execution-dir of SMAC here
        file_format: string
            from [SMAC2, SMAC3, BOHB, CSV]
        validation_format: string
            from [SMAC2, SMAC3, CSV, NONE], in which format to look for validated data
        """
        self.logger = logging.getLogger("cave.ConfiguratorRun.{}".format(folder))
        self.cave = None  # Set if we analyze configurators that use budgets

        self.folder = folder
        self.ta_exec_dir = ta_exec_dir
        self.file_format = file_format
        # The attribute keeps the value exactly as passed (including 'NONE')
        self.validation_format = validation_format

        self.logger.debug("Loading from '%s' with ta_exec_dir '%s'.", folder, ta_exec_dir)
        active_validation_format = None if validation_format == 'NONE' else validation_format

        def get_reader(name):
            # BOHB data is read with the SMAC3 reader
            if name == 'BOHB':
                self.logger.debug("File format is BOHB, assmuming data was converted to SMAC3-format using "
                                  "HpBandSter2SMAC from cave.utils.converter.hpbandster2smac.")
            if name in ('SMAC3', 'BOHB'):
                return SMAC3Reader(folder, ta_exec_dir)
            if name == 'SMAC2':
                return SMAC2Reader(folder, ta_exec_dir)
            if name == 'CSV':
                return CSVReader(folder, ta_exec_dir)
            raise ValueError("%s not supported as file-format" % name)

        # Original data: scenario, runhistory and trajectory
        self.reader = get_reader(file_format)
        self.scen = self.reader.get_scenario()
        config_space = self.scen.cs
        self.original_runhistory = self.reader.get_runhistory(config_space)
        self.validated_runhistory = None

        self.traj = self.reader.get_trajectory(cs=self.scen.cs)
        self.default = self.scen.cs.get_default_configuration()
        self.incumbent = self.traj[-1]['incumbent']  # last trajectory entry
        self.train_inst = self.scen.train_insts
        self.test_inst = self.scen.test_insts
        self._check_rh_for_inc_and_def(self.original_runhistory, 'original runhistory')

        # Validated data (optional)
        if active_validation_format:
            self.logger.debug('Using format %s for validation', active_validation_format)
            validation_reader = get_reader(active_validation_format)
            validation_reader.scen = self.scen
            self.validated_runhistory = validation_reader.get_validated_runhistory(self.scen.cs)
            self._check_rh_for_inc_and_def(self.validated_runhistory, 'validated runhistory')
            self.logger.info("Found validated runhistory for \"%s\" and using "
                             "it for evaluation. #configs in validated rh: %d",
                             self.folder, len(self.validated_runhistory.config_ids))

        # Combined runhistory: original data plus (if present) validated data
        self.combined_runhistory = RunHistory(average_cost)
        self.combined_runhistory.update(self.original_runhistory,
                                        origin=DataOrigin.INTERNAL)
        if self.validated_runhistory:
            self.combined_runhistory.update(self.validated_runhistory,
                                            origin=DataOrigin.EXTERNAL_SAME_INSTANCES)

        # EPM runhistory starts out as a copy of the combined data
        self.epm_runhistory = RunHistory(average_cost)
        self.epm_runhistory.update(self.combined_runhistory)

        # Initialize SMAC-object
        super().__init__(scenario=self.scen, runhistory=self.combined_runhistory)  # restore_incumbent=incumbent)
        # TODO use restore, delete next line
        self.solver.incumbent = self.incumbent

    def get_incumbent(self):
        """Return the incumbent stored on the solver."""
        return self.solver.incumbent

    def _check_rh_for_inc_and_def(self, rh, name=''):
        """
        Check if default and incumbent are evaluated on all instances in this rh

        Parameters
        ----------
        rh: RunHistory
            runhistory to be checked
        name: str
            name for logging-purposes

        Returns
        -------
        return_value: bool
            False if either inc or def was not evaluated on all
            train/test-instances
        """
        complete = True
        instance_sets = [("train", self.train_inst), ("test", self.test_inst)]
        for config_label, config in [("default", self.default), ("inc", self.incumbent)]:
            seen_instances = {inst for inst, _ in rh.get_runs_for_config(config)}
            for set_label, instances in instance_sets:
                missing = set(instances) - seen_instances
                if missing:
                    self.logger.debug("RunHistory %s only evaluated on %d/%d %s-insts "
                                      "for %s in folder %s",
                                      name, len(instances) - len(missing), len(instances),
                                      set_label, config_label, self.folder)
                    complete = False
        return complete
def _plot_parallel_coordinates(
    self,
    original_rh: RunHistory,
    validated_rh: RunHistory,
    validator: Validator,
    scenario: Scenario,
    default: Configuration,
    incumbent: Configuration,
    param_imp: Union[None, Dict[str, float]],
    output_dir: str,
    cs: ConfigurationSpace,
    runtime: bool = False,
):
    """Select configurations and parameters and create the parallel-coordinates plot.

    Parameters:
    -----------
    original_rh: RunHistory
        runhistory that should contain only runs that were executed during search
    validated_rh: RunHistory
        runhistory that may contain as many runs as possible, also external runs.
        this runhistory will be used to build the EPM
    validator: Validator
        validator to be used to estimate costs for configurations
    scenario: Scenario
        scenario object to take instances from
    default, incumbent: Configuration
        default and incumbent, they will surely be displayed
    param_imp: Union[None, Dict[str->float]
        if given, maps parameter-names to importance; None is treated like
        an empty mapping (no importance information available)
    output_dir: str
        output directory for plots
    cs: ConfigurationSpace
        parameter configuration space to be visualized
    runtime: boolean
        runtime will be on logscale

    Returns
    -------
    result: dict
        ``{'figure': [plot]}`` on success, ``{'else': <error message>}`` if
        plotting raised a ValueError
    """
    # param_imp is optional; fall back to "no importance known" instead of
    # failing on None.
    importance_by_method = param_imp if param_imp is not None else {}

    # Sorting parameters by importance, if possible (choose first executed parameter-importance)
    method, importance = "", {}
    if self.pc_sort_by == 'all':
        self.logger.debug("Sorting by average importance")
        method = 'average'
        collected = {}
        for scores in importance_by_method.values():
            if scores:
                for name, value in scores.items():
                    collected.setdefault(name, []).append(value)
        importance = {name: sum(values) / len(values) for name, values in collected.items()}
    elif self.pc_sort_by in importance_by_method:
        method, importance = self.pc_sort_by, importance_by_method[self.pc_sort_by]
    else:
        self.logger.debug("%s not evaluated.. choosing at random from: %s", self.pc_sort_by,
                          str(list(importance_by_method.keys())))
        for method_name, scores in importance_by_method.items():
            if scores:
                method, importance = method_name, scores
                self.logger.debug("Chose %s", method)
                break

    hp_names = sorted(cs.get_hyperparameter_names(),
                      key=lambda x: importance.get(x, 0), reverse=True)
    self.logger.debug("Sorted hp's by method '%s': %s", method, str(hp_names))

    # To be set
    self.plots = []

    # Define set of configurations (limiting to max and choosing most interesting ones)
    all_configs = original_rh.get_all_configs()
    # Maximum total number of runs considered for epm to limit maximum possible number configs
    max_runs_epm = self.max_runs_epm
    max_configs = int(max_runs_epm / (len(scenario.train_insts) + len(scenario.test_insts)))
    if len(all_configs) > max_configs:
        self.logger.debug("Limiting number of configs to train epm from %d to %d (based on max runs %d) and choosing "
                          "the ones with the most runs (for parallel coordinates)",
                          len(all_configs), max_configs, max_runs_epm)
        # Descending by number of runs, so the head of the list really holds
        # the configs with the most runs.
        all_configs = sorted(all_configs,
                             key=lambda c: len(original_rh.get_runs_for_config(c)),
                             reverse=True)[:max_configs]
        if default not in all_configs:
            all_configs = [default] + all_configs
        if incumbent not in all_configs:
            all_configs.append(incumbent)

    # Get costs for those configurations
    epm_rh = RunHistory(average_cost)
    epm_rh.update(validated_rh)
    if scenario.feature_dict:  # if instances are available
        epm_rh.update(timing(validator.validate_epm)(all_configs, 'train+test', 1, runhistory=validated_rh))
    config_to_cost = {c: epm_rh.get_cost(c) for c in all_configs}

    pcp = ParallelCoordinatesPlotter(config_to_cost, output_dir, cs, runtime)

    try:
        plots = [pcp.plot_n_configs(self.n_configs, self.get_params(self.params, importance, hp_names))]
        self.logger.debug("Paths to plot(s): %s", str(plots))
        return {'figure': plots}
    except ValueError as err:
        # Report the problem to the caller instead of aborting the analysis
        self.logger.debug("Error: %s", str(err))
        return {'else': str(err)}
class ConfiguratorRun(object):
    """
    ConfiguratorRuns load and maintain information about individual configurator runs. There are different supported
    formats, like: BOHB, SMAC3, SMAC2 and CSV
    This class is responsible for providing a scenario, a runhistory and a trajectory and handling original/validated
    data appropriately.
    To create a ConfiguratorRun from a folder, use ConfiguratorRun.from_folder()
    """

    def __init__(
        self,
        scenario,
        original_runhistory,
        validated_runhistory,
        trajectory,
        options,
        path_to_folder=None,
        ta_exec_dir=None,
        file_format=None,
        validation_format=None,
        budget=None,
        output_dir=None,
    ):
        """
        Parameters
        ----------
        scenario: Scenario
            scenario
        original_runhistory, validated_runhistory: RunHistory
            runhistories containing only the original evaluated data (during optimization process) or the validated
            data where points of interest are reevaluated after the optimization process.
            validated_runhistory may be None.
        trajectory: List[dict]
            a trajectory of the best performing configurations at each point in time; the incumbent is taken from
            the last entry (None if the trajectory is empty)
        options: dict
            options can define a number of custom settings
        path_to_folder: str
            path to the physical folder containing the data
        ta_exec_dir: str
            path to the target-algorithm-execution-directory. This is only important for SMAC-optimized data
        file_format, validation_format: str
            will be autodetected some point soon, until then, specify the file-format (SMAC2, SMAC3, BOHB, etc...)
        budget: str int or float
            a budget, with which this cr is associated
        output_dir: str
            where to save analysis-data for this cr; a temporary directory is created if not given
        """
        self.logger = logging.getLogger("cave.ConfiguratorRun.{}".format(path_to_folder))
        self.rng = np.random.RandomState(42)
        self.options = options

        self.path_to_folder = path_to_folder
        self.budget = budget

        self.scenario = scenario
        self.original_runhistory = original_runhistory
        self.validated_runhistory = validated_runhistory
        self.trajectory = trajectory
        self.ta_exec_dir = ta_exec_dir
        self.file_format = file_format
        self.validation_format = validation_format
        if not output_dir:
            self.logger.debug("New outputdir")
            output_dir = tempfile.mkdtemp()
        self.output_dir = os.path.join(output_dir, 'analysis_data', self.get_identifier())
        os.makedirs(self.output_dir, exist_ok=True)

        self.default = self.scenario.cs.get_default_configuration()
        self.incumbent = self.trajectory[-1]['incumbent'] if self.trajectory else None
        self.feature_names = self._get_feature_names()

        # Create combined runhistory to collect all "real" runs
        self.combined_runhistory = RunHistory(average_cost)
        self.combined_runhistory.update(self.original_runhistory, origin=DataOrigin.INTERNAL)
        if self.validated_runhistory is not None:
            self.combined_runhistory.update(self.validated_runhistory, origin=DataOrigin.EXTERNAL_SAME_INSTANCES)

        # Create runhistory with estimated runs (create Importance-object of pimp and use epm-model for validation)
        self.epm_runhistory = RunHistory(average_cost)
        self.epm_runhistory.update(self.combined_runhistory)

        # Initialize importance and validator
        self._init_pimp_and_validator()
        self._validate_default_and_incumbents("epm", self.ta_exec_dir)

        # Set during execution, to share information between Analyzers
        self.share_information = {
            'parameter_importance': OrderedDict(),
            'feature_importance': OrderedDict(),
            'evaluators': OrderedDict(),
            'validator': None,
        }

    def get_identifier(self):
        """Return a name for this run built from folder path and budget.

        Path and budget are joined with '_' if both are set; if only one is
        set it is used alone; if neither is set the result is 'aggregated'.
        Every '/' is replaced by '_'.
        """
        path = self.path_to_folder if self.path_to_folder is not None else ""
        budget = str(self.budget) if self.budget is not None else ""
        if path and budget:
            res = "_".join([path, budget])
        elif not (path or budget):
            res = 'aggregated'
        else:
            res = path if path else budget
        return res.replace('/', '_')

    @classmethod
    def from_folder(
        cls,
        folder: str,
        ta_exec_dir: str,
        options,
        file_format: str = 'SMAC3',
        validation_format: str = 'NONE',
        budget=None,
        output_dir=None,
    ):
        """Initialize scenario, runhistory and incumbent from folder

        Parameters
        ----------
        folder: string
            output-dir of this configurator-run -> this is also the 'id' for a single run in parallel optimization
        ta_exec_dir: string
            if the execution directory for the SMAC-run differs from the cwd,
            there might be problems loading instance-, feature- or PCS-files
            in the scenario-object. since instance- and PCS-files are necessary,
            specify the path to the execution-dir of SMAC here
        options: dict
            passed on to the constructor
        file_format: string
            from [SMAC2, SMAC3, BOHB, CSV]
        validation_format: string
            from [SMAC2, SMAC3, CSV, NONE], in which format to look for validated data
        budget: str int or float
            passed on to the constructor
        output_dir: str
            passed on to the constructor

        Returns
        -------
        cr: ConfiguratorRun
            run created from the data read from `folder`
        """
        logger = logging.getLogger("cave.ConfiguratorRun.{}".format(folder))
        logger.debug("Loading from \'%s\' with ta_exec_dir \'%s\' with file-format '%s' and validation-format %s. "
                     "Budget (if present): %s", folder, ta_exec_dir, file_format, validation_format, budget)

        if file_format == 'BOHB':
            logger.debug("File format is BOHB, assmuming data was converted to SMAC3-format using "
                         "HpBandSter2SMAC from cave.reader.converter.hpbandster2smac.")
        # 'NONE' (or None) means that no validated data should be loaded
        validation_format = validation_format if validation_format != 'NONE' else None

        # Read in data (scenario, runhistory & trajectory)
        reader = cls.get_reader(file_format, folder, ta_exec_dir)

        scenario = reader.get_scenario()
        scenario_sanity_check(scenario, logger)
        original_runhistory = reader.get_runhistory(scenario.cs)
        validated_runhistory = None

        if validation_format is not None:
            logger.debug('Using format %s for validation', validation_format)
            vali_reader = cls.get_reader(validation_format, folder, ta_exec_dir)
            vali_reader.scen = scenario
            validated_runhistory = vali_reader.get_validated_runhistory(scenario.cs)
            logger.info("Found validated runhistory for \"%s\" and using "
                        "it for evaluation. #configs in validated rh: %d",
                        folder, len(validated_runhistory.config_ids))

        trajectory = reader.get_trajectory(scenario.cs)

        return cls(
            scenario,
            original_runhistory,
            validated_runhistory,
            trajectory,
            options,
            folder,
            ta_exec_dir,
            file_format,
            validation_format,
            budget=budget,
            output_dir=output_dir,
        )

    def get_incumbent(self):
        """Return the incumbent (None if the trajectory was empty)."""
        return self.incumbent

    def _init_pimp_and_validator(
        self,
        alternative_output_dir=None,
    ):
        """Create ParameterImportance-object and use it's trained model for validation and further predictions.
        We pass a combined (original + validated) runhistory, so that the returned model will be based on as much
        information as possible

        Parameters
        ----------
        alternative_output_dir: str
            e.g. for budgets we want pimp to use an alternative output-dir (subfolders per budget)
        """
        self.logger.debug("Using '%s' as output for pimp", alternative_output_dir if alternative_output_dir else
                          self.output_dir)
        self.pimp = Importance(
            scenario=copy.deepcopy(self.scenario),
            runhistory=self.combined_runhistory,
            # Fall back to the default configuration if there is no incumbent
            incumbent=self.incumbent if self.incumbent else self.default,
            save_folder=alternative_output_dir if alternative_output_dir is not None else self.output_dir,
            seed=self.rng.randint(1, 100000),
            max_sample_size=self.options['fANOVA'].getint("pimp_max_samples"),
            fANOVA_pairwise=self.options['fANOVA'].getboolean("fanova_pairwise"),
            preprocess=False,
            verbose=1,  # disable progressbars
        )
        # Validator (initialize without trajectory)
        self.validator = Validator(self.scenario, None, None)
        self.validator.epm = self.pimp.model

    @timing
    def _validate_default_and_incumbents(
        self,
        method,
        ta_exec_dir,
    ):
        """Validate default and incumbent configurations on all instances possible.
        Either use validation (physically execute the target algorithm) or EPM-estimate and update according
        runhistory (validation -> self.validated_runhistory and self.combined_runhistory;
        epm -> self.epm_runhistory).

        Parameters
        ----------
        method: str
            epm or validation
        ta_exec_dir: str
            path from where the target algorithm can be executed as found in scenario (only used for actual validation)

        Raises
        ------
        ValueError
            if `method` is neither "validation" nor "epm"
        """
        # TODO maybe just validate whole trajectory?
        self.logger.debug("Validating %s using %s!", self.get_identifier(), method)
        self.validator.traj = self.trajectory
        if method == "validation":
            with _changedir(ta_exec_dir):
                # TODO determine # repetitions
                new_rh = self.validator.validate('def+inc', 'train+test', 1, -1, runhistory=self.combined_runhistory)
            # NOTE(review): validated_runhistory may be None (see __init__); this
            # branch would then fail -- confirm whether it needs to be created here.
            self.validated_runhistory.update(new_rh)
            self.combined_runhistory.update(new_rh)
        elif method == "epm":
            # Only do test-instances if features for test-instances are available
            instance_mode = 'train+test'
            test_features_missing = any([i not in self.scenario.feature_dict
                                         for i in self.scenario.test_insts])
            if test_features_missing and any([i in self.scenario.feature_dict
                                              for i in self.scenario.train_insts]):
                self.logger.debug("No features provided for test-instances (but for train!). Cannot validate on \"epm\".")
                self.logger.warning("Features detected for train-instances, but not for test-instances. This is "
                                    "unintended usage and may lead to errors for some analysis-methods.")
                instance_mode = 'train'

            new_rh = self.validator.validate_epm('def+inc', instance_mode, 1, runhistory=self.combined_runhistory)
            self.epm_runhistory.update(new_rh)
        else:
            raise ValueError("Missing data method illegal (%s)" % method)
        self.validator.traj = None  # Avoid usage-mistakes

    def _get_feature_names(self):
        """Return the feature names of the scenario, or None if unavailable.

        If the scenario has no feature_dict, None is returned. If
        `scenario.feature_names` is set, a deep copy of it is returned.
        Otherwise the names are read from the scenario's feature file
        (relative to `ta_exec_dir`, or '.' if that is not set); None is
        returned if that file is not given or does not exist.
        """
        if not self.scenario.feature_dict:
            self.logger.info("No features available. Skipping feature analysis.")
            return None
        feat_fn = self.scenario.feature_fn
        if self.scenario.feature_names:
            return copy.deepcopy(self.scenario.feature_names)

        self.logger.debug("`scenario.feature_names` is not set. Loading from '%s'", feat_fn)
        with _changedir(self.ta_exec_dir if self.ta_exec_dir else '.'):
            if not feat_fn or not os.path.exists(feat_fn):
                self.logger.warning("Feature names are missing. Either provide valid feature_file in scenario "
                                    "(currently %s) or set `scenario.feature_names` manually.", feat_fn)
                self.logger.error("Skipping Feature Analysis.")
                return None
            # Feature names are contained in feature-file and retrieved
            return InputReader().read_instance_features_file(feat_fn)[0]

    def _check_rh_for_inc_and_def(self, rh, name=''):
        """
        Check if default and incumbent are evaluated on all instances in this rh

        Parameters
        ----------
        rh: RunHistory
            runhistory to be checked
        name: str
            name for logging-purposes

        Returns
        -------
        return_value: bool
            False if either inc or def was not evaluated on all
            train/test-instances
        """
        return_value = True
        # Instances come from the scenario and the folder from path_to_folder;
        # this class sets no train_inst/test_inst/folder attributes.
        instance_sets = [("train", self.scenario.train_insts), ("test", self.scenario.test_insts)]
        for c_name, c in [("default", self.default), ("inc", self.incumbent)]:
            runs = rh.get_runs_for_config(c)
            evaluated = {inst for inst, _ in runs}
            for i_name, i in instance_sets:
                not_evaluated = set(i) - evaluated
                if not_evaluated:
                    self.logger.debug("RunHistory %s only evaluated on %d/%d %s-insts "
                                      "for %s in folder %s",
                                      name, len(i) - len(not_evaluated), len(i),
                                      i_name, c_name, self.path_to_folder)
                    return_value = False
        return return_value

    @classmethod
    def get_reader(cls, name, folder, ta_exec_dir):
        """Returns an appropriate reader for the specified format.

        BOHB data is read with the SMAC3 reader.

        Raises
        ------
        ValueError
            if `name` is not one of SMAC3, BOHB, SMAC2, CSV
        """
        # TODO make autodetect format (here? where?)
        if name == 'SMAC3':
            return SMAC3Reader(folder, ta_exec_dir)
        elif name == 'BOHB':
            return SMAC3Reader(folder, ta_exec_dir)
        elif name == 'SMAC2':
            return SMAC2Reader(folder, ta_exec_dir)
        elif name == 'CSV':
            return CSVReader(folder, ta_exec_dir)
        else:
            raise ValueError("%s not supported as file-format" % name)
def _preprocess_budget(
        self,
        original_rh: RunHistory,
        validated_rh: RunHistory,
        validator: Validator,
        scenario: Scenario,
        default: Configuration,
        incumbent: Configuration,
        param_imp: Union[None, Dict[str, float]],
        output_dir: str,
        cs: ConfigurationSpace,
        runtime: bool = False,
):
    """
    Preprocess data for one budget and return it as a DataFrame to enable fast replots

    Parameters:
    -----------
    original_rh: RunHistory
        runhistory that should contain only runs that were executed during search
    validated_rh: RunHistory
        runhistory that may contain as many runs as possible, also external runs.
        this runhistory will be used to build the EPM
    validator: Validator
        validator to be used to estimate costs for configurations
    scenario: Scenario
        scenario object to take instances from
    default, incumbent: Configuration
        default and incumbent, they will surely be displayed
    param_imp: Union[None, Dict]
        if given, maps the name of an importance-method to that method's result, which
        is itself a mapping parameter-name -> importance (or a falsy value if the method
        was not evaluated). None is treated like an empty mapping.
    output_dir: str
        output directory for plots (not used in this method)
    cs: ConfigurationSpace
        parameter configuration space to be visualized
    runtime: boolean
        runtime will be on logscale (not used in this method)

    Returns
    -------
    df: pd.DataFrame
        one row per selected configuration, with a 'cost' column followed by one column
        per hyperparameter of `self.runscontainer.scenario.cs`
    """
    # The signature allows None; normalise so the lookups below cannot fail on it.
    param_imp = param_imp or {}

    # Sorting parameters by importance, if possible (choose first executed parameter-importance)
    method, importance = "", {}
    if self.pc_sort_by == 'all':
        self.logger.debug("Sorting by average importance")
        method = 'average'
        collected = {}
        for per_param in param_imp.values():
            if per_param:
                for p, imp in per_param.items():
                    collected.setdefault(p, []).append(imp)
        importance = {k: sum(v) / len(v) for k, v in collected.items()}
    elif self.pc_sort_by in param_imp:
        method, importance = self.pc_sort_by, param_imp[self.pc_sort_by]
    else:
        self.logger.debug("%s not evaluated.. choosing at random from: %s",
                          self.pc_sort_by, str(list(param_imp.keys())))
        for m, i in param_imp.items():
            if i:
                method, importance = m, i
                self.logger.debug("Chose %s", method)
                break
    # A method that is listed but was never evaluated carries a falsy result.
    importance = importance or {}
    hp_names = sorted(cs.get_hyperparameter_names(),
                      key=lambda x: importance.get(x, 0),
                      reverse=True)
    self.logger.debug("Sorted hyperparameters by method \'%s\': %s", method,
                      str(hp_names))

    # Define set of configurations (limiting to max and choosing most interesting ones)
    all_configs = original_rh.get_all_configs()
    # max_runs_epm is the maximum total number of runs considered for epm to limit maximum possible number configs
    max_configs = int(
        self.max_runs_epm /
        (len(scenario.train_insts) + len(scenario.test_insts)))
    if len(all_configs) > max_configs:
        self.logger.debug(
            "Limiting number of configs to train epm from %d to %d (based on max runs %d) and "
            "choosing the ones with the most runs (for parallel coordinates)",
            len(all_configs), max_configs, self.max_runs_epm)
        # Descending by number of runs, so the slice below really keeps the
        # configurations with the MOST runs (an ascending sort kept the fewest).
        all_configs = sorted(
            all_configs,
            key=lambda c: len(
                original_rh.get_runs_for_config(
                    c, only_max_observed_budget=False)),
            reverse=True)
        all_configs = all_configs[:max_configs]
        # Default and incumbent are always part of the result.
        if default not in all_configs:
            all_configs = [default] + all_configs
        if incumbent not in all_configs:
            all_configs.append(incumbent)

    # Get costs for those configurations
    epm_rh = RunHistory()
    epm_rh.update(validated_rh)
    if scenario.feature_dict:  # if instances are available
        epm_rh.update(
            timing(validator.validate_epm)(all_configs,
                                           'train+test',
                                           1,
                                           runhistory=validated_rh))
    config_to_cost = OrderedDict(
        (c, epm_rh.get_cost(c)) for c in all_configs)

    data = OrderedDict()
    data['cost'] = list(config_to_cost.values())
    # NOTE(review): the columns follow `self.runscontainer.scenario.cs`, not the `cs`
    # argument, and not the importance-sorted `hp_names` -- confirm this is intended.
    for hp in self.runscontainer.scenario.cs.get_hyperparameter_names():
        # NOTE(review): `c[hp]` is read unconditionally; a guard for parameters missing
        # from a configuration used to be sketched here -- confirm it is not needed.
        data[hp] = np.array([c[hp] for c in config_to_cost.keys()])
    df = pd.DataFrame(data=data)
    return df
class CAVE(object):
    """Facade that loads SMAC runs, completes their data and builds an HTML report.

    The instance keeps two runhistories:
        self.original_rh  -> only contains runs read from the given folders
        self.validated_rh -> filled by `complete_data` (validation or EPM-estimates)
    and a dict `self.website` that the analysis methods fill section by section and that
    `build_website` hands to the HTML builder.
    """

    def __init__(self,
                 folders: typing.List[str],
                 output: str,
                 ta_exec_dir: Union[str, None] = None,
                 missing_data_method: str = 'epm',
                 max_pimp_samples: int = -1,
                 fanova_pairwise=True):
        """
        Initialize CAVE facade to handle analyzing, plotting and building the
        report-page easily. During initialization, the analysis-infrastructure
        is built and the data is validated, meaning the overall best
        incumbent is found and default+incumbent are evaluated for all
        instances for all runs, by default using an EPM.

        The analyze()-method performs an analysis and outputs a report.html.

        Arguments
        ---------
        folders: list<strings>
            paths to relevant SMAC runs
        output: string
            output for cave to write results (figures + report)
        ta_exec_dir: string
            execution directory for target algorithm (to find instance.txt, ..)
        missing_data_method: string
            from [validation, epm], how to estimate missing runs
        max_pimp_samples: int
            passed through to the Analyzer
        fanova_pairwise: bool
            passed through to the Analyzer

        Raises
        ------
        ValueError
            if none of the folders could be loaded, or if `missing_data_method` is
            not one of the supported methods
        """
        self.logger = logging.getLogger("cave.cavefacade")
        self.logger.debug("Folders: %s", str(folders))
        self.ta_exec_dir = ta_exec_dir

        # Create output if necessary
        self.output = output
        self.logger.info("Saving results to %s", self.output)
        if not os.path.exists(output):
            self.logger.debug("Output-dir %s does not exist, creating",
                              self.output)
        # Creates the output directory as well, if it is missing.
        os.makedirs(os.path.join(self.output, "debug"), exist_ok=True)
        # Log to file
        logger = logging.getLogger()
        handler = logging.FileHandler(
            os.path.join(self.output, "debug/debug.log"), "w")
        handler.setLevel(logging.DEBUG)
        logger.addHandler(handler)

        # Global runhistory combines all actual runs of individual SMAC-runs
        # We save the combined (unvalidated) runhistory to disk, so we can use it later on.
        # We keep the validated runhistory (with as many runs as possible) in
        # memory. The distinction is made to avoid using runs that are
        # only estimated using an EPM for further EPMs or to handle runs
        # validated on different hardware (depending on validation-method).
        self.original_rh = RunHistory(average_cost)
        self.validated_rh = RunHistory(average_cost)

        # Save all relevant SMAC-runs in a list
        self.runs = []
        for folder in folders:
            try:
                self.logger.debug("Collecting data from %s.", folder)
                self.runs.append(SMACrun(folder, ta_exec_dir))
            except Exception as err:
                # Deliberately best-effort: one broken folder must not abort the rest.
                self.logger.warning(
                    "Folder %s could not be loaded, failed "
                    "with error message: %s", folder, err)
                continue
        if not self.runs:
            raise ValueError(
                "None of the specified SMAC-folders could be loaded.")

        # Use scenario of first run for general purposes (expecting they are all the same anyway!)
        self.scenario = self.runs[0].solver.scenario

        # Update global runhistory with all available runhistories
        self.logger.debug("Update original rh with all available rhs!")
        runhistory_fns = [
            os.path.join(run.folder, "runhistory.json") for run in self.runs
        ]
        for rh_file in runhistory_fns:
            self.original_rh.update_from_json(rh_file, self.scenario.cs)
        self.logger.debug(
            'Combined number of Runhistory data points: %d. '
            '# Configurations: %d. # Runhistories: %d',
            len(self.original_rh.data),
            len(self.original_rh.get_all_configs()), len(runhistory_fns))
        self.original_rh.save_json(
            os.path.join(self.output, "combined_rh.json"))

        # Validator for a) validating with epm, b) plot over time
        # Initialize without trajectory
        self.validator = Validator(self.scenario, None, None)

        # Estimate missing costs for [def, inc1, inc2, ...]
        self.complete_data(method=missing_data_method)
        self.best_run = min(
            self.runs,
            key=lambda run: self.validated_rh.get_cost(run.solver.incumbent))

        self.default = self.scenario.cs.get_default_configuration()
        self.incumbent = self.best_run.solver.incumbent

        self.logger.debug("Overall best run: %s, with incumbent: %s",
                          self.best_run.folder, self.incumbent)

        # Following variable determines whether a distinction is made
        # between train and test-instances (e.g. in plotting)
        self.train_test = bool(self.scenario.train_insts != [None]
                               and self.scenario.test_insts != [None])

        self.analyzer = Analyzer(self.original_rh, self.validated_rh,
                                 self.default, self.incumbent, self.train_test,
                                 self.scenario, self.validator, self.output,
                                 max_pimp_samples, fanova_pairwise)

        self.builder = HTMLBuilder(self.output, "CAVE")
        # Builder for html-website
        self.website = OrderedDict([])

    def complete_data(self, method="epm"):
        """Complete missing data of runs to be analyzed. Either using validation
        or EPM.

        For every run the validator is given that run's trajectory, default and
        incumbents are validated/estimated on train+test and the result is merged
        into `self.validated_rh`.

        Parameters
        ----------
        method: str
            "validation" or "epm"

        Raises
        ------
        ValueError
            if `method` is neither "validation" nor "epm"
        """
        # Reject unknown methods up front and with a properly formatted message.
        if method not in ("validation", "epm"):
            raise ValueError("Missing data method illegal (%s)" % method)

        with changedir(self.ta_exec_dir if self.ta_exec_dir else '.'):
            self.logger.info("Completing data using %s.", method)

            for run in self.runs:
                self.validator.traj = run.traj
                try:
                    if method == "validation":
                        # TODO determine # repetitions
                        new_rh = self.validator.validate(
                            'def+inc',
                            'train+test',
                            1,
                            -1,
                            runhistory=self.original_rh)
                    else:
                        new_rh = self.validator.validate_epm(
                            'def+inc',
                            'train+test',
                            1,
                            runhistory=self.original_rh)
                finally:
                    # Avoid usage-mistakes, also when validation fails
                    self.validator.traj = None
                self.validated_rh.update(new_rh)

    def analyze(self,
                performance=True,
                cdf=True,
                scatter=True,
                confviz=True,
                param_importance=('forward_selection', 'ablation', 'fanova'),
                feature_analysis=("box_violin", "correlation", "importance",
                                  "clustering", "feature_cdf"),
                parallel_coordinates=True,
                cost_over_time=True,
                algo_footprint=True):
        """Analyze the available data and build HTML-webpage as dict.
        The report is (re)built through `build_website` after each major step, so
        partial results are available early. Analyzing is performed with the
        analyzer-instance that is initialized in the __init__

        Parameters
        ----------
        performance: bool
            whether to calculate par10-values
        cdf: bool
            whether to plot cdf
        scatter: bool
            whether to plot scatter
        confviz: bool
            whether to perform configuration visualization
        param_importance: List[str]
            containing methods for parameter importance, from
            [forward_selection, ablation, fanova, incneighbor]
        feature_analysis: List[str]
            containing methods for feature analysis, from
            [box_violin, correlation, importance, clustering, feature_cdf];
            "feat_importance" is accepted as an alias for "importance"
        parallel_coordinates: bool
            whether to plot parallel coordinates
        cost_over_time: bool
            whether to plot cost over time
        algo_footprint: bool
            whether to plot algorithm footprints

        Raises
        ------
        ValueError
            if an unknown parameter-importance or feature-analysis method is given
        """
        # Check arguments
        valid_param_importance = ('forward_selection', 'ablation', 'fanova',
                                  'incneighbor')
        valid_feature_analysis = ("box_violin", "correlation", "importance",
                                  "clustering", "feature_cdf")
        for p in param_importance:
            if p not in valid_param_importance:
                raise ValueError(
                    "%s not a valid option for parameter importance!" % p)
        # "feat_importance" used to be the default name here although only "importance"
        # passed the check below; map it so both spellings work.
        feature_analysis = [
            "importance" if f == "feat_importance" else f
            for f in feature_analysis
        ]
        for f in feature_analysis:
            if f not in valid_feature_analysis:
                raise ValueError(
                    "%s not a valid option for feature analysis!" % f)

        # Start analysis
        overview = self.analyzer.create_overview_table(self.best_run.folder)
        self.website["Meta Data"] = {"table": overview}

        compare_config = self.analyzer.config_to_html(self.default,
                                                      self.incumbent)
        self.website["Best configuration"] = {"table": compare_config}

        # PERFORMANCE ANALYSIS
        self.website["Performance Analysis"] = OrderedDict()

        if performance:
            performance_table = self.analyzer.create_performance_table(
                self.default, self.incumbent)
            self.website["Performance Analysis"]["Performance Table"] = {
                "table": performance_table
            }

        if cdf:
            cdf_path = self.analyzer.plot_cdf()
            self.website["Performance Analysis"][
                "empirical Cumulative Distribution Function (eCDF)"] = {
                    "figure": cdf_path
                }

        # NOTE(review): `__init__` compares train_insts against [None], this check uses
        # [[None]] -- confirm which representation the scenario uses.
        if scatter and (self.scenario.train_insts != [[None]]):
            scatter_path = self.analyzer.plot_scatter()
            self.website["Performance Analysis"]["Scatterplot"] = {
                "figure": scatter_path
            }
        elif scatter:
            self.logger.info(
                "Scatter plot desired, but no instances available.")

        # Build report before time-consuming analysis
        self.build_website()

        if algo_footprint and self.scenario.feature_dict:
            # Only default and incumbent are compared; the incumbents of the other
            # runs are not added.
            algorithms = {self.default: "default", self.incumbent: "incumbent"}
            algo_footprint_plots = self.analyzer.plot_algorithm_footprint(
                algorithms)
            footprints = OrderedDict()
            self.website["Performance Analysis"][
                "Algorithm Footprints"] = footprints
            for p in algo_footprint_plots:
                # algo name = file name of the plot without extension
                header = os.path.splitext(os.path.split(p)[1])[0]
                footprints[header] = {
                    "figure": p,
                    "tooltip":
                    get_tooltip("Algorithm Footprints") + ": " + header
                }

        self.build_website()

        # Configurator's behavior
        self.website["Configurator's behavior"] = OrderedDict()

        if confviz:
            if self.scenario.feature_array is None:
                self.scenario.feature_array = np.array([[]])
            # Sort runhistories and incs wrt cost
            incumbents = [r.solver.incumbent for r in self.runs]
            trajectories = [r.traj for r in self.runs]
            runhistories = [r.runhistory for r in self.runs]
            costs = [self.validated_rh.get_cost(i) for i in incumbents]
            costs, incumbents, runhistories, trajectories = (
                list(t) for t in zip(
                    *sorted(zip(costs, incumbents, runhistories, trajectories),
                            key=lambda x: x[0])))
            # Plot the incumbents along the trajectory of the best (cheapest) run.
            incumbents = [entry['incumbent'] for entry in trajectories[0]]

            confviz_script = self.analyzer.plot_confviz(
                incumbents, runhistories)
            self.website["Configurator's behavior"][
                "Configurator Footprint"] = {
                    "table": confviz_script
                }

        self.build_website()

        if cost_over_time:
            cost_over_time_path = self.analyzer.plot_cost_over_time(
                self.best_run.traj, self.validator)
            self.website["Configurator's behavior"]["Cost over time"] = {
                "figure": cost_over_time_path
            }

        self.build_website()

        self.parameter_importance(
            ablation='ablation' in param_importance,
            fanova='fanova' in param_importance,
            forward_selection='forward_selection' in param_importance,
            incneighbor='incneighbor' in param_importance)

        self.build_website()

        if parallel_coordinates:
            # Should be after parameter importance, if performed.
            n_params = 6
            parallel_path = self.analyzer.plot_parallel_coordinates(n_params)
            self.website["Configurator's behavior"]["Parallel Coordinates"] = {
                "figure": parallel_path
            }

        self.build_website()

        if self.scenario.feature_dict:
            self.feature_analysis(
                box_violin='box_violin' in feature_analysis,
                correlation='correlation' in feature_analysis,
                clustering='clustering' in feature_analysis,
                importance='importance' in feature_analysis)
        else:
            self.logger.info('No feature analysis possible')

        self.logger.info("CAVE finished. Report is located in %s",
                         os.path.join(self.output, 'report.html'))

        self.build_website()

    def parameter_importance(self,
                             ablation=False,
                             fanova=False,
                             forward_selection=False,
                             incneighbor=False):
        """Perform the specified parameter importance procedures and add the results
        to `self.website["Parameter Importance"]`.

        If at least one procedure is requested, a latex comparison table is written
        to `<output>/pimp.tex` afterwards.

        Parameters
        ----------
        ablation, fanova, forward_selection, incneighbor: bool
            which procedures to run
        """
        if not (ablation or forward_selection or fanova or incneighbor):
            return

        # PARAMETER IMPORTANCE
        section = OrderedDict()
        self.website["Parameter Importance"] = section

        if fanova:
            table, plots, pair_plots = self.analyzer.fanova(self.incumbent)

            section["fANOVA"] = OrderedDict()
            section["fANOVA"]["Importance"] = {"table": table}
            # Insert plots (the received plots is a dict, mapping param -> path)
            section["fANOVA"]["Marginals"] = OrderedDict([])
            for param, plot in plots.items():
                section["fANOVA"]["Marginals"][param] = {"figure": plot}
            if pair_plots:
                section["fANOVA"]["PairwiseMarginals"] = OrderedDict([])
                for param, plot in pair_plots.items():
                    section["fANOVA"]["PairwiseMarginals"][param] = {
                        "figure": plot
                    }

        if ablation:
            self.logger.info("Ablation...")
            self.analyzer.parameter_importance("ablation", self.incumbent,
                                               self.output)
            # The website entry points at these two file names in the output dir.
            ablationpercentage_path = os.path.join(self.output,
                                                   "ablationpercentage.png")
            ablationperformance_path = os.path.join(self.output,
                                                    "ablationperformance.png")
            section["Ablation"] = {
                "figure": [ablationpercentage_path, ablationperformance_path]
            }

        if forward_selection:
            self.logger.info("Forward Selection...")
            self.analyzer.parameter_importance("forward-selection",
                                               self.incumbent, self.output)
            f_s_barplot_path = os.path.join(self.output,
                                            "forward selection-barplot.png")
            f_s_chng_path = os.path.join(self.output,
                                         "forward selection-chng.png")
            section["Forward Selection"] = {
                "figure": [f_s_barplot_path, f_s_chng_path]
            }

        if incneighbor:
            self.logger.info("Local EPM-predictions around incumbent...")
            plots = self.analyzer.local_epm_plots()
            section["Local Parameter Importance (LPI)"] = OrderedDict([])
            for param, plot in plots.items():
                section["Local Parameter Importance (LPI)"][param] = {
                    "figure": plot
                }

        of = os.path.join(self.output, 'pimp.tex')
        self.logger.info('Creating pimp latex table at %s', of)
        self.analyzer.pimp.table_for_comparison(self.analyzer.evaluators,
                                                of,
                                                style='latex')

    def feature_analysis(self,
                         box_violin=False,
                         correlation=False,
                         clustering=False,
                         importance=False):
        """Perform the specified feature analyses and add the results to
        `self.website["Feature Analysis"]`, then rebuild the website.

        Feature names are taken from `scenario.feature_names` if set, otherwise they
        are read from the file `scenario.feature_fn` (resolved inside the ta-exec-dir).
        If neither is available the analysis is skipped with a warning.

        Parameters
        ----------
        box_violin, correlation, clustering, importance: bool
            which analyses to run
        """
        if not (box_violin or correlation or clustering or importance):
            self.logger.debug("No feature analysis.")
            return

        # FEATURE ANALYSIS (ASAPY)
        # TODO make the following line prettier
        # TODO feat-names from scenario?
        in_reader = InputReader()
        feat_fn = self.scenario.feature_fn

        if not self.scenario.feature_names:
            with changedir(self.ta_exec_dir if self.ta_exec_dir else '.'):
                if not feat_fn or not os.path.exists(feat_fn):
                    self.logger.warning(
                        "Feature Analysis needs valid feature "
                        "file! Either %s is not a valid "
                        "filename or features are not saved in "
                        "the scenario.", feat_fn)
                    self.logger.error("Skipping Feature Analysis.")
                    return
                else:
                    feat_names = in_reader.read_instance_features_file(
                        feat_fn)[0]
        else:
            feat_names = copy.deepcopy(self.scenario.feature_names)

        section = OrderedDict([])
        self.website["Feature Analysis"] = section

        # feature importance using forward selection
        if importance:
            section["Feature Importance"] = OrderedDict()
            imp, plots = self.analyzer.feature_importance()
            imp = DataFrame(data=list(imp.values()),
                            index=list(imp.keys()),
                            columns=["Error"])
            imp = imp.to_html()  # this is a table with the values in html
            section["Feature Importance"]["Table"] = {"table": imp}
            for p in plots:
                name = os.path.splitext(os.path.basename(p))[0]
                section["Feature Importance"][name] = {"figure": p}

        # box and violin plots
        if box_violin:
            name_plots = self.analyzer.feature_analysis(
                'box_violin', feat_names)
            section["Violin and Box Plots"] = OrderedDict()
            for plot_tuple in name_plots:
                key = "%s" % (plot_tuple[0])
                section["Violin and Box Plots"][key] = {
                    "figure": plot_tuple[1]
                }

        # correlation plot
        if correlation:
            correlation_plot = self.analyzer.feature_analysis(
                'correlation', feat_names)
            if correlation_plot:
                section["Correlation"] = {"figure": correlation_plot}

        # cluster instances in feature space
        if clustering:
            cluster_plot = self.analyzer.feature_analysis(
                'clustering', feat_names)
            section["Clustering"] = {"figure": cluster_plot}

        self.build_website()

    def build_website(self):
        """Hand the current state of `self.website` to the HTML builder."""
        self.builder.generate_html(self.website)
class CAVE(object): def __init__(self, folders: typing.List[str], output_dir: str, ta_exec_dir: typing.List[str], file_format: str = 'SMAC3', validation_format='NONE', validation_method: str = 'epm', pimp_max_samples: int = -1, fanova_pairwise: bool = True, use_budgets: bool = False, seed: int = 42): """ Initialize CAVE facade to handle analyzing, plotting and building the report-page easily. During initialization, the analysis-infrastructure is built and the data is validated, the overall best incumbent is found and default+incumbent are evaluated for all instances for all runs, by default using an EPM. In the internal data-management the we have three types of runhistories: *original*, *validated* and *epm*. - *original* contain only runs that have been gathered during the optimization-process. - *validated* may contain original runs, but also data that was not gathered iteratively during the optimization, but systematically through external validation of interesting configurations. Important: NO ESTIMATED RUNS IN `validated` RUNHISTORIES! - *epm* contain runs that are gathered through empirical performance models. Runhistories are organized as follows: - each ConfiguratorRun has an *original_runhistory*- and a *combined_runhistory*-attribute - if available, each ConfiguratorRun's *validated_runhistory* contains a runhistory with validation-data gathered after the optimization - *combined_runhistory* always contains as many real runs as possible CaveFacade contains three runhistories: - *original_rh*: original runs that have been performed **during optimization**! - *validated_rh*: runs that have been validated, so they were not part of the original optimization - *epm_rh*: contains epm-predictions for all incumbents The analyze()-method performs an analysis and output a report.html. 
Arguments --------- folders: list<strings> paths to relevant SMAC runs output_dir: string output for cave to write results (figures + report) ta_exec_dir: string execution directory for target algorithm (to find instance.txt specified in scenario, ..) file_format: str what format the rundata is in, options are [SMAC3, SMAC2 and CSV] validation_method: string from [validation, epm], how to estimate missing runs pimp_max_samples: int passed to PIMP for configuration fanova_pairwise: bool whether to calculate pairwise marginals for fanova use_budgets: bool if true, individual runs are treated as different budgets. they are not evaluated together, but compared against each other. runs are expected in ascending budget-size. seed: int random seed for analysis (e.g. the random forests) """ self.logger = logging.getLogger(self.__module__ + '.' + self.__class__.__name__) self.output_dir = output_dir self.rng = np.random.RandomState(seed) self.use_budgets = use_budgets self.ta_exec_dir = ta_exec_dir self.file_format = file_format self.validation_format = validation_format self.validation_method = validation_method self.pimp_max_samples = pimp_max_samples self.fanova_pairwise = fanova_pairwise self.bohb_result = None # only relevant for bohb_result # Create output_dir if necessary self.logger.info("Saving results to '%s'", self.output_dir) if not os.path.exists(output_dir): self.logger.debug("Output-dir '%s' does not exist, creating", self.output_dir) os.makedirs(output_dir) if file_format == 'BOHB': if len(folders) != 1: raise ValueError( "For file format BOHB you can only specify one folder.") self.bohb_result, folders = HpBandSter2SMAC().convert(folders[0]) # Save all relevant configurator-runs in a list self.logger.debug("Folders: %s; ta-exec-dirs: %s", str(folders), str(ta_exec_dir)) self.runs = [] if len(ta_exec_dir) < len(folders): for i in range(len(folders) - len(ta_exec_dir)): ta_exec_dir.append(ta_exec_dir[0]) for ta_exec_dir, folder in zip(ta_exec_dir, folders): 
try: self.logger.debug("Collecting data from %s.", folder) self.runs.append( ConfiguratorRun(folder, ta_exec_dir, file_format=file_format, validation_format=validation_format)) except Exception as err: self.logger.warning( "Folder %s could with ta_exec_dir %s not be loaded, failed with error message: %s", folder, ta_exec_dir, err) self.logger.exception(err) continue if not self.runs: raise ValueError("None of the specified folders could be loaded.") # Use scenario of first run for general purposes (expecting they are all the same anyway! self.scenario = self.runs[0].solver.scenario scenario_sanity_check(self.scenario, self.logger) self.default = self.scenario.cs.get_default_configuration() # All runs that have been actually explored during optimization self.global_original_rh = None # All original runs + validated runs if available self.global_validated_rh = None # All validated runs + EPM-estimated for def and inc on all insts self.global_epm_rh = None self.pimp = None self.model = None if use_budgets: self._init_helper_budgets() else: self._init_helper_no_budgets() self.analyzer = Analyzer(self.default, self.incumbent, self.scenario, self.output_dir, pimp_max_samples, fanova_pairwise, rng=self.rng) # Builder for html-website custom_logo = './custom_logo.png' if file_format.startswith('SMAC'): logo_fn = 'SMAC_logo.png' elif file_format == 'BOHB': logo_fn = 'BOHB_logo.png' elif os.path.exists(custom_logo): logo_fn = custom_logo else: logo_fn = 'ml4aad.png' self.logger.info( "No suitable logo found. 
You can use a custom logo simply by having a file called '%s' " "in the directory from which you run CAVE.", custom_logo) self.builder = HTMLBuilder(self.output_dir, "CAVE", logo_fn=logo_fn, logo_custom=custom_logo == logo_fn) self.website = OrderedDict([]) def _init_helper_budgets(self): self.best_run = self.runs[-1] self.incumbent = self.best_run.solver.incumbent def _init_helper_no_budgets(self): """No budgets means using global, aggregated runhistories to analyze the Configurator's behaviour. Also it creates an EPM using all available information, since all runs are "equal". """ self.global_original_rh = RunHistory(average_cost) self.global_validated_rh = RunHistory(average_cost) self.global_epm_rh = RunHistory( average_cost) # Save all relevant SMAC-runs in a list self.logger.debug("Update original rh with all available rhs!") for run in self.runs: self.global_original_rh.update(run.original_runhistory, origin=DataOrigin.INTERNAL) self.global_validated_rh.update(run.original_runhistory, origin=DataOrigin.INTERNAL) if run.validated_runhistory: self.global_validated_rh.update( run.validated_runhistory, origin=DataOrigin.EXTERNAL_SAME_INSTANCES) self._init_pimp_and_validator(self.global_validated_rh) # Estimate missing costs for [def, inc1, inc2, ...] self.validate_default_and_incumbents(self.validation_method, self.ta_exec_dir) self.global_epm_rh.update(self.global_validated_rh) for rh_name, rh in [("original", self.global_original_rh), ("validated", self.global_validated_rh), ("epm", self.global_epm_rh)]: self.logger.debug( 'Combined number of RunHistory data points for %s runhistory: %d ' '# Configurations: %d. 
# Configurator runs: %d', rh_name, len(rh.data), len(rh.get_all_configs()), len(self.runs)) # Sort runs (best first) self.runs = sorted( self.runs, key=lambda run: self.global_epm_rh.get_cost(run.solver.incumbent)) self.best_run = self.runs[0] self.incumbent = self.pimp.incumbent = self.best_run.solver.incumbent self.logger.debug("Overall best run: %s, with incumbent: %s", self.best_run.folder, self.incumbent) def _init_pimp_and_validator(self, rh, alternative_output_dir=None): """Create ParameterImportance-object and use it's trained model for validation and further predictions We pass validated runhistory, so that the returned model will be based on as much information as possible Parameters ---------- rh: RunHistory runhistory used to build EPM alternative_output_dir: str e.g. for budgets we want pimp to use an alternative output-dir (subfolders per budget) """ self.logger.debug( "Using '%s' as output for pimp", alternative_output_dir if alternative_output_dir else self.output_dir) self.pimp = Importance( scenario=copy.deepcopy(self.scenario), runhistory=rh, incumbent=self.default, # Inject correct incumbent later parameters_to_evaluate=4, save_folder=alternative_output_dir if alternative_output_dir else self.output_dir, seed=self.rng.randint(1, 100000), max_sample_size=self.pimp_max_samples, fANOVA_pairwise=self.fanova_pairwise, preprocess=False) self.model = self.pimp.model # Validator (initialize without trajectory) self.validator = Validator(self.scenario, None, None) self.validator.epm = self.model @timing def validate_default_and_incumbents(self, method, ta_exec_dir): """Validate default and incumbent configurations on all instances possible. Either use validation (physically execute the target algorithm) or EPM-estimate and update according runhistory (validation -> self.global_validated_rh; epm -> self.global_epm_rh). 
Parameters ---------- method: str epm or validation ta_exec_dir: str path from where the target algorithm can be executed as found in scenario (only used for actual validation) """ for run in self.runs: self.logger.debug("Validating %s using %s!", run.folder, method) self.validator.traj = run.traj if method == "validation": with changedir(ta_exec_dir): # TODO determine # repetitions new_rh = self.validator.validate( 'def+inc', 'train+test', 1, -1, runhistory=self.global_validated_rh) self.global_validated_rh.update(new_rh) elif method == "epm": # Only do test-instances if features for test-instances are available instance_mode = 'train+test' if (any([ i not in self.scenario.feature_dict for i in self.scenario.test_insts ]) and any([ i in self.scenario.feature_dict for i in self.scenario.train_insts ])): # noqa self.logger.debug( "No features provided for test-instances (but for train!). " "Cannot validate on \"epm\".") self.logger.warning( "Features detected for train-instances, but not for test-instances. This is " "unintended usage and may lead to errors for some analysis-methods." ) instance_mode = 'train' new_rh = self.validator.validate_epm( 'def+inc', instance_mode, 1, runhistory=self.global_validated_rh) self.global_epm_rh.update(new_rh) else: raise ValueError("Missing data method illegal (%s)", method) self.validator.traj = None # Avoid usage-mistakes @timing def analyze(self, performance=True, cdf=True, scatter=True, cfp=True, cfp_time_slider=False, cfp_max_plot=-1, cfp_number_quantiles=10, param_importance=['forward_selection', 'ablation', 'fanova'], pimp_sort_table_by: str = "average", feature_analysis=[ "box_violin", "correlation", "importance", "clustering", "feature_cdf" ], parallel_coordinates=True, cost_over_time=True, algo_footprint=True): """Analyze the available data and build HTML-webpage as dict. Save webpage in 'self.output_dir/CAVE/report.html'. 
Analyzing is performed with the analyzer-instance that is initialized in the __init__ Parameters ---------- performance: bool whether to calculate par10-values cdf: bool whether to plot cdf scatter: bool whether to plot scatter cfp: bool whether to perform configuration visualization cfp_time_slider: bool whether to include an interactive time-slider in configuration footprint cfp_max_plot: int limit number of configurations considered for configuration footprint (-1 -> all configs) cfp_number_quantiles: int number of steps over time generated in configuration footprint param_importance: List[str] containing methods for parameter importance pimp_sort_table: str in what order the parameter-importance overview should be organized feature_analysis: List[str] containing methods for feature analysis parallel_coordinates: bool whether to plot parallel coordinates cost_over_time: bool whether to plot cost over time algo_footprint: bool whether to plot algorithm footprints """ # Check arguments for p in param_importance: if p not in ['forward_selection', 'ablation', 'fanova', 'lpi']: raise ValueError( "%s not a valid option for parameter importance!" % p) for f in feature_analysis: if f not in [ "box_violin", "correlation", "importance", "clustering", "feature_cdf" ]: raise ValueError( "%s not a valid option for feature analysis!" % f) # Start analysis headings = [ "Meta Data", "Best Configuration", "Performance Analysis", "Configurator's Behavior", "Parameter Importance", "Feature Analysis" ] for h in headings: self.website[h] = OrderedDict() if self.use_budgets: # The individual configurator runs are not directory comparable and cannot be aggregated. # Nevertheless they need to be combined in one comprehensive report and some metrics are to be compared over # the individual runs. 
# if self.file_format == 'BOHB': # self.website["BOHB Visualization"] = {"figure" : [self.analyzer.bohb_plot(self.bohb_result)]} # Perform analysis for each run for run in self.runs: sub_sec = os.path.basename(run.folder) # Set paths for each budget individual to avoid path-conflicts sub_output_dir = os.path.join(self.output_dir, 'content', sub_sec) os.makedirs(sub_output_dir, exist_ok=True) self.analyzer = Analyzer(run.default, run.incumbent, self.scenario, sub_output_dir, self.pimp_max_samples, self.fanova_pairwise, rng=self.rng) # Set runhistories self.global_original_rh = run.original_runhistory self.global_validated_rh = run.combined_runhistory self.global_epm_rh = RunHistory(average_cost) # Train epm and stuff self._init_pimp_and_validator( run.combined_runhistory, alternative_output_dir=sub_output_dir) self.validate_default_and_incumbents(self.validation_method, run.ta_exec_dir) self.pimp.incumbent = run.incumbent self.incumbent = run.incumbent run.epm_rh = self.global_epm_rh self.best_run = run # Perform analysis overview = self.analyzer.create_overview_table( self.global_original_rh, run, len(self.runs), self.default, self.incumbent) self.website["Meta Data"][sub_sec] = {"table": overview} compare_config_html = compare_configs_to_html( self.default, self.incumbent) self.website["Best Configuration"][sub_sec] = { "table": compare_config_html } d = self.website["Performance Analysis"][ sub_sec] = OrderedDict() self.performance_analysis(d, performance, cdf, scatter, algo_footprint) d = self.website["Parameter Importance"][ sub_sec] = OrderedDict() self.parameter_importance( d, ablation='ablation' in param_importance, fanova='fanova' in param_importance, forward_selection='forward_selection' in param_importance, lpi='lpi' in param_importance, pimp_sort_table_by=pimp_sort_table_by) d = self.website["Configurator's Behavior"][ sub_sec] = OrderedDict() self.configurators_behavior(d, cost_over_time, cfp, cfp_max_plot, cfp_time_slider, cfp_number_quantiles, 
parallel_coordinates) d = self.website["Feature Analysis"][sub_sec] = OrderedDict() self.feature_analysis(d, box_violin='box_violin' in feature_analysis, correlation='correlation' in feature_analysis, clustering='clustering' in feature_analysis, importance='importance' in feature_analysis) self.original_runhistory = self.validated_runhistory = self.epm_runhistory = None else: overview = self.analyzer.create_overview_table( self.global_original_rh, self.runs[0], len(self.runs), self.default, self.incumbent) self.website["Meta Data"] = {"table": overview} compare_config_html = compare_configs_to_html( self.default, self.incumbent) self.website["Best Configuration"] = {"table": compare_config_html} self.performance_analysis(self.website["Performance Analysis"], performance, cdf, scatter, algo_footprint) self.parameter_importance(self.website["Parameter Importance"], ablation='ablation' in param_importance, fanova='fanova' in param_importance, forward_selection='forward_selection' in param_importance, lpi='lpi' in param_importance, pimp_sort_table_by=pimp_sort_table_by) self.configurators_behavior( self.website["Configurator's Behavior"], cost_over_time, cfp, cfp_max_plot, cfp_time_slider, cfp_number_quantiles, parallel_coordinates) self.feature_analysis(self.website["Feature Analysis"], box_violin='box_violin' in feature_analysis, correlation='correlation' in feature_analysis, clustering='clustering' in feature_analysis, importance='importance' in feature_analysis) self.build_website() self.logger.info("CAVE finished. Report is located in %s", os.path.join(self.output_dir, 'report.html')) def performance_analysis(self, d, performance, cdf, scatter, algo_footprint): """Generate performance analysis. 
Parameters ---------- d: dictionary dictionary to add entries to performance, cdf, scatter, algo_footprint: bool what analysis-methods to perform """ if performance: instances = [ i for i in self.scenario.train_insts + self.scenario.test_insts if i ] oracle = self.analyzer.get_oracle(instances, self.global_validated_rh) performance_table = self.analyzer.create_performance_table( self.default, self.incumbent, self.global_epm_rh, oracle) d["Performance Table"] = {"table": performance_table} if cdf: cdf_paths = self.analyzer.plot_cdf_compare(self.default, self.incumbent, self.global_epm_rh) if cdf_paths: d["empirical Cumulative Distribution Function (eCDF)"] = { "figure": cdf_paths } if scatter: scatter_paths = self.analyzer.plot_scatter(self.default, self.incumbent, self.global_epm_rh) if scatter_paths: d["Scatterplot"] = {"figure": scatter_paths} self.build_website() if algo_footprint and self.scenario.feature_dict: algorithms = [(self.default, "default"), (self.incumbent, "incumbent")] algo_footprint_plots = self.analyzer.plot_algorithm_footprint( self.global_epm_rh, algorithms) d["Algorithm Footprints"] = OrderedDict() # Interactive bokeh-plot script, div = algo_footprint_plots[0] d["Algorithm Footprints"]["Interactive Algorithm Footprint"] = { "bokeh": (script, div) } p_3d = algo_footprint_plots[1] for plots in p_3d: header = os.path.splitext(os.path.split(plots[0])[1])[0][10:-2] header = header[0].upper() + header[1:].replace('_', ' ') d["Algorithm Footprints"][header] = {"figure_x2": plots} self.build_website() def configurators_behavior(self, d, cost_over_time=False, cfp=False, cfp_max_plot=-1, cfp_time_slider=False, cfp_number_quantiles=1, parallel_coordinates=False): if cost_over_time: cost_over_time_script = self.analyzer.plot_cost_over_time( self.global_validated_rh, self.runs, self.validator) d["Cost Over Time"] = {"bokeh": cost_over_time_script} self.build_website() if cfp: # Configurator Footprint runs = [self.best_run] if self.use_budgets else 
self.runs res = self.analyzer.plot_configurator_footprint( self.scenario, runs, self.global_original_rh, max_confs=cfp_max_plot, time_slider=(cfp_time_slider and (cfp_number_quantiles > 1)), num_quantiles=cfp_number_quantiles) bokeh_components, cfp_paths = res if cfp_number_quantiles == 1: # Only one plot, no need for "Static"-field d["Configurator Footprint"] = {"bokeh": (bokeh_components)} else: d["Configurator Footprint"] = OrderedDict() d["Configurator Footprint"]["Interactive"] = { "bokeh": (bokeh_components) } if all([True for p in cfp_paths if os.path.exists(p) ]): # If the plots were actually generated d["Configurator Footprint"]["Static"] = { "figure": cfp_paths } else: d["Configurator Footprint"]["Static"] = { "else": "This plot is missing. Maybe it was not generated? " "Check if you installed selenium and phantomjs " "correctly to activate bokeh-exports. " "(https://automl.github.io/CAVE/stable/faq.html)" } self.build_website() if parallel_coordinates: # Should be after parameter importance, if performed. n_params = 6 parallel_path = self.analyzer.plot_parallel_coordinates( self.global_original_rh, self.global_validated_rh, self.validator, n_params) if parallel_path: d["Parallel Coordinates"] = {"figure": parallel_path} self.build_website() def parameter_importance(self, d, ablation=False, fanova=False, forward_selection=False, lpi=False, pimp_sort_table_by='average'): """Perform the specified parameter importance procedures. 
""" sum_ = 0 if fanova: sum_ += 1 self.logger.info("fANOVA...") d["fANOVA"] = OrderedDict() try: table, plots, pair_plots = self.analyzer.fanova( self.pimp, self.incumbent) d["fANOVA"]["Importance"] = {"table": table} # Insert plots (the received plots is a dict, mapping param -> path) d["fANOVA"]["Marginals"] = OrderedDict() for param, plot in plots.items(): d["fANOVA"]["Marginals"][param] = {"figure": plot} if pair_plots: d["fANOVA"]["Pairwise Marginals"] = OrderedDict() for param, plot in pair_plots.items(): d["fANOVA"]["Pairwise Marginals"][param] = { "figure": plot } except RuntimeError as e: err = "Encountered error '%s' in fANOVA, this can e.g. happen with too few data-points." % e self.logger.exception(err) d["fANOVA"] = { "else": err + " Check 'debug/debug.log' for more information." } self.build_website() if ablation: sum_ += 1 self.logger.info("Ablation...") self.analyzer.parameter_importance(self.pimp, "ablation", self.incumbent, self.analyzer.output_dir) ablationpercentage_path = os.path.join(self.analyzer.output_dir, "ablationpercentage.png") ablationperformance_path = os.path.join(self.analyzer.output_dir, "ablationperformance.png") d["Ablation"] = { "figure": [ablationpercentage_path, ablationperformance_path] } self.build_website() if forward_selection: sum_ += 1 self.logger.info("Forward Selection...") self.analyzer.parameter_importance(self.pimp, "forward-selection", self.incumbent, self.analyzer.output_dir) f_s_barplot_path = os.path.join(self.analyzer.output_dir, "forward-selection-barplot.png") f_s_chng_path = os.path.join(self.analyzer.output_dir, "forward-selection-chng.png") d["Forward Selection"] = { "figure": [f_s_barplot_path, f_s_chng_path] } self.build_website() if lpi: sum_ += 1 self.logger.info("Local EPM-predictions around incumbent...") plots = self.analyzer.local_epm_plots(self.pimp) d["Local Parameter Importance (LPI)"] = OrderedDict() for param, plot in plots.items(): d["Local Parameter Importance (LPI)"][param] = {"figure": 
plot} self.build_website() if sum_ >= 2: out_fn = os.path.join(self.output_dir, 'pimp.tex') self.logger.info('Creating pimp latex table at %s' % out_fn) self.pimp.table_for_comparison(self.analyzer.evaluators, out_fn, style='latex') table = self.analyzer.importance_table(pimp_sort_table_by) d["Importance Table"] = { "table": table, "tooltip": "Parameters are sorted by {}. Note, that the values are not " "directly comparable, since the different techniques " "provide different metrics (see respective tooltips " "for details on the differences).".format(pimp_sort_table_by) } d.move_to_end("Importance Table", last=False) self.build_website() def feature_analysis(self, d, box_violin=False, correlation=False, clustering=False, importance=False): if not self.scenario.feature_dict: self.logger.error( "No features available. Skipping feature analysis.") return feat_fn = self.scenario.feature_fn if not self.scenario.feature_names: self.logger.debug( "`scenario.feature_names` is not set. Loading from '%s'", feat_fn) with changedir(self.ta_exec_dir if self.ta_exec_dir else '.'): if not feat_fn or not os.path.exists(feat_fn): self.logger.warning( "Feature names are missing. Either provide valid feature_file in scenario " "(currently %s) or set `scenario.feature_names` manually." 
% feat_fn) self.logger.error("Skipping Feature Analysis.") return else: # Feature names are contained in feature-file and retrieved feat_names = InputReader().read_instance_features_file( feat_fn)[0] else: feat_names = copy.deepcopy(self.scenario.feature_names) # feature importance using forward selection if importance: d["Feature Importance"] = OrderedDict() imp, plots = self.analyzer.feature_importance(self.pimp) imp = DataFrame(data=list(imp.values()), index=list(imp.keys()), columns=["Error"]) imp = imp.to_html() # this is a table with the values in html d["Feature Importance"]["Table"] = {"table": imp} for p in plots: name = os.path.splitext(os.path.basename(p))[0] d["Feature Importance"][name] = {"figure": p} # box and violin plots if box_violin: name_plots = self.analyzer.feature_analysis( 'box_violin', feat_names) d["Violin and Box Plots"] = OrderedDict() for plot_tuple in name_plots: key = "%s" % (plot_tuple[0]) d["Violin and Box Plots"][key] = {"figure": plot_tuple[1]} # correlation plot if correlation: correlation_plot = self.analyzer.feature_analysis( 'correlation', feat_names) if correlation_plot: d["Correlation"] = {"figure": correlation_plot} # cluster instances in feature space if clustering: cluster_plot = self.analyzer.feature_analysis( 'clustering', feat_names) d["Clustering"] = {"figure": cluster_plot} self.build_website() def build_website(self): self.builder.generate_html(self.website)