def read(run_history: RunHistory,
         output_dirs: typing.Union[str, typing.List[str]],
         configuration_space: ConfigurationSpace,
         logger: logging.Logger) -> None:
    """Update runhistory with run results from concurrent runs of pSMAC.

    Parameters
    ----------
    run_history : smac.runhistory.RunHistory
        RunHistory object to be updated with run information from runhistory
        objects stored in the output directory.
    output_dirs : typing.Union[str, typing.List[str]]
        List of SMAC output directories or a Linux path expression (str) which
        will be cast into a list with glob.glob(). This function will search
        the output directories for files matching the runhistory regular
        expression.
    configuration_space : ConfigSpace.ConfigurationSpace
        A ConfigurationSpace object to check if loaded configurations are valid.
    logger : logging.Logger
    """
    numruns_in_runhistory = len(run_history.data)
    initial_numruns_in_runhistory = numruns_in_runhistory

    if isinstance(output_dirs, str):
        parsed_output_dirs = glob.glob(output_dirs)
        if glob.glob(os.path.join(output_dirs, "run_*")):
            parsed_output_dirs += glob.glob(os.path.join(output_dirs, "run_*"))
    else:
        parsed_output_dirs = output_dirs

    for output_directory in parsed_output_dirs:
        for file_in_output_directory in os.listdir(output_directory):
            match = re.match(RUNHISTORY_RE, file_in_output_directory)
            valid_match = re.match(VALIDATEDRUNHISTORY_RE, file_in_output_directory)
            if match or valid_match:
                runhistory_file = os.path.join(output_directory,
                                               file_in_output_directory)
                run_history.update_from_json(runhistory_file,
                                             configuration_space)

                new_numruns_in_runhistory = len(run_history.data)
                difference = new_numruns_in_runhistory - numruns_in_runhistory
                logger.debug('Shared model mode: Loaded %d new runs from %s',
                             difference, runhistory_file)
                numruns_in_runhistory = new_numruns_in_runhistory

    difference = numruns_in_runhistory - initial_numruns_in_runhistory
    logger.info('Shared model mode: Finished loading new runs, '
                'found %d new runs.', difference)
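# Hedged usage sketch (not part of the module above): merge the runhistories
# written by parallel pSMAC workers into one RunHistory. The glob pattern
# "psmac_out/run_*" and the one-parameter configuration space are assumptions
# for illustration; only read()'s signature comes from the function above.
import logging

from ConfigSpace.hyperparameters import UniformFloatHyperparameter
from smac.configspace import ConfigurationSpace
from smac.optimizer.objective import average_cost
from smac.runhistory.runhistory import RunHistory

cs = ConfigurationSpace()
cs.add_hyperparameter(UniformFloatHyperparameter('x', -5, 10))

logger = logging.getLogger("psmac.merge")
run_history = RunHistory(aggregate_func=average_cost)

# Pick up every runhistory file found in the shared output directories.
read(run_history, "psmac_out/run_*", cs, logger)
print("Merged %d runs" % len(run_history.data))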
                           run_obj=scenario.run_obj,
                           par_factor=scenario.par_factor,
                           cost_for_crash=scenario.cost_for_crash)
    if args_.tae == "aclib":
        tae = ExecuteTARunAClib(ta=scenario.ta,
                                run_obj=scenario.run_obj,
                                par_factor=scenario.par_factor,
                                cost_for_crash=scenario.cost_for_crash)

    validator = Validator(scenario, trajectory, args_.seed)

    # Load runhistory
    if args_.runhistory:
        runhistory = RunHistory(average_cost)
        for rh_path in args_.runhistory:
            runhistory.update_from_json(rh_path, scenario.cs)
    else:
        runhistory = None

    if args_.epm:
        validator.validate_epm(config_mode=args_.configs,
                               instance_mode=args_.instances,
                               repetitions=args_.repetitions,
                               runhistory=runhistory,
                               output_fn=args_.output)
    else:
        validator.validate(config_mode=args_.configs,
                           instance_mode=args_.instances,
                           repetitions=args_.repetitions,
                           n_jobs=args_.n_jobs,
                           runhistory=runhistory,
                           par_factor=scenario.par_factor,
                           cost_for_crash=scenario.cost_for_crash)
    if args_.tae == "aclib":
        tae = ExecuteTARunAClib(ta=scenario.ta,
                                run_obj=scenario.run_obj,
                                par_factor=scenario.par_factor,
                                cost_for_crash=scenario.cost_for_crash)

    validator = Validator(scenario, trajectory, args_.seed)

    # Load runhistory
    if args_.runhistory:
        runhistory = RunHistory(average_cost, file_system=scenario.file_system)
        for rh_path in args_.runhistory:
            runhistory.update_from_json(rh_path, scenario.cs,
                                        file_system=scenario.file_system)
    else:
        runhistory = None

    if args_.epm:
        validator.validate_epm(config_mode=args_.configs,
                               instance_mode=args_.instances,
                               repetitions=args_.repetitions,
                               runhistory=runhistory,
                               output_fn=args_.output)
    else:
        validator.validate(config_mode=args_.configs,
                           instance_mode=args_.instances,
                           repetitions=args_.repetitions,
                           n_jobs=args_.n_jobs,
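# Hedged sketch of the programmatic equivalent of the two script variants
# above: estimate the performance of default and incumbent with an EPM instead
# of launching real target-algorithm runs. The file names ("scenario.txt",
# "runhistory.json", "traj_aclib2.json") and the seed are placeholders; the
# keyword arguments mirror the calls in the scripts above.
from smac.optimizer.objective import average_cost
from smac.runhistory.runhistory import RunHistory
from smac.scenario.scenario import Scenario
from smac.utils.io.traj_logging import TrajLogger
from smac.utils.validate import Validator

scenario = Scenario("scenario.txt")
trajectory = TrajLogger.read_traj_aclib_format(fn="traj_aclib2.json",
                                               cs=scenario.cs)

runhistory = RunHistory(average_cost)
runhistory.update_from_json("runhistory.json", scenario.cs)

validator = Validator(scenario, trajectory, 42)
new_rh = validator.validate_epm(config_mode='def+inc',
                                instance_mode='train+test',
                                repetitions=1,
                                runhistory=runhistory)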
                          required=True, help="scenario file in AClib format")
    req_opts.add_argument("--runhistory", required=True, nargs="+",
                          help="runhistory files")
    req_opts.add_argument("--verbose_level", default=logging.INFO,
                          choices=["INFO", "DEBUG"],
                          help="verbosity level")
    req_opts.add_argument("--save_fn", default="fw_importance.pdf",
                          help="file name of saved plot")

    args_ = parser.parse_args()

    logging.basicConfig(level=args_.verbose_level)
    # if args_.verbose_level == "DEBUG":
    #     logging.parent.level = 10

    scen = Scenario(args_.scenario_file)
    hist = RunHistory()
    for runhist_fn in args_.runhistory:
        hist.update_from_json(fn=runhist_fn, cs=scen.cs)

    fws = ForwardSelection(scenario=scen, runhistory=hist)
    fws.run(save_fn=args_.save_fn)
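# Example invocation of the script above (file names are hypothetical); note
# that --runhistory accepts several files, which are merged into a single
# RunHistory before ForwardSelection ranks the parameters:
#
#   python forward_selection.py --scenario_file scenario.txt \
#       --runhistory run_1/runhistory.json run_2/runhistory.json \
#       --verbose_level INFO --save_fn fw_importance.pdf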
    def test_load(self):
        configuration_space = test_helpers.get_branin_config_space()

        other_runhistory = '{"data": [[[2, "branini", 1], [1, 1,' \
                           '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                           '[[1, "branin", 1], [1, 1,' \
                           '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                           '[[3, "branin-hoo", 1], [1, 1,' \
                           '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                           '[[2, null, 1], [1, 1,' \
                           '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                           '[[1, "branini", 1], [1, 1,' \
                           '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                           '[[4, null, 1], [1, 1,' \
                           '{"__enum__": "StatusType.SUCCESS"}, null]]], ' \
                           '"configs": {' \
                           '"4": {"x": -2.2060968293349363, "y": 5.183410905645716}, ' \
                           '"3": {"x": -2.7986616377433045, "y": 1.385078921531967}, ' \
                           '"1": {"x": 1.2553300705386103, "y": 10.804867401632372}, ' \
                           '"2": {"x": -4.998284377739827, "y": 4.534988589477597}}}'

        other_runhistory_filename = os.path.join(self.tmp_dir, 'runhistory.json')
        with open(other_runhistory_filename, 'w') as fh:
            fh.write(other_runhistory)

        # load from an empty runhistory
        runhistory = RunHistory(aggregate_func=average_cost)
        runhistory.load_json(other_runhistory_filename, configuration_space)
        self.assertEqual(sorted(list(runhistory.ids_config.keys())),
                         [1, 2, 3, 4])
        self.assertEqual(len(runhistory.data), 6)

        # load from non-empty runhistory, in case of a duplicate the existing
        # result will be kept and the new one silently discarded
        runhistory = RunHistory(aggregate_func=average_cost)
        configuration_space.seed(1)
        config = configuration_space.sample_configuration()
        runhistory.add(config, 1, 1, StatusType.SUCCESS, seed=1,
                       instance_id='branin')
        id_before = id(runhistory.data[RunKey(1, 'branin', 1)])
        runhistory.update_from_json(other_runhistory_filename,
                                    configuration_space)
        id_after = id(runhistory.data[RunKey(1, 'branin', 1)])
        self.assertEqual(len(runhistory.data), 6)
        self.assertEqual(id_before, id_after)

        # load from non-empty runhistory, in case of a duplicate the existing
        # result will be kept and the new one silently discarded
        runhistory = RunHistory(aggregate_func=average_cost)
        configuration_space.seed(1)
        config = configuration_space.sample_configuration()
        config = configuration_space.sample_configuration()
        # This is the former config_3
        config = configuration_space.sample_configuration()
        runhistory.add(config, 1, 1, StatusType.SUCCESS, seed=1,
                       instance_id='branin')
        id_before = id(runhistory.data[RunKey(1, 'branin', 1)])
        runhistory.update_from_json(other_runhistory_filename,
                                    configuration_space)
        id_after = id(runhistory.data[RunKey(1, 'branin', 1)])
        self.assertEqual(len(runhistory.data), 7)
        self.assertEqual(id_before, id_after)
        self.assertEqual(sorted(list(runhistory.ids_config.keys())),
                         [1, 2, 3, 4])
        self.assertEqual(
            [runhistory.external[run_key] for run_key in runhistory.data],
            [DataOrigin.INTERNAL] + [DataOrigin.EXTERNAL_SAME_INSTANCES] * 6)
    def test_load(self):
        configuration_space = test_helpers.get_branin_config_space()

        other_runhistory = '{"data": [[[2, "branini", 1], [1, 1, 1, null]], ' \
                           '[[1, "branin", 1], [1, 1, 1, null]], ' \
                           '[[3, "branin-hoo", 1], [1, 1, 1, null]], ' \
                           '[[2, null, 1], [1, 1, 1, null]], ' \
                           '[[1, "branini", 1], [1, 1, 1, null]], ' \
                           '[[4, null, 1], [1, 1, 1, null]]], ' \
                           '"configs": {' \
                           '"4": {"x": -2.2060968293349363, "y": 5.183410905645716}, ' \
                           '"3": {"x": -2.7986616377433045, "y": 1.385078921531967}, ' \
                           '"1": {"x": 1.2553300705386103, "y": 10.804867401632372}, ' \
                           '"2": {"x": -4.998284377739827, "y": 4.534988589477597}}}'

        other_runhistory_filename = os.path.join(self.tmp_dir,
                                                 '.runhistory_20.json')
        with open(other_runhistory_filename, 'w') as fh:
            fh.write(other_runhistory)

        # load from an empty runhistory
        runhistory = RunHistory()
        runhistory.load_json(other_runhistory_filename, configuration_space)
        self.assertEqual(sorted(list(runhistory.ids_config.keys())),
                         [1, 2, 3, 4])
        self.assertEqual(len(runhistory.data), 6)

        # load from non-empty runhistory; the existing run will be overridden
        # because it already existed
        runhistory = RunHistory()
        configuration_space.seed(1)
        config = configuration_space.sample_configuration()
        runhistory.add(config, 1, 1, StatusType.SUCCESS, seed=1,
                       instance_id='branin')
        id_before = id(runhistory.data[runhistory.RunKey(1, 'branin', 1)])
        runhistory.update_from_json(other_runhistory_filename,
                                    configuration_space)
        id_after = id(runhistory.data[runhistory.RunKey(1, 'branin', 1)])
        self.assertEqual(len(runhistory.data), 6)
        self.assertNotEqual(id_before, id_after)

        # load from non-empty runhistory; the existing run will not be
        # overridden, but its config_id will be re-used
        runhistory = RunHistory()
        configuration_space.seed(1)
        config = configuration_space.sample_configuration()
        config = configuration_space.sample_configuration()
        # This is the former config_3
        config = configuration_space.sample_configuration()
        runhistory.add(config, 1, 1, StatusType.SUCCESS, seed=1,
                       instance_id='branin')
        id_before = id(runhistory.data[runhistory.RunKey(1, 'branin', 1)])
        runhistory.update_from_json(other_runhistory_filename,
                                    configuration_space)
        id_after = id(runhistory.data[runhistory.RunKey(1, 'branin', 1)])
        self.assertEqual(len(runhistory.data), 7)
        self.assertEqual(id_before, id_after)
        print(runhistory.config_ids)
        self.assertEqual(sorted(list(runhistory.ids_config.keys())),
                         [1, 2, 3, 4])
        print(list(runhistory.data.keys()))
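# Hedged round-trip sketch of the behavior exercised by the two tests above:
# save a RunHistory to JSON, then merge it into another one. The branin-like
# configuration space and the temporary path are constructed here for
# illustration; the RunHistory signatures follow the first test variant.
import os
import tempfile

from ConfigSpace.hyperparameters import UniformFloatHyperparameter
from smac.configspace import ConfigurationSpace
from smac.optimizer.objective import average_cost
from smac.runhistory.runhistory import RunHistory
from smac.tae.execute_ta_run import StatusType

cs = ConfigurationSpace()
cs.add_hyperparameter(UniformFloatHyperparameter('x', -5, 10))
cs.add_hyperparameter(UniformFloatHyperparameter('y', 0, 15))
cs.seed(1)

rh = RunHistory(aggregate_func=average_cost)
rh.add(cs.sample_configuration(), 1, 1, StatusType.SUCCESS,
       seed=1, instance_id='branin')

tmp_fn = os.path.join(tempfile.mkdtemp(), 'runhistory.json')
rh.save_json(tmp_fn)

merged = RunHistory(aggregate_func=average_cost)
merged.update_from_json(tmp_fn, cs)
assert len(merged.data) == 1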
class CAVE(object):
    """Facade to analyze and plot SMAC-runs and build an HTML report."""

    def __init__(self,
                 folders: typing.List[str],
                 output: str,
                 ta_exec_dir: Union[str, None] = None,
                 missing_data_method: str = 'epm',
                 max_pimp_samples: int = -1,
                 fanova_pairwise=True):
        """
        Initialize CAVE facade to handle analyzing, plotting and building the
        report-page easily. During initialization, the analysis-infrastructure
        is built and the data is validated, meaning the overall best incumbent
        is found and default+incumbent are evaluated for all instances for all
        runs, by default using an EPM.

        The class holds two runhistories:
            self.original_rh  -> only contains runs from the actual data
            self.validated_rh -> contains original runs and epm-predictions
                                 for all incumbents

        The analyze()-method performs an analysis and outputs a report.html.

        Parameters
        ----------
        folders: list<strings>
            paths to relevant SMAC runs
        output: string
            output for cave to write results (figures + report)
        ta_exec_dir: string
            execution directory for target algorithm (to find instance.txt, ..)
        missing_data_method: string
            from [validation, epm], how to estimate missing runs
        max_pimp_samples: int
            maximum number of samples used for parameter importance
        fanova_pairwise: bool
            whether to evaluate pairwise marginals in fANOVA
        """
        self.logger = logging.getLogger("cave.cavefacade")
        self.logger.debug("Folders: %s", str(folders))
        self.ta_exec_dir = ta_exec_dir

        # Create output if necessary
        self.output = output
        self.logger.info("Saving results to %s", self.output)
        if not os.path.exists(output):
            self.logger.debug("Output-dir %s does not exist, creating",
                              self.output)
            os.makedirs(output)
        if not os.path.exists(os.path.join(self.output, "debug")):
            os.makedirs(os.path.join(self.output, "debug"))
        # Log to file
        logger = logging.getLogger()
        handler = logging.FileHandler(
            os.path.join(self.output, "debug/debug.log"), "w")
        handler.setLevel(logging.DEBUG)
        logger.addHandler(handler)

        # Global runhistory combines all actual runs of individual SMAC-runs.
        # We save the combined (unvalidated) runhistory to disk, so we can use
        # it later on. We keep the validated runhistory (with as many runs as
        # possible) in memory. The distinction is made to avoid using runs
        # that are only estimated using an EPM for further EPMs, or to handle
        # runs validated on different hardware (depending on validation-method).
        self.original_rh = RunHistory(average_cost)
        self.validated_rh = RunHistory(average_cost)

        # Save all relevant SMAC-runs in a list
        self.runs = []
        for folder in folders:
            try:
                self.logger.debug("Collecting data from %s.", folder)
                self.runs.append(SMACrun(folder, ta_exec_dir))
            except Exception as err:
                self.logger.warning("Folder %s could not be loaded, failed "
                                    "with error message: %s", folder, err)
                continue
        if not len(self.runs):
            raise ValueError("None of the specified SMAC-folders could be "
                             "loaded.")

        # Use scenario of first run for general purposes
        # (expecting they are all the same anyway!)
        self.scenario = self.runs[0].solver.scenario

        # Update global runhistory with all available runhistories
        self.logger.debug("Update original rh with all available rhs!")
        runhistory_fns = [os.path.join(run.folder, "runhistory.json")
                          for run in self.runs]
        for rh_file in runhistory_fns:
            self.original_rh.update_from_json(rh_file, self.scenario.cs)
        self.logger.debug('Combined number of Runhistory data points: %d. '
                          '# Configurations: %d. '
                          '# Runhistories: %d',
                          len(self.original_rh.data),
                          len(self.original_rh.get_all_configs()),
                          len(runhistory_fns))
        self.original_rh.save_json(os.path.join(self.output,
                                                "combined_rh.json"))

        # Validator for a) validating with epm, b) plot over time
        # Initialize without trajectory
        self.validator = Validator(self.scenario, None, None)

        # Estimate missing costs for [def, inc1, inc2, ...]
        self.complete_data(method=missing_data_method)
        self.best_run = min(
            self.runs,
            key=lambda run: self.validated_rh.get_cost(run.solver.incumbent))

        self.default = self.scenario.cs.get_default_configuration()
        self.incumbent = self.best_run.solver.incumbent
        self.logger.debug("Overall best run: %s, with incumbent: %s",
                          self.best_run.folder, self.incumbent)

        # Following variable determines whether a distinction is made
        # between train and test-instances (e.g. in plotting)
        self.train_test = bool(self.scenario.train_insts != [None]
                               and self.scenario.test_insts != [None])

        self.analyzer = Analyzer(self.original_rh, self.validated_rh,
                                 self.default, self.incumbent,
                                 self.train_test, self.scenario,
                                 self.validator, self.output,
                                 max_pimp_samples, fanova_pairwise)

        # Builder for html-website
        self.builder = HTMLBuilder(self.output, "CAVE")
        self.website = OrderedDict([])

    def complete_data(self, method="epm"):
        """Complete missing data of runs to be analyzed, either using
        validation or an EPM."""
        with changedir(self.ta_exec_dir if self.ta_exec_dir else '.'):
            self.logger.info("Completing data using %s.", method)

            path_for_validated_rhs = os.path.join(self.output, "validated_rhs")
            for run in self.runs:
                self.validator.traj = run.traj
                if method == "validation":
                    # TODO determine # repetitions
                    new_rh = self.validator.validate(
                        'def+inc', 'train+test', 1, -1,
                        runhistory=self.original_rh)
                elif method == "epm":
                    new_rh = self.validator.validate_epm(
                        'def+inc', 'train+test', 1,
                        runhistory=self.original_rh)
                else:
                    raise ValueError("Missing data method illegal (%s)"
                                     % method)
                self.validator.traj = None  # Avoid usage-mistakes
                self.validated_rh.update(new_rh)

    def analyze(self,
                performance=True,
                cdf=True,
                scatter=True,
                confviz=True,
                param_importance=['forward_selection', 'ablation', 'fanova'],
                feature_analysis=["box_violin", "correlation", "importance",
                                  "clustering", "feature_cdf"],
                parallel_coordinates=True,
                cost_over_time=True,
                algo_footprint=True):
        """Analyze the available data and build HTML-webpage as dict.

        Save webpage in 'self.output/CAVE/report.html'.
        Analyzing is performed with the analyzer-instance that is initialized
        in the __init__.

        Parameters
        ----------
        performance: bool
            whether to calculate par10-values
        cdf: bool
            whether to plot cdf
        scatter: bool
            whether to plot scatter
        confviz: bool
            whether to perform configuration visualization
        param_importance: List[str]
            containing methods for parameter importance
        feature_analysis: List[str]
            containing methods for feature analysis
        parallel_coordinates: bool
            whether to plot parallel coordinates
        cost_over_time: bool
            whether to plot cost over time
        algo_footprint: bool
            whether to plot algorithm footprints
        """
        # Check arguments
        for p in param_importance:
            if p not in ['forward_selection', 'ablation', 'fanova',
                         'incneighbor']:
                raise ValueError("%s is not a valid option for parameter "
                                 "importance!" % p)
        for f in feature_analysis:
            if f not in ["box_violin", "correlation", "importance",
                         "clustering", "feature_cdf"]:
                raise ValueError("%s is not a valid option for feature "
                                 "analysis!" % f)

        # Start analysis
        overview = self.analyzer.create_overview_table(self.best_run.folder)
        self.website["Meta Data"] = {"table": overview}
        compare_config = self.analyzer.config_to_html(self.default,
                                                      self.incumbent)
        self.website["Best configuration"] = {"table": compare_config}

        ########## PERFORMANCE ANALYSIS
        self.website["Performance Analysis"] = OrderedDict()

        if performance:
            performance_table = self.analyzer.create_performance_table(
                self.default, self.incumbent)
            self.website["Performance Analysis"]["Performance Table"] = {
                "table": performance_table}

        if cdf:
            cdf_path = self.analyzer.plot_cdf()
            self.website["Performance Analysis"][
                "empirical Cumulative Distribution Function (eCDF)"] = {
                    "figure": cdf_path}

        if scatter and (self.scenario.train_insts != [None]):
            scatter_path = self.analyzer.plot_scatter()
            self.website["Performance Analysis"]["Scatterplot"] = {
                "figure": scatter_path}
        elif scatter:
            self.logger.info("Scatter plot desired, but no instances "
                             "available.")

        # Build report before time-consuming analysis
        self.build_website()

        if algo_footprint and self.scenario.feature_dict:
            algorithms = {self.default: "default",
                          self.incumbent: "incumbent"}
            # Add all available incumbents to test portfolio strategy
            # for r in self.runs:
            #     if not r.get_incumbent() in algorithms:
            #         algorithms[r.get_incumbent()] = str(self.runs.index(r))

            algo_footprint_plots = self.analyzer.plot_algorithm_footprint(
                algorithms)
            self.website["Performance Analysis"][
                "Algorithm Footprints"] = OrderedDict()
            for p in algo_footprint_plots:
                header = os.path.splitext(os.path.split(p)[1])[0]  # algo name
                self.website["Performance Analysis"]["Algorithm Footprints"][
                    header] = {
                        "figure": p,
                        "tooltip": get_tooltip("Algorithm Footprints")
                                   + ": " + header}

        self.build_website()

        ########### Configurator's behavior
        self.website["Configurator's behavior"] = OrderedDict()

        if confviz and self.scenario.feature_dict:
            if self.scenario.feature_array is None:
                self.scenario.feature_array = np.array([[]])
            # Sort runhistories and incs wrt cost
            incumbents = [r.solver.incumbent for r in self.runs]
            trajectories = [r.traj for r in self.runs]
            runhistories = [r.runhistory for r in self.runs]
            costs = [self.validated_rh.get_cost(i) for i in incumbents]
            costs, incumbents, runhistories, trajectories = (
                list(t) for t in
                zip(*sorted(zip(costs, incumbents, runhistories, trajectories),
                            key=lambda x: x[0])))
            incumbents = list(map(lambda x: x['incumbent'], trajectories[0]))

            confviz_script = self.analyzer.plot_confviz(incumbents,
                                                        runhistories)
            self.website["Configurator's behavior"]["Configurator Footprint"] = {
                "table": confviz_script}
        elif confviz:
            self.logger.info("Configuration visualization desired, but no "
                             "instance-features available.")

        self.build_website()

        if cost_over_time:
            cost_over_time_path = self.analyzer.plot_cost_over_time(
                self.best_run.traj, self.validator)
            self.website["Configurator's behavior"]["Cost over time"] = {
                "figure": cost_over_time_path}

        self.build_website()

        self.parameter_importance(
            ablation='ablation' in param_importance,
            fanova='fanova' in param_importance,
            forward_selection='forward_selection' in param_importance,
            incneighbor='incneighbor' in param_importance)

        self.build_website()

        if parallel_coordinates:
            # Should be after parameter importance, if performed.
            n_params = 6
            parallel_path = self.analyzer.plot_parallel_coordinates(n_params)
            self.website["Configurator's behavior"]["Parallel Coordinates"] = {
                "figure": parallel_path}

        self.build_website()

        if self.scenario.feature_dict:
            self.feature_analysis(
                box_violin='box_violin' in feature_analysis,
                correlation='correlation' in feature_analysis,
                clustering='clustering' in feature_analysis,
                importance='importance' in feature_analysis)
        else:
            self.logger.info('No feature analysis possible')

        self.logger.info("CAVE finished. Report is located in %s",
                         os.path.join(self.output, 'report.html'))
        self.build_website()

    def parameter_importance(self, ablation=False, fanova=False,
                             forward_selection=False, incneighbor=False):
        """Perform the specified parameter importance procedures."""
        # PARAMETER IMPORTANCE
        if ablation or forward_selection or fanova or incneighbor:
            self.website["Parameter Importance"] = OrderedDict()
        sum_ = 0

        if fanova:
            sum_ += 1
            table, plots, pair_plots = self.analyzer.fanova(self.incumbent)

            self.website["Parameter Importance"]["fANOVA"] = OrderedDict()
            self.website["Parameter Importance"]["fANOVA"]["Importance"] = {
                "table": table}

            # Insert plots (the received plots is a dict, mapping param -> path)
            self.website["Parameter Importance"]["fANOVA"][
                "Marginals"] = OrderedDict([])
            for param, plot in plots.items():
                self.website["Parameter Importance"]["fANOVA"]["Marginals"][
                    param] = {"figure": plot}
            if pair_plots:
                self.website["Parameter Importance"]["fANOVA"][
                    "PairwiseMarginals"] = OrderedDict([])
                for param, plot in pair_plots.items():
                    self.website["Parameter Importance"]["fANOVA"][
                        "PairwiseMarginals"][param] = {"figure": plot}

        if ablation:
            sum_ += 1
            self.logger.info("Ablation...")
            self.analyzer.parameter_importance("ablation", self.incumbent,
                                               self.output)
            ablationpercentage_path = os.path.join(self.output,
                                                   "ablationpercentage.png")
            ablationperformance_path = os.path.join(self.output,
                                                    "ablationperformance.png")
            self.website["Parameter Importance"]["Ablation"] = {
                "figure": [ablationpercentage_path,
                           ablationperformance_path]}

        if forward_selection:
            sum_ += 1
            self.logger.info("Forward Selection...")
            self.analyzer.parameter_importance("forward-selection",
                                               self.incumbent, self.output)
            f_s_barplot_path = os.path.join(self.output,
                                            "forward selection-barplot.png")
            f_s_chng_path = os.path.join(self.output,
                                         "forward selection-chng.png")
            self.website["Parameter Importance"]["Forward Selection"] = {
                "figure": [f_s_barplot_path, f_s_chng_path]}

        if incneighbor:
            sum_ += 1
            self.logger.info("Local EPM-predictions around incumbent...")
            plots = self.analyzer.local_epm_plots()
            self.website["Parameter Importance"][
                "Local Parameter Importance (LPI)"] = OrderedDict([])
            for param, plot in plots.items():
                self.website["Parameter Importance"][
                    "Local Parameter Importance (LPI)"][param] = {
                        "figure": plot}

        if sum_:
            of = os.path.join(self.output, 'pimp.tex')
            self.logger.info('Creating pimp latex table at %s', of)
            self.analyzer.pimp.table_for_comparison(self.analyzer.evaluators,
                                                    of, style='latex')

    def feature_analysis(self, box_violin=False, correlation=False,
                         clustering=False, importance=False):
        if not (box_violin or correlation or clustering or importance):
            self.logger.debug("No feature analysis.")
            return

        # FEATURE ANALYSIS (ASAPY)
        # TODO make the following line prettier
        # TODO feat-names from scenario?
        in_reader = InputReader()
        feat_fn = self.scenario.feature_fn

        if not self.scenario.feature_names:
            with changedir(self.ta_exec_dir if self.ta_exec_dir else '.'):
                if not feat_fn or not os.path.exists(feat_fn):
                    self.logger.warning("Feature Analysis needs valid feature "
                                        "file! Either %s is not a valid "
                                        "filename or features are not saved "
                                        "in the scenario.", feat_fn)
                    self.logger.error("Skipping Feature Analysis.")
                    return
                else:
                    feat_names = in_reader.read_instance_features_file(
                        self.scenario.feature_fn)[0]
        else:
            feat_names = copy.deepcopy(self.scenario.feature_names)

        self.website["Feature Analysis"] = OrderedDict([])

        # feature importance using forward selection
        if importance:
            self.website["Feature Analysis"][
                "Feature Importance"] = OrderedDict()
            imp, plots = self.analyzer.feature_importance()
            imp = DataFrame(data=list(imp.values()), index=list(imp.keys()),
                            columns=["Error"])
            imp = imp.to_html()  # this is a table with the values in html
            self.website["Feature Analysis"]["Feature Importance"]["Table"] = {
                "table": imp}
            for p in plots:
                name = os.path.splitext(os.path.basename(p))[0]
                self.website["Feature Analysis"]["Feature Importance"][name] = {
                    "figure": p}

        # box and violin plots
        if box_violin:
            name_plots = self.analyzer.feature_analysis('box_violin',
                                                        feat_names)
            self.website["Feature Analysis"][
                "Violin and Box Plots"] = OrderedDict()
            for plot_tuple in name_plots:
                key = "%s" % (plot_tuple[0])
                self.website["Feature Analysis"]["Violin and Box Plots"][key] = {
                    "figure": plot_tuple[1]}

        # correlation plot
        if correlation:
            correlation_plot = self.analyzer.feature_analysis('correlation',
                                                              feat_names)
            if correlation_plot:
                self.website["Feature Analysis"]["Correlation"] = {
                    "figure": correlation_plot}

        # cluster instances in feature space
        if clustering:
            cluster_plot = self.analyzer.feature_analysis('clustering',
                                                          feat_names)
            self.website["Feature Analysis"]["Clustering"] = {
                "figure": cluster_plot}

        self.build_website()

    def build_website(self):
        self.builder.generate_html(self.website)
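# Hedged usage sketch for the facade above: analyze two SMAC output folders
# and build the HTML report. The folder names and ta_exec_dir are
# placeholders; the keyword arguments follow the signatures defined above.
cave = CAVE(folders=["smac3-output/run_1", "smac3-output/run_2"],
            output="cave-report",
            ta_exec_dir=".",
            missing_data_method="epm")
cave.analyze(performance=True, cdf=True, scatter=True, confviz=True,
             param_importance=['forward_selection', 'ablation', 'fanova'])
# The report ends up in cave-report/report.html (written by build_website()).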
def read_configurations_for_task_id(task_id, task_id_to_dir, cs):
    incumbents_test_rval = list()

    rh = RunHistory()
    for entry in task_id_to_dir[task_id]:
        # Merge all evaluations from multiple SMAC runs into one runhistory
        rh.update_from_json(entry, cs)

    X = []
    run_times = []
    Y_train = []
    Y_test = []
    status = []
    results = {key.config_id: value for key, value in rh.data.items()}
    max_lc_length = 0

    for config_id in results:
        run_times_tmp = []
        y_train = []
        y_test = []
        if results[config_id].status == StatusType.SUCCESS:
            run_times_tmp.append(results[config_id].time)
            y_train.append(results[config_id].additional_info['train_loss'])
            y_test.append(results[config_id].additional_info['test_loss'])
            status.append(0)
        else:
            run_times_tmp.append(results[config_id].time)
            y_train.append(1.0)
            y_test.append(1.0)
            status.append(1)

        X.append(rh.ids_config[config_id])
        run_times.append(run_times_tmp)
        Y_train.append(y_train)
        Y_test.append(y_test)

    run_times = np.array(run_times)
    Y_train = np.array(Y_train)
    Y_test = np.array(Y_test)

    assert len(X) != 0
    assert run_times.dtype == np.float64, (task_id, run_times.dtype)
    assert len(X) == run_times.shape[0]
    assert Y_train.dtype == np.float64, (task_id, Y_train.dtype)
    assert len(X) == Y_train.shape[0]
    assert Y_test.dtype == np.float64, (task_id, Y_test.dtype)
    assert len(X) == Y_test.shape[0]
    if len(run_times.shape) == 1:
        raise ValueError()

    # Get all configs with the best value
    incumbent_test = list(np.where(Y_test == Y_test.min())[0])
    # Shuffle incumbent array
    rng = np.random.RandomState(task_id)
    rng.shuffle(incumbent_test)

    for idx in incumbent_test:
        config = Configuration(cs, values=X[idx])
        incumbents_test_rval.append(config)

    # Return all incumbents
    return task_id, incumbents_test_rval
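# Hedged driver sketch for the helper above: `task_id_to_dir` maps a task id
# to the runhistory.json files of its SMAC runs, and `cs` is the matching
# ConfigurationSpace; both names and the paths are assumptions for
# illustration.
task_id_to_dir = {3: ['run_1/runhistory.json', 'run_2/runhistory.json']}
task_id, incumbents = read_configurations_for_task_id(3, task_id_to_dir, cs)
print('Task %d: %d tied test incumbents' % (task_id, len(incumbents)))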
class SMACrun(SMAC):
    """
    SMACrun keeps all information on a specific SMAC run. Extends the
    standard SMAC-facade.
    """

    def __init__(self, folder: str, ta_exec_dir: Union[str, None] = None):
        """Initialize scenario, runhistory and incumbent from folder, execute
        init-method of SMAC facade (so you could simply use SMAC-instances
        instead).

        Parameters
        ----------
        folder: string
            output-dir of this run
        ta_exec_dir: string
            if the execution directory for the SMAC-run differs from the cwd,
            there might be problems loading instance-, feature- or PCS-files
            in the scenario-object. since instance- and PCS-files are
            necessary, specify the path to the execution-dir of SMAC here
        """
        run_1_existed = os.path.exists('run_1')
        self.logger = logging.getLogger("cave.SMACrun.{}".format(folder))
        in_reader = InputReader()

        self.folder = folder
        self.logger.debug("Loading from %s", folder)
        split_folder = os.path.split(folder)
        self.logger.info(split_folder)

        if ta_exec_dir is None:
            ta_exec_dir = '.'

        self.scen_fn = os.path.join(folder, 'scenario.txt')
        self.rh_fn = os.path.join(folder, 'runhistory.json')
        self.traj_fn = os.path.join(folder, 'traj_aclib2.json')
        self.traj_old_fn = os.path.join(folder, 'traj_old.csv')

        # Create Scenario (disable output_dir to avoid cluttering)
        scen_dict = in_reader.read_scenario_file(self.scen_fn)
        scen_dict['output_dir'] = ""
        with changedir(ta_exec_dir):
            self.scen = Scenario(scen_dict)

        # Load runhistory and trajectory
        self.runhistory = RunHistory(average_cost)
        self.runhistory.update_from_json(self.rh_fn, self.scen.cs)
        self.traj = TrajLogger.read_traj_aclib_format(fn=self.traj_fn,
                                                      cs=self.scen.cs)
        incumbent = self.traj[-1]['incumbent']
        self.train_inst = self.scen.train_insts
        self.test_inst = self.scen.test_insts

        # Initialize SMAC-object
        super().__init__(scenario=self.scen, runhistory=self.runhistory)
        # restore_incumbent=incumbent)
        # TODO use restore, delete next line
        self.solver.incumbent = incumbent

        if (not run_1_existed) and os.path.exists('run_1'):
            shutil.rmtree('run_1')

    def get_incumbent(self):
        return self.solver.incumbent
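# Hedged usage sketch: load a finished SMAC run from disk and query its
# incumbent. The folder name is a placeholder; get_incumbent() is defined
# on the class above.
run = SMACrun(folder="smac3-output/run_1", ta_exec_dir=".")
print("Incumbent of %s: %s" % (run.folder, run.get_incumbent()))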