def run_master(args):
    NS = hpns.NameServer(run_id=args.run_id,
                         nic_name=args.nic_name,
                         working_directory=args.bohb_root_path)
    ns_host, ns_port = NS.start()

    # Start a background worker for the master node
    if args.optimize_generalist:
        w = AggregateWorker(run_id=args.run_id,
                            host=ns_host,
                            nameserver=ns_host,
                            nameserver_port=ns_port,
                            working_directory=args.bohb_root_path,
                            n_repeat=args.n_repeat,
                            has_repeats_as_budget=args.n_repeat is None,
                            time_budget=args.time_budget,
                            time_budget_approx=args.time_budget_approx,
                            performance_matrix=args.performance_matrix)
    else:
        w = SingleWorker(run_id=args.run_id,
                         host=ns_host,
                         nameserver=ns_host,
                         nameserver_port=ns_port,
                         working_directory=args.bohb_root_path,
                         n_repeat=args.n_repeat,
                         dataset=args.dataset,
                         time_budget=args.time_budget,
                         time_budget_approx=args.time_budget_approx)
    w.run(background=True)

    # Create an optimizer
    result_logger = hpres.json_result_logger(directory=args.bohb_root_path,
                                             overwrite=False)

    if args.previous_run_dir is not None:
        previous_result = hpres.logged_results_to_HBS_result(args.previous_run_dir)
    else:
        previous_result = None

    logger = logging.getLogger(__file__)
    logging_level = getattr(logging, args.logger_level)
    logger.setLevel(logging_level)

    optimizer = BOHB(configspace=get_configspace(),
                     run_id=args.run_id,
                     host=ns_host,
                     nameserver=ns_host,
                     nameserver_port=ns_port,
                     min_budget=args.n_repeat_lower_budget,
                     max_budget=args.n_repeat_upper_budget,
                     result_logger=result_logger,
                     logger=logger,
                     previous_result=previous_result)

    res = optimizer.run(n_iterations=args.n_iterations)

    # Shutdown
    optimizer.shutdown(shutdown_workers=True)
    NS.shutdown()
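# Hedged sketch (not part of the original source): run_master expects additional worker
# processes to connect from other nodes. Following hpbandster's standard cluster pattern,
# a worker-side entry point could look like the following. SingleWorker and the argparse
# fields (run_id, nic_name, bohb_root_path, ...) are assumed to match run_master above.
def run_worker(args):
    # Resolve this node's own host address from the network interface name
    host = hpns.nic_name_to_host(args.nic_name)
    w = SingleWorker(run_id=args.run_id,
                     host=host,
                     working_directory=args.bohb_root_path,
                     n_repeat=args.n_repeat,
                     dataset=args.dataset,
                     time_budget=args.time_budget,
                     time_budget_approx=args.time_budget_approx)
    # Read nameserver host/port from the credentials file that NS.start() wrote
    # into the shared working directory
    w.load_nameserver_credentials(working_directory=args.bohb_root_path)
    w.run(background=False)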
def test_incumbent_trajectory(self):
    """ Load example result and check incumbent_trajectory generation
    for general errors (whitebox-test). """
    result = logged_results_to_HBS_result(self.result_path)
    # All budgets
    traj = get_incumbent_trajectory(result, result.HB_config['budgets'], mode='racing')
    traj = get_incumbent_trajectory(result, result.HB_config['budgets'], mode='minimum')
    traj = get_incumbent_trajectory(result, result.HB_config['budgets'], mode='prefer_higher_budget')
    # Single budgets
    traj = get_incumbent_trajectory(result, [result.HB_config['budgets'][0]], mode='racing')
    traj = get_incumbent_trajectory(result, [result.HB_config['budgets'][0]], mode='minimum')
    traj = get_incumbent_trajectory(result, [result.HB_config['budgets'][0]], mode='prefer_higher_budget')
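# Illustrative note (not from the original test): an hpbandster-style incumbent
# trajectory is a dict of parallel lists, so a structural assertion on the smoke
# test above could look like this:
#
#   traj = get_incumbent_trajectory(result, result.HB_config['budgets'], mode='racing')
#   assert set(traj.keys()) >= {'config_ids', 'times_finished', 'budgets', 'losses'}
#   assert len(traj['config_ids']) == len(traj['losses'])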
def build_run_trajectories(results_folder, autonet_config):
    # parse results
    res = logged_results_to_HBS_result(results_folder)
    incumbent_trajectory = res.get_incumbent_trajectory(bigger_is_better=False,
                                                        non_decreasing_budget=False)

    # prepare
    metric_name = autonet_config["train_metric"]
    all_metrics = autonet_config["additional_metrics"] + [metric_name]
    additional_metric_names = ["val_" + m for m in all_metrics]
    additional_metric_names += ["train_" + m for m in all_metrics]
    additional_metric_names += autonet_config["additional_logs"]

    # initialize incumbent trajectories
    incumbent_trajectories = dict()

    # save incumbent trajectories
    incumbent_trajectories[metric_name] = incumbent_trajectory
    incumbent_trajectory["flipped"] = not autonet_config["minimize"]
    for name in additional_metric_names:
        tj = copy(incumbent_trajectory)
        tj["losses"] = [run["info"][name]
                        for config_id, budget in zip(tj["config_ids"], tj["budgets"])
                        for run in res.get_runs_by_id(config_id)
                        if run["budget"] == budget and name in run["info"]]
        tj["flipped"] = False
        if tj["losses"]:
            incumbent_trajectories[name] = tj

    return incumbent_trajectories
def parse_results(self, pipeline_config):
    try:
        res = logged_results_to_HBS_result(pipeline_config["result_logger_dir"])
        id2config = res.get_id2config_mapping()
        incumbent_trajectory = res.get_incumbent_trajectory(bigger_is_better=False,
                                                            non_decreasing_budget=False)
    except Exception as e:
        raise RuntimeError(
            "Error parsing results. Check results.json and output for more details. "
            "An empty results.json is usually caused by a misconfiguration of AutoNet.")

    if len(incumbent_trajectory['config_ids']) == 0:
        return dict()

    final_config_id = incumbent_trajectory['config_ids'][-1]
    final_budget = incumbent_trajectory['budgets'][-1]
    best_run = [r for r in res.get_runs_by_id(final_config_id)
                if r.budget == final_budget][0]
    return {'optimized_hyperparameter_config': id2config[final_config_id]['config'],
            'budget': final_budget,
            'loss': best_run.loss,
            'info': best_run.info}
def get_best_models_from_log(log_dir):
    if not os.path.isdir(log_dir):
        log_dir = log_dir.replace('nierhoff', 'dingsda')

    result = hpres.logged_results_to_HBS_result(log_dir)

    best_models = []
    for value in result.data.values():
        try:
            loss = value.results[1.0]['loss']
            model_name = value.results[1.0]['info']['model_name']
            if not os.path.isfile(model_name):
                model_name = model_name.replace('nierhoff', 'dingsda')
            best_models.append((loss, model_name))
        except (KeyError, TypeError):
            # run did not finish on budget 1.0 or logged no result
            continue

    # before AUC (objective minimized)
    # print("sorting from low to high values (non-AUC)")
    # best_models.sort(key=lambda x: x[0])
    # best_models = best_models[:MODEL_NUM]

    # AUC (objective maximized)
    print("sorting from high to low values (AUC)")
    best_models.sort(key=lambda x: x[0], reverse=True)
    best_models = best_models[:MODEL_NUM]

    return best_models
def generateLossComparison(out_dir, show=False):
    '''
    Function to generate box plots over different budgets for an entire BOHB run
    :param out_dir: Directory where the plots are to be saved
    :param show: True/False to display the plots (additionally to saving)
    :return: void
    '''
    # load the example run from the log files
    result = hpres.logged_results_to_HBS_result(out_dir)

    plot_data = {}
    for k in result.data.keys():
        try:
            sample = result.data[k].results
        except Exception:  # was `except TryError:`, which is not a defined exception
            continue
        for b in sample:
            if sample[b] is None:
                continue
            if int(b) not in plot_data:
                plot_data[int(b)] = [[sample[b]['info']['train_loss']],
                                     [sample[b]['info']['test_loss']]]
            else:
                plot_data[int(b)][0].append(sample[b]['info']['train_loss'])
                plot_data[int(b)][1].append(sample[b]['info']['test_loss'])

    max_loss = 0
    for k in plot_data.keys():
        max_loss = max(max_loss, np.max(np.array(plot_data[k])))

    fig = plt.figure(figsize=(10, 4), dpi=150)
    plt.suptitle("Loss comparison of Train and Validation over Epochs (Budget)")
    gs = gridspec.GridSpec(1, len(plot_data.keys()))

    # one boxplot panel per budget (plain loop instead of the original exec() calls)
    for i, k in enumerate(plot_data.keys()):
        ax = plt.subplot(gs[0, i])
        ax.grid(which='major', linestyle=':', axis='y')
        bp = ax.boxplot(plot_data[k], showmeans=True, meanline=True, sym='+',
                        meanprops={'linestyle': '-'},
                        whiskerprops={'linestyle': '--', 'color': 'blue'})
        ax.set_ylim(0, max_loss + 0.1)
        ax.set_xlabel('Budget: ' + str(k))
        if i == 0:
            ax.set_ylabel('Loss')
        setBoxColors(bp)

    # invisible proxy lines for the legend
    hB, = plt.plot([1, 1], 'b-')
    hR, = plt.plot([1, 1], 'r-')
    hG, = plt.plot([1, 1], 'g-')
    plt.figlegend((hB, hR, hG), ('Training', 'Validation', 'Mean'), loc='upper right')
    hB.set_visible(False)
    hR.set_visible(False)
    hG.set_visible(False)

    plt.savefig(out_dir + '/loss_comparison_plot.png', dpi=300)
    if show:
        plt.show()
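# Usage sketch (path is hypothetical): point generateLossComparison at a directory
# containing BOHB's configs.json/results.json; the plot is saved alongside the logs.
#
#   generateLossComparison('results/bohb_run_0', show=False)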
def __init__(self, hp_names, result_object=None, result_path=None):
    """
    Visualize hpbandster learning curves in an interactive bokeh-plot.

    Parameters
    ----------
    hp_names: List[str]
        list with hyperparameter names
    result_object: Result
        hpbandster result-object. must be specified if result_path is not
    result_path: str
        path to hpbandster result-folder. must contain configs.json and
        results.json. must be specified if result_object is not
    """
    self.logger = logging.getLogger(self.__module__ + '.' + self.__class__.__name__)

    try:
        from hpbandster.core.result import logged_results_to_HBS_result
        from hpbandster.core.result import extract_HBS_learning_curves
    except ImportError as err:
        self.logger.exception(err)
        raise ImportError("You need to install hpbandster (e.g. 'pip install hpbandster') "
                          "to analyze bohb-results.")

    if (result_path and result_object) or not (result_path or result_object):
        raise ValueError("Specify either result_path or result_object. (currently \"%s\" and \"%s\")"
                         % (result_path, result_object))
    elif result_path:
        result_object = logged_results_to_HBS_result(result_path)

    incumbent_trajectory = result_object.get_incumbent_trajectory()

    self.hp_names = hp_names
    self.result_object = result_object
    self.lcs = result_object.get_learning_curves(lc_extractor=extract_HBS_learning_curves)
def setup_fanova_analysis(run_name):
    bohb_logs_dir = run_name
    res = hpres.logged_results_to_HBS_result(bohb_logs_dir)
    inc_id = res.get_incumbent_id()
    id2conf = res.get_id2config_mapping()
    inc_trajectory = res.get_incumbent_trajectory()
    print(inc_trajectory)
    print(res.get_runs_by_id(inc_id))

    all_runs = list(filter(lambda r: not (r.info is None or r.loss is None),
                           res.get_all_runs()))
    budgets = res.HB_config['budgets']

    runs_by_budget = {}
    for b in budgets:
        runs_by_budget[b] = list(filter(lambda r: r.budget == b, all_runs))

    fanova_analysis(budgets=budgets, res=res, runs_by_budget=runs_by_budget,
                    id2conf=id2conf, bohb_logs_dir=bohb_logs_dir)
def get_trajectories(true_paths, surrogate_paths, methods=['BANANAS'], surrogate='xgb'):
    print(true_paths)
    print(surrogate_paths)

    all_trajectories = {}
    for m in methods:
        dfs = []
        for i, true_path in enumerate(true_paths):
            print(true_path)
            true_results = hpres.logged_results_to_HBS_result(true_path)
            true_inc = extract_incumbents(true_results, surrogate=False)
            error = 100 - true_inc[:, 0]
            times = true_inc[:, 1]
            df = pd.DataFrame({str(i): error}, index=times)
            dfs.append(df)
        df_true = merge_and_fill_trajectories(dfs, default_value=None)

        dfs = []
        for i, surr_path in enumerate(surrogate_paths):
            try:
                print(surr_path)
                surr_results = hpres.logged_results_to_HBS_result(surr_path)
                surr_inc = extract_incumbents(surr_results, surrogate=True)
                error = 100 - surr_inc[:, 0]
                times = surr_inc[:, 1]
                df = pd.DataFrame({str(i): error}, index=times)
                dfs.append(df)
            except Exception as e:
                print('Could not read:', surr_path)
        df_surr = merge_and_fill_trajectories(dfs, default_value=None)

        all_trajectories[m + ' true'] = {'time_stamps': np.array(df_true.index),
                                         'errors': np.array(df_true.T)}
        all_trajectories[m + ' surr'] = {'time_stamps': np.array(df_surr.index),
                                         'errors': np.array(df_surr.T)}

    return all_trajectories
def extract_best_config(dataset):
    """
    Gets the bohb results of the given dataset and returns the best architecture
    and configuration.
    :param dataset: string
    :return: incumbent_model: string, one of ['ESN', 'CNN_1D', 'LSTM', 'FCN']
             incumbent_config: configuration object
    """
    models = ['ESN', 'CNN_1D', 'LSTM', 'FCN']
    result_dir = 'logs_sample_dataset'  # BOHB results for each model&dataset combination
    results = [os.path.join(result_dir, name) for name in os.listdir(result_dir)]
    results_current_dataset = [r for r in results if dataset in r]

    # to store the losses of each model and compare the results later
    incumbent_configs = []
    incumbent_losses = []

    for model in models:
        result_folder = [r for r in results_current_dataset if model in r]
        result = result_folder[0]

        # load the example run from the log files
        result = hpres.logged_results_to_HBS_result(result)

        # get the 'dict' that translates config ids to the actual configurations
        id2conf = result.get_id2config_mapping()

        # get incumbent id
        inc_id = result.get_incumbent_id()

        # get result of the incumbent
        inc_runs = result.get_runs_by_id(inc_id)
        inc_run = inc_runs[-1]

        inc_config = id2conf[inc_id]['config']  # best config
        inc_loss = inc_run.loss  # loss of the best config

        incumbent_configs.append(inc_config)
        incumbent_losses.append(inc_loss)

    min_loss, idx = min((val, idx) for (idx, val) in enumerate(incumbent_losses))
    incumbent_model = models[idx]
    incumbent_config = incumbent_configs[idx]

    print("################################### Dataset {} #######################################"
          .format(dataset))
    print("Best performing model: {}".format(incumbent_model))
    print("Configuration: {}".format(incumbent_config))
    print("Score: {}".format(-min_loss))
    print("\n")

    return incumbent_model, incumbent_config
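# Usage sketch (dataset name is hypothetical): the returned config can then be passed
# to the constructor of the matching model class.
#
#   incumbent_model, incumbent_config = extract_best_config('sample_dataset_1')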
def get_data():
    list_data = []

    for log_dir in LOG_DIRS:
        result = hpres.logged_results_to_HBS_result(log_dir)
        all_runs = result.get_all_runs()
        id2conf = result.get_id2config_mapping()

        # calculate avg. runtime
        ts = []
        for i, run in enumerate(all_runs):
            t_s = run['time_stamps']['started']
            t_f = run['time_stamps']['finished']
            ts.append(t_f - t_s)
            if i >= MAX_VALS:
                break
        print(log_dir)
        print('mean [s]: ' + str(statistics.mean(ts)))
        print('std [s]: ' + str(statistics.stdev(ts)))

        # copy data to list
        data = []
        for run in all_runs:
            avg_rewards = ast.literal_eval(run['info']['score_list'])
            # print(avg_rewards)
            config_id = run['config_id']

            # handle timeout cases (impute missing values)
            if avg_rewards[0] < -1e5 and avg_rewards[1] > -1e5:
                avg_rewards[0] = avg_rewards[1]
            for k in range(1, len(avg_rewards)):
                if avg_rewards[k] < -1e5 and avg_rewards[k - 1] > -1e5:
                    avg_rewards[k] = avg_rewards[k - 1]

            data.append(avg_rewards)
        list_data.append(data)

    # copy from list to numpy array
    proc_data = []
    n = len(list_data[0][0])
    for data in list_data:
        np_data = np.zeros([MAX_VALS, n])
        for i in range(len(np_data)):
            np_data[i] = np.array(data[i])
        mean = np.mean(np_data, axis=0)
        std = np.std(np_data, axis=0)
        proc_data.append((mean, std))

    return proc_data, list_data
def parse_results(self, result_logger_dir):
    res = logged_results_to_HBS_result(result_logger_dir)
    id2config = res.get_id2config_mapping()
    incumbent_trajectory = res.get_incumbent_trajectory(bigger_is_better=False,
                                                        non_decreasing_budget=False)

    if len(incumbent_trajectory['config_ids']) == 0:
        return dict()

    final_config_id = incumbent_trajectory['config_ids'][-1]
    return (incumbent_trajectory['losses'][-1],
            id2config[final_config_id]['config'],
            incumbent_trajectory['budgets'][-1])
def get_trajectories_per_method(methods, suffix='', surrogate=False,
                                append_instead_of_combining=False):
    print(methods)

    all_trajectories = {}
    # note: `surrogate` and `append_instead_of_combining` are overridden per method below
    for m, (paths, surrogate, append_instead_of_combining) in methods.items():
        dfs = []
        if append_instead_of_combining:
            # append configs to one long trajectory
            hp_results = []
            for i, path in enumerate(paths):
                print(path)
                true_results = hpres.logged_results_to_HBS_result(path)
                hp_results.append(true_results)
            true_inc = extract_incumbents(hp_results, surrogate=surrogate)
            error = 1 - true_inc[:, 0] / 100
            times = true_inc[:, 1]
            df = pd.DataFrame({str(0): error}, index=times)
            dfs.append(df)
        else:
            # average over trajectories
            for i, path in enumerate(paths):
                print(path)
                true_results = hpres.logged_results_to_HBS_result(path)
                true_inc = extract_incumbents(true_results, surrogate=surrogate)
                error = 1 - true_inc[:, 0] / 100
                times = true_inc[:, 1]
                df = pd.DataFrame({str(i): error}, index=times)
                dfs.append(df)

        df_true = merge_and_fill_trajectories(dfs, default_value=None)
        all_trajectories[m + suffix] = {'time_stamps': np.array(df_true.index),
                                        'errors': np.array(df_true.T)}

    return all_trajectories
def analyze_bohb(log_dir):
    # load the example run from the log files
    result = hpres.logged_results_to_HBS_result(log_dir)

    plot_parallel_scatter(result, with_mirrored_sampling=False, with_nes_step_size=False)
    # plot_parallel_scatter(result, with_mirrored_sampling=False, with_nes_step_size=True)
    plot_parallel_scatter(result, with_mirrored_sampling=True, with_nes_step_size=False)
def fit(self, pipeline_config, final_metric_score, optimized_hyperparameter_config,
        budget, refit=None):
    if refit or pipeline_config["ensemble_size"] == 0 or pipeline_config["task_id"] not in [-1, 1]:
        return {"final_metric_score": final_metric_score,
                "optimized_hyperparameter_config": optimized_hyperparameter_config,
                "budget": budget}

    filename = os.path.join(pipeline_config["result_logger_dir"],
                            'predictions_for_ensemble.npy')
    train_metric = self.pipeline[MetricSelector.get_name()].metrics[pipeline_config["train_metric"]]
    y_transform = self.pipeline[OneHotEncoding.get_name()].complete_y_tranformation
    result = logged_results_to_HBS_result(pipeline_config["result_logger_dir"])

    all_predictions, labels, model_identifiers, _ = read_ensemble_prediction_file(
        filename=filename, y_transform=y_transform)
    ensemble_selection, ensemble_configs = build_ensemble(
        result=result,
        train_metric=train_metric,
        minimize=pipeline_config["minimize"],
        ensemble_size=pipeline_config["ensemble_size"],
        all_predictions=all_predictions,
        labels=labels,
        model_identifiers=model_identifiers,
        only_consider_n_best=pipeline_config["ensemble_only_consider_n_best"],
        sorted_initialization_n_best=pipeline_config["ensemble_sorted_initialization_n_best"])

    return {"final_metric_score": final_metric_score,
            "optimized_hyperparameter_config": optimized_hyperparameter_config,
            "budget": budget,
            "ensemble": ensemble_selection,
            "ensemble_final_metric_score": ensemble_selection.get_validation_performance(),
            "ensemble_configs": ensemble_configs}
def load_config(dir):
    '''
    Given a directory where BOHB results have been logged, loads the incumbent configuration.
    The directory needs to contain 'configs.json' and 'results.json' as BOHB outputs.
    :param dir: Directory where BOHB results exist
    :return: dict containing the incumbent configuration
    '''
    result = hpres.logged_results_to_HBS_result(dir)
    id2conf = result.get_id2config_mapping()
    inc_id = result.get_incumbent_id()
    inc_config = id2conf[inc_id]['config']
    return inc_config
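# Usage sketch (path and hyperparameter name are hypothetical): the returned dict maps
# hyperparameter names to the incumbent's values.
#
#   inc_config = load_config('logs/bohb_run_0')
#   lr = inc_config['learning_rate']  # assuming such a hyperparameter was searched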
def _get_incumbent(self, i):
    result = self.results[i]
    config_space = self.config_spaces[i]
    if isinstance(result, str):
        result = logged_results_to_HBS_result(result)
    id2config = result.get_id2config_mapping()
    trajectory = result.get_incumbent_trajectory(
        bigger_is_better=self.bigger_is_better,
        non_decreasing_budget=self.bigger_is_better)
    incumbent = id2config[trajectory["config_ids"][-1]]["config"]
    return Configuration(config_space, incumbent)
def add_result(self, result, config_space, origin):
    try:
        if isinstance(result, str):
            result = logged_results_to_HBS_result(result)
        result.get_incumbent_trajectory()
    except Exception:
        print("Did not add empty result")
        return False
    self.results.append(result)
    self.config_spaces.append(config_space)
    self.origins.append(origin)
    return True
def visualizeBOHB(log_dir):
    # load the example run from the log files
    result = hpres.logged_results_to_HBS_result(log_dir)

    # get all executed runs
    all_runs = result.get_all_runs()

    # get the 'dict' that translates config ids to the actual configurations
    id2conf = result.get_id2config_mapping()

    # Here is how you get the incumbent (best configuration)
    inc_id = result.get_incumbent_id()

    # let's grab the run on the highest budget
    inc_runs = result.get_runs_by_id(inc_id)
    inc_run = inc_runs[-1]

    # We have access to all information: the config, the loss observed during
    # optimization, and all the additional information
    inc_valid_score = inc_run.loss
    inc_config = id2conf[inc_id]['config']

    print('Best found configuration:')
    print(inc_config)
    # print('It achieved accuracies of %f (validation) and %f (test).' % (-inc_valid_score, inc_test_score))

    # Let's plot the observed losses grouped by budget,
    hpvis.losses_over_time(all_runs)

    # the number of concurrent runs,
    hpvis.concurrent_runs_over_time(all_runs)

    # and the number of finished runs.
    hpvis.finished_runs_over_time(all_runs)

    # This one visualizes the spearman rank correlation coefficients of the losses
    # between different budgets.
    hpvis.correlation_across_budgets(result)

    # For model based optimizers, one might wonder how much the model actually helped.
    # The next plot compares the performance of configs picked by the model vs. random ones
    hpvis.performance_histogram_model_vs_random(all_runs, id2conf)

    plot_accuracy_over_budget(result)
    plot_parallel_scatter(result)

    plt.show()
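# Usage sketch (path is hypothetical): visualizeBOHB assumes the usual module-level
# imports `import hpbandster.core.result as hpres` and
# `import hpbandster.visualization as hpvis`.
#
#   visualizeBOHB('logs/bohb_run_0')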
def test_model_warmstarting(self):
    result1_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_result1")
    result2_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_result2")
    empty_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "empty_result")
    result1 = logged_results_to_HBS_result(result1_path)
    result2 = logged_results_to_HBS_result(result2_path)

    cs = ConfigSpace.ConfigurationSpace()
    cs.add_hyperparameters([
        ConfigSpace.hyperparameters.UniformFloatHyperparameter("A", lower=0, upper=10),
        ConfigSpace.hyperparameters.UniformFloatHyperparameter("B", lower=0, upper=10)
    ])

    # train model
    builder = WarmstartedModelBuilder()
    r = builder.train_kde(result1, cs)
    self.assertEqual(len(r[0]), 1)
    self.assertEqual(len(r[1]), 1)
    self.assertEqual(r[0][0].data.shape, (3, 2))
    self.assertEqual(r[1][0].data.shape, (6, 2))

    # build
    builder.add_result(empty_path, cs, "empty")
    builder.add_result(result1, cs, "result1")
    builder.add_result(result2, cs, "result2")
    r = builder.build()
    self.assertEqual(len(r._good_kdes), 2)
    self.assertEqual(len(r._bad_kdes), 2)
    self.assertEqual(len(r._kde_config_spaces), 2)
def save_config(source, dest, name):
    '''
    Reads the incumbent from a BOHB output directory and writes it as a JSON
    file in the specified directory.
    :param source: Directory from where to read the incumbent
    :param dest: Directory to save the file
    :param name: Name given to the JSON being saved
    :return: void
    '''
    result = hpres.logged_results_to_HBS_result(source)
    id2conf = result.get_id2config_mapping()
    inc_id = result.get_incumbent_id()
    inc_config = id2conf[inc_id]['config']

    with open(dest + name + '.json', 'w') as f:
        f.write(json.dumps(inc_config))
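# Usage sketch (paths are hypothetical): together with load_config above this gives a
# round trip from a finished BOHB run to a reusable JSON config. Note that save_config
# concatenates dest + name, so dest needs a trailing path separator:
#
#   save_config('logs/bohb_run_0', 'configs/', 'incumbent')
#   inc_config = json.load(open('configs/incumbent.json'))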
def parse_results(self, result_logger_dir):
    try:
        res = logged_results_to_HBS_result(result_logger_dir)
        id2config = res.get_id2config_mapping()
        incumbent_trajectory = res.get_incumbent_trajectory(bigger_is_better=False,
                                                            non_decreasing_budget=False)
    except Exception as e:
        raise RuntimeError(
            "Error parsing results. Check results.json and output for more details. "
            "An empty results.json is usually caused by a misconfiguration of AutoNet.")

    if len(incumbent_trajectory['config_ids']) == 0:
        return dict()

    final_config_id = incumbent_trajectory['config_ids'][-1]
    return (incumbent_trajectory['losses'][-1],
            id2config[final_config_id]['config'],
            incumbent_trajectory['budgets'][-1])
def build_run_trajectories(results_folder, autonet_config):
    # parse results
    try:
        res = logged_results_to_HBS_result(results_folder)
        incumbent_trajectory = res.get_incumbent_trajectory(bigger_is_better=False,
                                                            non_decreasing_budget=False)
    except Exception:
        print("No incumbent trajectory found")
        return dict()

    # prepare
    metric_name = autonet_config["train_metric"]
    all_metrics = autonet_config["additional_metrics"] + [metric_name]
    additional_metric_names = ["val_" + m for m in all_metrics]
    additional_metric_names += ["train_" + m for m in all_metrics]
    additional_metric_names += autonet_config["additional_logs"]

    # initialize incumbent trajectories
    incumbent_trajectories = dict()

    # save incumbent trajectories
    incumbent_trajectories[metric_name] = incumbent_trajectory
    incumbent_trajectory["flipped"] = not autonet_config["minimize"]
    for name in additional_metric_names:
        tj = copy(incumbent_trajectory)
        log_available = [name in run["info"]
                         for config_id, budget in zip(tj["config_ids"], tj["budgets"])
                         for run in res.get_runs_by_id(config_id)
                         if run["budget"] == budget]
        tj["losses"] = [run["info"][name]
                        for config_id, budget in zip(tj["config_ids"], tj["budgets"])
                        for run in res.get_runs_by_id(config_id)
                        if run["budget"] == budget and name in run["info"]]
        for key, value_list in tj.items():
            if key in ["losses", "flipped"]:
                continue
            tj[key] = [value for i, value in enumerate(value_list) if log_available[i]]
        tj["flipped"] = False
        if tj["losses"]:
            incumbent_trajectories[name] = tj

    # assume the first random config has been evaluated already at time 0
    for name, trajectory in incumbent_trajectories.items():
        for key, value_list in trajectory.items():
            if not isinstance(value_list, (list, tuple)):
                continue
            trajectory[key] = [value_list[0] if key != "times_finished" else 0] + value_list

    return incumbent_trajectories
def convert(self, folders, output_dir=None):
    """Convert hpbandster-results into smac-format, aggregating parallel runs
    along the budgets, so it is treated as one run with the same budgets.
    Throws ValueError when budgets of individual runs don't match.

    WIP: make hpbandster-conversion not aggregate parallel runs

    Parameters
    ----------
    folders: List[str]
        list of parallel hpbandster-runs (folder paths!)
    output_dir: str
        path to CAVE's output-directory

    Returns
    -------
    folder2result: {str : hpbandster.core.result}
        map parallel-run-folder-paths to hpbandster-result in original format
    folder2budgets: {str : {str or int or float : str}}
        map folder to budget to paths to converted data
    """
    try:
        from hpbandster.core.result import Result as HPBResult
        from hpbandster.core.result import logged_results_to_HBS_result
    except ImportError as e:
        raise ImportError("To analyze BOHB-data, please install hpbandster "
                          "(e.g. `pip install hpbandster`)")

    # Original hpbandster-formatted result-object
    folder2result = OrderedDict([(f, logged_results_to_HBS_result(f)) for f in folders])

    # Get a list with alternative interpretations of the configspace-file
    # (if it's a .pcs-file; for .json-files it's only one element)
    cs_interpretations = self.load_configspace(folders[0])

    # Using temporary files for the intermediate smac-result-like format
    if not output_dir:
        self.logger.debug("New outputdir")
        output_dir = tempfile.mkdtemp()

    # Actual conversion
    folder2budgets = self.hpbandster2smac(folder2result, cs_interpretations, output_dir)

    return folder2result, folder2budgets
def build(self):
    good_kdes = list()
    bad_kdes = list()
    for i, (result, config_space, origin) in enumerate(
            zip(self.results, self.config_spaces, self.origins)):
        print(i)
        if isinstance(result, str):
            try:
                result = logged_results_to_HBS_result(result)
            except Exception:
                continue
        good, bad, budgets = self.train_kde(result, config_space)
        good_kdes.append(dict(zip(budgets, good)))
        bad_kdes.append(dict(zip(budgets, bad)))
        self.kde_config_spaces.append(config_space)
    return WarmstartedModel(good_kdes, bad_kdes, self.kde_config_spaces, self.origins)
def fit(self, result_dir, fit_duration, final_score, autonet, task_id):
    if task_id not in [-1, 1]:
        time.sleep(60)
        return dict()

    logging.getLogger('benchmark').info("Create and save summary")

    autonet_config = autonet.autonet_config
    res = logged_results_to_HBS_result(result_dir)
    id2config = res.get_id2config_mapping()
    incumbent_trajectory = res.get_incumbent_trajectory(bigger_is_better=False,
                                                        non_decreasing_budget=False)
    final_config_id = incumbent_trajectory['config_ids'][-1]
    final_budget = incumbent_trajectory['budgets'][-1]
    incumbent_config = id2config[final_config_id]['config']
    final_info = [run["info"] for run in res.get_runs_by_id(final_config_id)
                  if run["budget"] == final_budget][0]

    summary = dict()
    summary["final_loss"] = final_score if autonet_config["minimize"] else -final_score
    summary["incumbent_config"] = incumbent_config
    summary["duration"] = fit_duration

    for name in autonet_config['additional_metrics'] + [autonet_config["train_metric"]]:
        try:
            summary["final_" + name] = final_info["val_" + name]
        except KeyError:
            summary["final_" + name] = final_info["train_" + name]

    for name in autonet_config['additional_logs']:
        try:
            summary["final_" + name] = final_info[name]
        except KeyError:
            pass

    # write as json
    with open(os.path.join(result_dir, "summary.json"), "w") as f:
        json.dump(summary, f)

    return dict()
def incumbent_to_config(experiment_path, configs_path, output_dir):
    # Read the incumbent
    result = hpres.logged_results_to_HBS_result(str(experiment_path))
    id2conf = result.get_id2config_mapping()
    inc_id = result.get_incumbent_id()
    incumbent_config = id2conf[inc_id]['config']

    # Read the default config
    default_config_path = configs_path / "default.yaml"
    with default_config_path.open() as in_stream:
        default_config = yaml.safe_load(in_stream)

    # Compute and write the incumbent config in the format of default_config
    incumbent_config = construct_model_config(incumbent_config, default_config)
    out_config_path = output_dir / "{}.yaml".format(experiment_path.name)
    with out_config_path.open("w") as out_stream:
        yaml.dump(incumbent_config, out_stream)
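# Usage sketch (paths are hypothetical): the `/` operator, `.open()`, and `.name`
# used above require pathlib.Path arguments rather than plain strings:
#
#   from pathlib import Path
#   incumbent_to_config(Path('experiments/run_0'), Path('configs'), Path('out'))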
def analyze_bohb(log_dir):
    # load the example run from the log files
    result = hpres.logged_results_to_HBS_result(log_dir)
    result = transform_result(result, min_success_reward=MIN_SUCCESS_REWARD)
    # result = remove_outliers(result)

    plot_parallel_scatter(result, with_mirrored_sampling=False, with_nes_step_size=False)
    plot_parallel_scatter(result, with_mirrored_sampling=False, with_nes_step_size=True)
    plot_parallel_scatter(result, with_mirrored_sampling=True, with_nes_step_size=False)
    plot_parallel_scatter(result, with_mirrored_sampling=True, with_nes_step_size=True)
def convert(self, folders, output_dir=None):
    """Convert hpbandster-results into smac-format, aggregating parallel runs
    along the budgets, so it is treated as one run with the same budgets.
    Throws ValueError when budgets of individual runs don't match.

    Parameters
    ----------
    folders: List[str]
        list of runs to consider
    output_dir: str
        path to CAVE's output-directory

    Returns
    -------
    result: hpbandster.core.result
        BOHB-result in original format
    paths: List[str]
        paths to converted data
    budgets: List[int]
        budgets, corresponding to paths
    """
    try:
        from hpbandster.core.result import Result as HPBResult
        from hpbandster.core.result import logged_results_to_HBS_result
    except ImportError as e:
        raise ImportError("To analyze BOHB-data, please install hpbandster "
                          "(e.g. `pip install hpbandster`)")

    folder2result = OrderedDict([(f, logged_results_to_HBS_result(f)) for f in folders])

    # backup_cs is a list with alternative interpretations of the configspace-file
    # (if it's a .pcs-file)
    cs, backup_cs = self.load_configspace(folders[0])

    # Using temporary files for the intermediate smac-result-like format
    if not output_dir:
        self.logger.debug("New outputdir")
        output_dir = tempfile.mkdtemp()

    budgets, paths = zip(*self.hpbandster2smac(folder2result, cs, backup_cs, output_dir).items())

    return list(folder2result.values()), paths, budgets
def fit(self, pipeline_config, run_result_dir, train_metric, trajectories):
    ensemble_log_file = os.path.join(run_result_dir, "ensemble_log.json")
    test_log_file = os.path.join(run_result_dir, "test_result.json")
    if not pipeline_config["enable_ensemble"] or train_metric is None or \
            (not os.path.exists(ensemble_log_file) and not os.path.exists(test_log_file)):
        return {"trajectories": trajectories, "train_metric": train_metric}

    try:
        started = logged_results_to_HBS_result(run_result_dir).HB_config["time_ref"]
    except Exception:
        return {"trajectories": trajectories, "train_metric": train_metric}

    ensemble_trajectories = dict()
    test_trajectories = dict()
    if os.path.exists(ensemble_log_file):
        ensemble_trajectories = get_ensemble_trajectories(ensemble_log_file, started)
    if os.path.exists(test_log_file):
        test_trajectories = get_ensemble_trajectories(test_log_file, started,
                                                      prefix="", only_test=True)

    return {"trajectories": dict(trajectories, **ensemble_trajectories, **test_trajectories),
            "train_metric": train_metric}