import argparse
import datetime
import math
import os
import pickle

import numpy as np
import matplotlib.pyplot as plt
import hpbandster.core.result as hpres
import hpbandster.visualization as hpvis


def visualizeBOHB(log_dir):
    # load the example run from the log files
    result = hpres.logged_results_to_HBS_result(log_dir)

    # get all executed runs
    all_runs = result.get_all_runs()

    # get the 'dict' that translates config ids to the actual configurations
    id2conf = result.get_id2config_mapping()

    # Here is how you get the incumbent (best configuration)
    inc_id = result.get_incumbent_id()

    # let's grab the run on the highest budget
    inc_runs = result.get_runs_by_id(inc_id)
    inc_run = inc_runs[-1]

    # We have access to all information: the config, the loss observed during
    # optimization, and all the additional information
    inc_valid_score = inc_run.loss
    inc_config = id2conf[inc_id]['config']

    print('Best found configuration:')
    print(inc_config)
    # print('It achieved accuracies of %f (validation) and %f (test).' % (-inc_valid_score, inc_test_score))

    # Let's plot the observed losses grouped by budget,
    hpvis.losses_over_time(all_runs)

    # the number of concurrent runs,
    hpvis.concurrent_runs_over_time(all_runs)

    # and the number of finished runs.
    hpvis.finished_runs_over_time(all_runs)

    # This one visualizes the Spearman rank correlation coefficients of the losses
    # between different budgets.
    hpvis.correlation_across_budgets(result)

    # For model-based optimizers, one might wonder how much the model actually helped.
    # The next plot compares the performance of configs picked by the model vs. random ones
    hpvis.performance_histogram_model_vs_random(all_runs, id2conf)

    plot_accuracy_over_budget(result)
    plot_parallel_scatter(result)

    plt.show()
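# Usage sketch for visualizeBOHB (the directory name below is hypothetical; it
# should point at a folder containing the configs.json / results.json files
# written by hpbandster's json result logger):
#
#   visualizeBOHB('bohb_logs/')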
def generateViz(out_dir, show=False):
    '''
    Generate plots for BOHB (from BOHB_visualizations.py from the documentation)
    :param out_dir: Directory to save the plots
    :param show: True/False to display the plots (in addition to saving them)
    :return: void
    '''
    result = hpres.logged_results_to_HBS_result(out_dir)

    # get all executed runs
    all_runs = result.get_all_runs()

    # get the 'dict' that translates config ids to the actual configurations
    id2conf = result.get_id2config_mapping()

    # Let's plot the observed losses grouped by budget,
    hpvis.losses_over_time(all_runs)
    plt.tight_layout()
    plt.savefig(out_dir + '/plot_losses_over_time.png', dpi=300)

    # the number of concurrent runs,
    hpvis.concurrent_runs_over_time(all_runs)
    plt.tight_layout()
    plt.savefig(out_dir + '/plot_concurrent_runs_over_time.png', dpi=300)

    # and the number of finished runs.
    hpvis.finished_runs_over_time(all_runs)
    plt.tight_layout()
    plt.savefig(out_dir + '/plot_finished_runs_over_time.png', dpi=300)

    # This one visualizes the Spearman rank correlation coefficients of the losses
    # between different budgets.
    hpvis.correlation_across_budgets(result)
    figure = plt.gcf()
    figure.set_size_inches(10, 10)
    plt.savefig(out_dir + '/plot_correlation_across_budgets.png', dpi=300)

    # For model-based optimizers, one might wonder how much the model actually helped.
    # The next plot compares the performance of configs picked by the model vs. random ones
    hpvis.performance_histogram_model_vs_random(all_runs, id2conf)
    figure = plt.gcf()
    figure.set_size_inches(10, 10)
    plt.savefig(out_dir + '/plot_performance_histogram.png', dpi=150)

    if show:
        plt.show()
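# The render/tight_layout/savefig pattern in generateViz repeats for every
# figure. A small helper (hypothetical, not part of the original module) can
# factor it out:
def _save_plot(plot_fn, *args, out_file, dpi=300):
    """Render one hpvis plot, tighten the layout, and save it to out_file."""
    plot_fn(*args)
    plt.tight_layout()
    plt.savefig(out_file, dpi=dpi)

# e.g.:
#   _save_plot(hpvis.losses_over_time, all_runs,
#              out_file=os.path.join(out_dir, 'plot_losses_over_time.png'))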
def printIncumbent(log_dir):
    # load the run from the log files
    result = hpres.logged_results_to_HBS_result(log_dir)

    # get all executed runs
    all_runs = result.get_all_runs()

    # get the 'dict' that translates config ids to the actual configurations
    id2conf = result.get_id2config_mapping()

    # Here is how you get the incumbent (best configuration)
    inc_id = result.get_incumbent_id()

    # let's grab the run on the highest budget
    inc_runs = result.get_runs_by_id(inc_id)
    inc_run = inc_runs[-1]

    # We have access to all information: the config, the loss observed during
    # optimization, and all the additional information
    inc_loss = inc_run.loss
    inc_config = id2conf[inc_id]['config']
    inc_train_acc = inc_run.info['train_accuracy']
    inc_test_acc = inc_run.info['test_accuracy']

    print('Best found configuration:')
    print(inc_config)
    print('It achieved accuracy of %f (train) and %f (test).' % (inc_train_acc, inc_test_acc))

    # Let's plot the observed losses grouped by budget,
    hpvis.losses_over_time(all_runs)

    # the number of concurrent runs,
    hpvis.concurrent_runs_over_time(all_runs)

    # and the number of finished runs.
    hpvis.finished_runs_over_time(all_runs)

    # This one visualizes the Spearman rank correlation coefficients of the losses
    # between different budgets.
    hpvis.correlation_across_budgets(result)

    # For model-based optimizers, one might wonder how much the model actually helped.
    # The next plot compares the performance of configs picked by the model vs. random ones
    hpvis.performance_histogram_model_vs_random(all_runs, id2conf)

    plt.show()

    d1 = result.get_pandas_dataframe()[0]
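# get_pandas_dataframe() returns a pair of dataframes (configurations, losses),
# so d1 above holds the per-run configuration dataframe. A quick inspection
# (assumes pandas is installed, which hpbandster needs for this call):
#
#   print(d1.head())
#   print(d1.describe())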
def analysis(run_name):
    """
    Function to create plots of the current HPO runs.
    :param run_name: Directory with the logged BOHB results; plots are saved there.
    :return:
    """
    # load the example run from the log files
    result = hpres.logged_results_to_HBS_result(run_name)

    # get all executed runs
    all_runs = result.get_all_runs()

    '''
    # Plot mse loss vs 1 - test correlation
    losses = []
    corrs = []
    for conf in all_runs:
        if conf['info'] is not None and conf['loss'] is not None and conf['loss'] != np.nan and conf['loss'] < 1.0:
            loss = conf['loss']
            losses.append(loss)
            corrs.append(1 - conf['info'][0]['test_kendall_tau'])
    fig = scatter_plot(np.array(losses), np.array(corrs), 'MSE loss', '1 - Test Correlation', title=None)
    fig.savefig(os.path.join(run_name, 'correlation_mse_loss_vs_test_correlation.pdf'))
    plt.close()
    '''

    # Plot mse loss vs 1 - extrapolation correlation
    losses = []
    extra_corrs = []
    runtimes = []
    for conf in all_runs:
        # Note: `x != np.nan` is always True, so NaN losses must be filtered
        # with math.isnan() instead.
        if (conf.info is not None and conf.loss is not None
                and not math.isnan(conf.loss) and conf.loss != np.inf
                and not math.isnan(conf.info[0]['test_kendall_tau'])):
            losses.append(conf.loss)
            extra_corrs.append(1 - conf.info[0]['test_kendall_tau'])
            runtimes.append(conf.time_stamps['finished'] - conf.time_stamps['started'])

    print('Highest Kendall tau test correlation', 1 - min(extra_corrs))

    # MSE loss vs. 1 - extrapolation correlation, colored by runtime
    plt.figure()
    plt.ylabel('1 - Extrapolation Correlation')
    plt.xlabel('MSE loss')
    ax = plt.gca()
    ax.set_yscale('log')
    plt.ylim(min(extra_corrs), max(extra_corrs))
    ax.set_xscale('log')
    plt.xlim(min(losses), max(losses))
    plt.scatter(losses, extra_corrs, s=2, alpha=0.8, c=runtimes)
    cbar = plt.colorbar()
    cbar.set_label('Runtime (s)', rotation=270)
    plt.grid(True, which="both", ls="-", alpha=0.5)
    plt.tight_layout()
    plt.savefig(os.path.join(run_name, 'correlation_mse_loss_vs_extrapolation_correlation.pdf'))
    plt.close()

    # Runtime vs. 1 - extrapolation correlation
    plt.figure()
    plt.ylabel('1 - Extrapolation Correlation')
    plt.xlabel('Runtime (s)')
    ax = plt.gca()
    ax.set_yscale('log')
    plt.ylim(min(extra_corrs), max(extra_corrs))
    ax.set_xscale('log')
    plt.xlim(min(runtimes), max(runtimes))
    plt.scatter(runtimes, extra_corrs, s=8, alpha=1.0)
    plt.grid(True, which="both", ls="-", alpha=0.5)
    plt.tight_layout()
    plt.savefig(os.path.join(run_name, 'num_epochs_vs_extrapolation_correlation.pdf'))
    plt.close()

    print('Maximum of extrapolation correlation',
          1 - np.min(np.array(extra_corrs)[np.logical_not(np.isnan(extra_corrs))]))

    # get the 'dict' that translates config ids to the actual configurations
    id2conf = result.get_id2config_mapping()

    # Here is how you get the incumbent (best configuration)
    inc_id = result.get_incumbent_id()
    if inc_id is not None:
        print('Incumbent ID', inc_id)

        # let's grab the run on the highest budget
        inc_runs = result.get_runs_by_id(inc_id)
        inc_run = inc_runs[-1]

        # We have access to all information: the config, the loss observed during
        # optimization, and all the additional information
        inc_loss = inc_run.loss
        inc_config = id2conf[inc_id]['config']
        print(inc_run.info)
        # inc_val_corr = inc_run.info[0]['valid_corr']
        # inc_test_corr = inc_run.info[0]['test_corr']
        # extrapolation_corr = inc_run.info[0]['extrapolation_corr']

        print('Best found configuration:')
        print(inc_config)
        print('It achieved MSE loss %f (validation), corr %f (validation) / %f (test), '
              'extrapolation corr %f.'
              % (inc_loss, 1, 1, 1))  # placeholders for inc_val_corr, inc_test_corr, extrapolation_corr

    # Let's plot the observed losses grouped by budget,
    hpvis.losses_over_time(all_runs)
    ax = plt.gca()
    ax.set_yscale('log')
    plt.tight_layout()
    plt.savefig(os.path.join(run_name, 'loss_over_time.pdf'))
    plt.close()

    # the number of concurrent runs,
    hpvis.concurrent_runs_over_time(all_runs)
    plt.tight_layout()
    plt.savefig(os.path.join(run_name, 'concurrent_runs_over_time.pdf'))
    plt.close()

    # and the number of finished runs.
    hpvis.finished_runs_over_time(all_runs)
    plt.tight_layout()
    plt.savefig(os.path.join(run_name, 'finished_runs_over_time.pdf'))
    plt.close()

    # This one visualizes the Spearman rank correlation coefficients of the losses
    # between different budgets.
    hpvis.correlation_across_budgets(result)
    plt.tight_layout()
    plt.savefig(os.path.join(run_name, 'correlation_across_budgets.pdf'))
    plt.close()

    # Skip the model-vs-random plot for random-search runs. (The original
    # condition `"random" or "RS" not in run_name` was always True.)
    if "random" not in run_name and "RS" not in run_name:
        # For model-based optimizers, one might wonder how much the model actually helped.
        # The next plot compares the performance of configs picked by the model vs. random ones
        hpvis.performance_histogram_model_vs_random(all_runs, id2conf)
        plt.tight_layout()
        plt.savefig(os.path.join(run_name, 'performance_histogram_model_vs_random.pdf'))
        plt.close()
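# Note on the filtering in analysis(): comparing against np.nan with != is
# always True because NaN never compares equal to anything, including itself.
# That is why the loop uses math.isnan(). A quick demonstration:
#
#   >>> import numpy as np, math
#   >>> np.nan != np.nan
#   True
#   >>> math.isnan(np.nan)
#   True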
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--result', type=str, required=True,
                        help="Final result Pickle file or master directory (for running experiments)")
    parser.add_argument('--mode', type=str, choices=('save', 'show', 'disable'),
                        default='disable', help="Plot mode")
    parser.add_argument('--out-path', type=str, default=None,
                        help="Default output path for plots")
    parser.add_argument('--dpi', type=int, default=150, help="Plot resolution")
    args = parser.parse_args()

    # load run results
    if os.path.isfile(args.result):
        default_out_path = os.path.dirname(os.path.abspath(args.result))
        exp_name = os.path.splitext(os.path.basename(args.result))[0]
        with open(args.result, 'rb') as fp:
            result = pickle.load(fp)
    elif os.path.isdir(args.result):
        default_out_path = args.result
        exp_name = 'exp'
        result = hpres.logged_results_to_HBS_result(args.result)
    else:
        print("Invalid --result path: expected a pickle file or a results directory.")
        return

    save_figs = args.mode == 'save'
    show_figs = args.mode == 'show'

    # File path
    out_path = args.out_path or default_out_path

    # Get all executed runs
    all_runs = result.get_all_runs()

    # Get the 'dict' that translates config ids to the actual configurations
    id2conf = result.get_id2config_mapping()

    # Here is how you get the incumbent (best configuration)
    inc_id = result.get_incumbent_id()

    # let's grab the run on the highest budget
    inc_runs = result.get_runs_by_id(inc_id)
    inc_run = inc_runs[-1]

    # We have access to all information: the config, the loss observed during
    # optimization, and all the additional information
    inc_loss = inc_run.loss
    inc_config = id2conf[inc_id]['config']

    # Each run contains one or more trainings.
    # chosen_accs: list of the chosen best model accuracy (according to the BOHB loss)
    # for each single training
    chosen_accs = []
    all_accs = []
    for single_info in inc_run.info['single_info']:
        # All the BOHB losses of this training (one per epoch)
        bohb_losses = np.array(single_info['bohb_losses'])
        # Let's find the best one
        best_index = bohb_losses.argmin()
        # Add the best model (according to the BOHB loss) accuracy to chosen_accs
        chosen_accs.append(single_info['target_accuracy'][best_index])
        # Add all the accuracies of all the epochs of this training
        all_accs.append(single_info['target_accuracy'])

    # Get mean accuracy for this run (average the selected models for each training)
    acc = np.array(chosen_accs).mean()
    # Matrix containing ALL the target accuracies of all the epochs of all the trainings of this run
    all_accs = np.array(all_accs)

    # Print best configuration
    print('Best found configuration:')
    for k in inc_config:
        nice_print(k, inc_config[k])
    nice_print('inc_id', '-'.join(map(str, inc_id)))
    print()

    print("Performance:")
    criterion_names = {
        'regression': 'Regression',
        'target_accuracy': 'Trg accuracy',
        'target_entropy_loss': 'Trg entropy',
        'target_div_loss': 'Trg diversity',
        'target_class_loss': 'Trg class. loss',
        'target_silhouette_score': 'Trg Silhouette',
        'target_calinski_harabasz_score': 'Trg Calinski-Harabasz'
    }

    # Print info
    cname = inc_run.info['criterion']
    nice_print(criterion_names.get(cname, cname), f"{inc_loss:.10f}")
    nice_print("Accuracy",
               f"{acc * 100:.4f} % (mean of each selected model in selected conf run trainings)")
    nice_print("Accuracy",
               f"{all_accs.max(initial=-1) * 100:.4f} % (best in selected run, you shouldn't know this)")
    print()

    print("Resources:")
    nice_print("Total time",
               datetime.timedelta(seconds=all_runs[-1].time_stamps['finished'] -
                                  all_runs[0].time_stamps['started']))
    durations = [r.time_stamps['finished'] - r.time_stamps['started'] for r in all_runs]
    nice_print("Number of runs", len(all_runs))
    nice_print("Longest run", datetime.timedelta(seconds=max(durations)))
    nice_print("Shortest run", datetime.timedelta(seconds=min(durations)))
    gpu_seconds = sum(durations)
    nice_print("GPU time", datetime.timedelta(seconds=gpu_seconds))

    if not (save_figs or show_figs):
        return

    print()
    print("Generating plots")

    # Let's plot the observed losses grouped by budget,
    hpvis.losses_over_time(all_runs)
    if save_figs:
        plt.savefig(os.path.join(out_path, 'loss-over-time_{}.png'.format(exp_name)), dpi=args.dpi)

    # the number of concurrent runs,
    hpvis.concurrent_runs_over_time(all_runs)
    if save_figs:
        plt.savefig(os.path.join(out_path, 'concurrent-runs_{}.png'.format(exp_name)), dpi=args.dpi)

    # and the number of finished runs.
    hpvis.finished_runs_over_time(all_runs)
    if save_figs:
        plt.savefig(os.path.join(out_path, 'finished-runs_{}.png'.format(exp_name)), dpi=args.dpi)

    # This one visualizes the Spearman rank correlation coefficients of the losses
    # between different budgets.
    hpvis.correlation_across_budgets(result)
    if save_figs:
        plt.savefig(os.path.join(out_path, 'correlation_{}.png'.format(exp_name)), dpi=args.dpi)

    # For model-based optimizers, one might wonder how much the model actually helped.
    # The next plot compares the performance of configs picked by the model vs. random ones
    hpvis.performance_histogram_model_vs_random(all_runs, id2conf)
    if save_figs:
        plt.savefig(os.path.join(out_path, 'model-vs-random_{}.png'.format(exp_name)), dpi=args.dpi)

    sensitivity_plot(all_runs, id2conf,
                     cvars=('disc.num_fc_layers', 'disc.hidden_size_log', 'disc.dropout',
                            'net.bottleneck_size_log', 'base.weight_da'))
    if save_figs:
        plt.savefig(os.path.join(out_path, 'sensitivity_{}.png'.format(exp_name)), dpi=args.dpi)

    if show_figs:
        plt.show()
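# Script entry point (assumes this module is meant to be run directly; the
# file name in the example invocation below is hypothetical):
if __name__ == '__main__':
    main()

# Example:
#   python bohb_plots.py --result runs/exp1/ --mode save --dpi 200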