# NOTE: plot_accuracy_over_budget and plot_parallel_scatter used below are
# project-specific helpers assumed to be defined/imported elsewhere.
import matplotlib.pyplot as plt

import hpbandster.core.result as hpres
import hpbandster.visualization as hpvis


def visualizeBOHB(log_dir):
    # load the example run from the log files
    result = hpres.logged_results_to_HBS_result(log_dir)

    # get all executed runs
    all_runs = result.get_all_runs()

    # get the 'dict' that translates config ids to the actual configurations
    id2conf = result.get_id2config_mapping()

    # Here is how you get the incumbent (best configuration)
    inc_id = result.get_incumbent_id()

    # let's grab the run on the highest budget
    inc_runs = result.get_runs_by_id(inc_id)
    inc_run = inc_runs[-1]

    # We have access to all information: the config, the loss observed during
    # optimization, and all the additional information
    inc_valid_score = inc_run.loss
    inc_config = id2conf[inc_id]['config']

    print('Best found configuration:')
    print(inc_config)
    #print('It achieved accuracies of %f (validation) and %f (test).' % (-inc_valid_score, inc_test_score))

    # Let's plot the observed losses grouped by budget,
    hpvis.losses_over_time(all_runs)

    # the number of concurrent runs,
    hpvis.concurrent_runs_over_time(all_runs)

    # and the number of finished runs.
    hpvis.finished_runs_over_time(all_runs)

    # This one visualizes the Spearman rank correlation coefficients of the losses
    # between different budgets.
    hpvis.correlation_across_budgets(result)

    # For model based optimizers, one might wonder how much the model actually helped.
    # The next plot compares the performance of configs picked by the model vs. random ones
    hpvis.performance_histogram_model_vs_random(all_runs, id2conf)

    plot_accuracy_over_budget(result)

    plot_parallel_scatter(result)

    plt.show()
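
# Usage sketch, not part of the original example: log_dir must be a directory
# written by hpbandster's json_result_logger (i.e. it contains configs.json and
# results.json); 'bohb_logs' below is an illustrative path.
#
#   result_logger = hpres.json_result_logger(directory='bohb_logs', overwrite=True)
#   # ... pass result_logger=result_logger to the BOHB optimizer, run it, then:
if __name__ == '__main__':
    visualizeBOHB('bohb_logs')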
Example #2
def generateViz(out_dir, show=False):
    '''
    Generate plots for a BOHB run (based on BOHB_visualizations.py from the documentation).
    :param out_dir: Directory containing the logged run; the plots are saved there as well
    :param show: True/False to display the plots (in addition to saving them)
    :return: None
    '''
    result = hpres.logged_results_to_HBS_result(out_dir)
    # get all executed runs
    all_runs = result.get_all_runs()
    # get the 'dict' that translates config ids to the actual configurations
    id2conf = result.get_id2config_mapping()
    # Let's plot the observed losses grouped by budget,
    hpvis.losses_over_time(all_runs)
    plt.tight_layout()
    plt.savefig(out_dir + '/plot_losses_over_time.png', dpi=300)
    # the number of concurrent runs,
    hpvis.concurrent_runs_over_time(all_runs)
    plt.tight_layout()
    plt.savefig(out_dir + '/plot_concurrent_runs_over_time.png', dpi=300)
    # and the number of finished runs.
    hpvis.finished_runs_over_time(all_runs)
    plt.tight_layout()
    plt.savefig(out_dir + '/plot_finished_runs_over_time.png', dpi=300)
    # This one visualizes the Spearman rank correlation coefficients of the losses
    # between different budgets.
    hpvis.correlation_across_budgets(result)
    figure = plt.gcf()
    figure.set_size_inches(10, 10)
    plt.savefig(out_dir + '/plot_correlation_across_budgets.png', dpi=300)
    # For model based optimizers, one might wonder how much the model actually helped.
    # The next plot compares the performance of configs picked by the model vs. random ones
    hpvis.performance_histogram_model_vs_random(all_runs, id2conf)
    figure = plt.gcf()
    figure.set_size_inches(10, 10)
    plt.savefig(out_dir + '/plot_performance_histogram.png', dpi=150)
    if show:
        plt.show()
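
# Usage sketch, not from the original snippet: generateViz() reads a logged BOHB
# run from out_dir (configs.json / results.json) and writes the plots back into
# the same directory; the run directories below are illustrative assumptions.
if __name__ == '__main__':
    for run_dir in ('runs/seed0', 'runs/seed1'):
        generateViz(run_dir, show=False)
        plt.close('all')  # generateViz leaves its figures open; free them between runs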
Example #3
    print(inc_config)
    print('It achieved losses of %f (train) and %f (test).' %
          (inc_train_loss, inc_test_loss))

    # Let's plot the observed losses grouped by budget,
    hpvis.losses_over_time(all_runs)

    # the number of concurrent runs,
    hpvis.concurrent_runs_over_time(all_runs)

    # and the number of finished runs.
    hpvis.finished_runs_over_time(all_runs)

    # This one visualizes the Spearman rank correlation coefficients of the losses
    # between different budgets.
    hpvis.correlation_across_budgets(result)

    # For model based optimizers, one might wonder how much the model actually helped.
    # The next plot compares the performance of configs picked by the model vs. random ones
    hpvis.performance_histogram_model_vs_random(all_runs, id2conf)

    plt.show()

    d1, loss = res.get_pandas_dataframe()

    d1['loss'] = loss
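    # Not in the original snippet: with the loss column attached, the best
    # configurations can be inspected directly (assuming d1 was built as above).
    print(d1.sort_values('loss').head())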

if False:
    result = res
    # get all executed runs
Example #4
File: analysis.py  Project: LMZimmer/temp
def analysis(run_name):
    """
    Function to create plots of the current hpo runs.
    :param run_name:
    :return:
    """
    # load the example run from the log files
    result = hpres.logged_results_to_HBS_result(run_name)

    # get all executed runs
    all_runs = result.get_all_runs()
    '''
    # Plot mse loss vs 1 - test correlation
    losses = []
    corrs = []
    for conf in all_runs:
        if conf['info'] is not None and conf['loss'] is not None and not np.isnan(conf['loss']) and conf['loss'] < 1.0:
            loss = conf['loss']
            losses.append(loss)
            corrs.append(1 - conf['info'][0]['test_kendall_tau'])
    fig = scatter_plot(np.array(losses), np.array(corrs), 'MSE loss', '1 - Test Correlation', title=None)
    fig.savefig(os.path.join(run_name, 'correlation_mse_loss_vs_test_correlation.pdf'))
    plt.close()
    '''
    # Plot mse loss vs 1 - extrapolation correlation
    losses = []
    extra_corrs = []
    budgets = []
    for conf in all_runs:
        if (conf['info'] is not None and conf['loss'] is not None
                and not np.isnan(conf['loss']) and not np.isinf(conf['loss'])
                and not math.isnan(conf['info'][0]['test_kendall_tau'])):
            loss = conf['loss']
            losses.append(loss)
            extra_corrs.append(1 - conf['info'][0]['test_kendall_tau'])
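            # note: despite its name, 'budgets' collects the wall-clock runtime (s) of each run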
            budgets.append(conf['time_stamps']['finished'] -
                           conf['time_stamps']['started'])

    print('Highest kendall tau test correlation', 1 - min(extra_corrs))
    # MSE loss vs. 1 - extrapolation correlation (colored by runtime)
    plt.figure()
    plt.ylabel('1 - Extrapolation Correlation')
    plt.xlabel('MSE loss')
    ax = plt.gca()
    ax.set_yscale('log')
    plt.ylim(min(extra_corrs), max(extra_corrs))
    ax.set_xscale('log')
    plt.xlim(min(losses), max(losses))
    plt.scatter(losses, extra_corrs, s=2, alpha=0.8, c=budgets)
    cbar = plt.colorbar()
    cbar.set_label('Runtime (s)', rotation=270)
    plt.grid(True, which="both", ls="-", alpha=0.5)
    plt.tight_layout()
    plt.savefig(
        os.path.join(run_name,
                     'correlation_mse_loss_vs_extrapolation_correlation.pdf'))
    plt.close()

    # Runtime vs. 1 - extrapolation correlation
    plt.figure()
    plt.ylabel('1 - Extrapolation Correlation')
    plt.xlabel('Runtime (s)')
    ax = plt.gca()
    ax.set_yscale('log')
    plt.ylim(min(extra_corrs), max(extra_corrs))
    ax.set_xscale('log')
    plt.xlim(min(budgets), max(budgets))
    plt.scatter(budgets, extra_corrs, s=8, alpha=1.0)
    plt.grid(True, which="both", ls="-", alpha=0.5)
    plt.tight_layout()
    plt.savefig(
        os.path.join(run_name, 'num_epochs_vs_extrapolation_correlation.pdf'))
    plt.close()

    print(
        'Maximum of extrapolation correlation', 1 -
        np.min(np.array(extra_corrs)[np.logical_not(np.isnan(extra_corrs))]))

    # get the 'dict' that translates config ids to the actual configurations
    id2conf = result.get_id2config_mapping()

    # Here is how you get the incumbent (best configuration)
    inc_id = result.get_incumbent_id()
    if inc_id is not None:
        print('Incumbent ID', inc_id)
        # let's grab the run on the highest budget
        inc_runs = result.get_runs_by_id(inc_id)
        inc_run = inc_runs[-1]

        # We have access to all information: the config, the loss observed during
        # optimization, and all the additional information
        inc_loss = inc_run.loss
        inc_config = id2conf[inc_id]['config']

        print(inc_run.info)
        # inc_val_corr = inc_run.info[0]['valid_corr']
        # inc_test_corr = inc_run.info[0]['test_corr']
        # extrapolation_corr = inc_run.info[0]['extrapolation_corr']

        print('Best found configuration:')
        print(inc_config)
        print(
            'It achieved a validation MSE loss of %f and corr %f (validation) / %f (test), extrapolation corr %f.'
            % (inc_loss, 1, 1,
               1))  # placeholder 1s; intended values: inc_val_corr, inc_test_corr, extrapolation_corr

    # Let's plot the observed losses grouped by budget,
    losses_over_time(all_runs)
    ax = plt.gca()
    ax.set_yscale('log')
    plt.tight_layout()
    plt.savefig(os.path.join(run_name, 'loss_over_time.pdf'))
    plt.close()

    # the number of concurrent runs,
    hpvis.concurrent_runs_over_time(all_runs)
    plt.tight_layout()
    plt.savefig(os.path.join(run_name, 'concurrent_runs_over_time.pdf'))
    plt.close()

    # and the number of finished runs.
    hpvis.finished_runs_over_time(all_runs)
    plt.tight_layout()
    plt.savefig(os.path.join(run_name, 'finished_runs_over_time.pdf'))
    plt.close()

    # This one visualizes the Spearman rank correlation coefficients of the losses
    # between different budgets.
    hpvis.correlation_across_budgets(result)
    plt.tight_layout()
    plt.savefig(os.path.join(run_name, 'correlation_across_budgets.pdf'))
    plt.close()

    if "random" or "RS" not in run_name:
        # For model based optimizers, one might wonder how much the model actually helped.
        # The next plot compares the performance of configs picked by the model vs. random ones
        hpvis.performance_histogram_model_vs_random(all_runs, id2conf)
        plt.tight_layout()
        plt.savefig(
            os.path.join(run_name,
                         'performance_histogram_model_vs_random.pdf'))
    plt.close()
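
# Usage sketch, not part of the original analysis.py: run the analysis for every
# logged run found under a results directory ('results' is an illustrative path).
if __name__ == '__main__':
    import glob
    for run_name in sorted(glob.glob(os.path.join('results', '*'))):
        if os.path.isdir(run_name):
            analysis(run_name)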
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--result',
        type=str,
        required=True,
        help=
        "Final result Pickle file or master directory (for running experiments)"
    )
    parser.add_argument('--mode',
                        type=str,
                        choices=('save', 'show', 'disable'),
                        default='disable',
                        help="Plot mode")
    parser.add_argument('--out-path',
                        type=str,
                        default=None,
                        help="Default output path for plots")
    parser.add_argument('--dpi', type=int, default=150, help="Plot resolution")

    args = parser.parse_args()

    # load run results
    if os.path.isfile(args.result):
        default_out_path = os.path.dirname(os.path.abspath(args.result))
        exp_name = os.path.splitext(os.path.basename(args.result))[0]
        with open(args.result, 'rb') as fp:
            result = pickle.load(fp)
    elif os.path.isdir(args.result):
        default_out_path = args.result
        exp_name = 'exp'
        result = hpres.logged_results_to_HBS_result(args.result)
    else:
        print("No input specified. Use --result")
        return

    save_figs = args.mode == 'save'
    show_figs = args.mode == 'show'

    # File path
    out_path = args.out_path or default_out_path

    # Get all executed runs
    all_runs = result.get_all_runs()

    # Get the 'dict' that translates config ids to the actual configurations
    id2conf = result.get_id2config_mapping()

    # Here is how you get the incumbent (best configuration)
    inc_id = result.get_incumbent_id()

    # let's grab the run on the highest budget
    inc_runs = result.get_runs_by_id(inc_id)
    inc_run = inc_runs[-1]

    # We have access to all information: the config, the loss observed during
    # optimization, and all the additional information
    inc_loss = inc_run.loss
    inc_config = id2conf[inc_id]['config']

    # Each run contains one or more trainings
    # chosen_accs: list of the chosen best model accuracy (according to the bohb loss) for each single training
    chosen_accs = []

    all_accs = []
    for single_info in inc_run.info['single_info']:
        # All the BOHB losses of this training (one per epoch)
        bohb_losses = np.array(single_info['bohb_losses'])
        # Let's find the best one
        best_index = bohb_losses.argmin()
        # Add the best model (according to the bohb loss) accuracy to chosen_accs
        chosen_accs.append(single_info['target_accuracy'][best_index])
        # Add all the accuracies of all the epochs of this training
        all_accs.append(single_info['target_accuracy'])
    # Get mean accuracy for this run (average the selected models for each training)
    acc = np.array(chosen_accs).mean()
    # Matrix containing ALL the target accuracies of all the epochs of all the trainings of this run
    all_accs = np.array(all_accs)

    # Print best configuration
    print('Best found configuration:')
    for k in inc_config:
        nice_print(k, inc_config[k])
    nice_print('inc_id', '-'.join(map(str, inc_id)))

    print()
    print("Performance:")
    criterion_names = {
        'regression': 'Regression',
        'target_accuracy': 'Trg accuracy',
        'target_entropy_loss': 'Trg entropy',
        'target_div_loss': 'Trg diversity',
        'target_class_loss': 'Trg class. loss',
        'target_silhouette_score': 'Trg Silhouette',
        'target_calinski_harabasz_score': 'Trg Calinski-Harabasz'
    }
    # Print info
    cname = inc_run.info['criterion']
    nice_print(criterion_names.get(cname, cname), f"{inc_loss:.10f}")
    nice_print(
        "Accuracy",
        f"{acc * 100:.4f} % (mean of each selected model in selected conf run trainings)"
    )
    nice_print(
        "Accuracy",
        f"{all_accs.max(initial=-1) * 100:.4f} % (best in selected run, you shouldn't know this)"
    )

    print()
    print("Resources:")
    nice_print(
        "Total time",
        datetime.timedelta(seconds=all_runs[-1].time_stamps['finished'] -
                           all_runs[0].time_stamps['started']))
    durations = list(
        map(lambda r: r.time_stamps['finished'] - r.time_stamps['started'],
            all_runs))
    nice_print("Number of runs", len(all_runs))
    nice_print("Longest run", datetime.timedelta(seconds=max(durations)))
    nice_print("Shortest run", datetime.timedelta(seconds=min(durations)))

    gpu_seconds = sum([
        r.time_stamps['finished'] - r.time_stamps['started'] for r in all_runs
    ])
    nice_print("GPU time", datetime.timedelta(seconds=gpu_seconds))

    if not (save_figs or show_figs):
        return

    print()
    print("Generating plots")

    # Let's plot the observed losses grouped by budget,
    hpvis.losses_over_time(all_runs)
    if save_figs:
        plt.savefig(os.path.join(out_path,
                                 'loss-over-time_{}.png'.format(exp_name)),
                    dpi=args.dpi)

    # the number of concurrent runs,
    hpvis.concurrent_runs_over_time(all_runs)
    if save_figs:
        plt.savefig(os.path.join(out_path,
                                 'concurrent-runs_{}.png'.format(exp_name)),
                    dpi=args.dpi)

    # and the number of finished runs.
    hpvis.finished_runs_over_time(all_runs)
    if save_figs:
        plt.savefig(os.path.join(out_path,
                                 'finished-runs_{}.png'.format(exp_name)),
                    dpi=args.dpi)

    # This one visualizes the Spearman rank correlation coefficients of the losses
    # between different budgets.
    hpvis.correlation_across_budgets(result)
    if save_figs:
        plt.savefig(os.path.join(out_path,
                                 'correlation_{}.png'.format(exp_name)),
                    dpi=args.dpi)

    # For model based optimizers, one might wonder how much the model actually helped.
    # The next plot compares the performance of configs picked by the model vs. random ones
    hpvis.performance_histogram_model_vs_random(all_runs, id2conf)
    if save_figs:
        plt.savefig(os.path.join(out_path,
                                 'model-vs-random_{}.png'.format(exp_name)),
                    dpi=args.dpi)

    sensitivity_plot(all_runs,
                     id2conf,
                     cvars=('disc.num_fc_layers', 'disc.hidden_size_log',
                            'disc.dropout', 'net.bottleneck_size_log',
                            'base.weight_da'))
    if save_figs:
        plt.savefig(os.path.join(out_path,
                                 'sensitivity_{}.png'.format(exp_name)),
                    dpi=args.dpi)

    if show_figs:
        plt.show()
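
# Entry-point guard, not shown in the original snippet, so the script can be run
# as e.g. `python plot_results.py --result runs/exp0 --mode save` (file name and
# paths are illustrative assumptions).
if __name__ == '__main__':
    main()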