Code example #1
def gen_grid(args, config, config_budget={}):
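    # Generate one YAML config per grid point: every combination of values in the
    # grid file is merged into a copy of `config` and written as a separate
    # .yaml file under '<out_dir>/<config-name>_grid_<grid-name>/'.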
    task_name = '{}_grid_{}'.format(get_fname(args.config),
                                    get_fname(args.grid))
    fname_start = get_fname(args.config)
    out_dir = '{}/{}'.format(args.out_dir, task_name)
    makedirs_rm_exist(out_dir)
    config['out_dir'] = os.path.join(config['out_dir'], task_name)

    outs = load_search_file(args.grid)
    for i, out in enumerate(outs):
        vars_label = [row[0].split('.') for row in out]
        vars_alias = [row[1] for row in out]
        vars_value = grid2list([string_to_python(row[2]) for row in out])
        if i == 0:
            print('Variable label: {}'.format(vars_label))
            print('Variable alias: {}'.format(vars_alias))

        for vars in vars_value:
            config_out = config.copy()
            fname_out = fname_start
            for id, var in enumerate(vars):
                if len(vars_label[id]) == 1:
                    config_out[vars_label[id][0]] = var
                elif len(vars_label[id]) == 2:
                    if vars_label[id][0] in config_out:  # if key1 exist
                        config_out[vars_label[id][0]][vars_label[id][1]] = var
                    else:
                        config_out[vars_label[id][0]] = {
                            vars_label[id][1]: var
                        }
                else:
                    raise ValueError('Only 2-level config files are supported')
                fname_out += '-{}={}'.format(vars_alias[id],
                                             str(var).strip("[]").strip("''"))
            if len(config_budget) > 0:
                config_out = match_baseline_cfg(config_out, config_budget)
            with open('{}/{}.yaml'.format(out_dir, fname_out), "w") as f:
                yaml.dump(config_out, f, default_flow_style=False)
        print('{} configurations saved to: {}'.format(len(vars_value),
                                                      out_dir))
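
Usage sketch (not part of the original listing): a minimal driver for gen_grid, assuming an argparse namespace with config, grid and out_dir attributes and a YAML base config; the attribute names and paths are assumptions inferred from how the function reads args.

# Hypothetical driver for gen_grid; attribute names and paths are assumptions.
import argparse
import yaml

parser = argparse.ArgumentParser()
parser.add_argument('--config', default='configs/example.yaml')  # base config file
parser.add_argument('--grid', default='grids/example.txt')       # grid search file
parser.add_argument('--out_dir', default='configs')              # output root
args = parser.parse_args()

with open(args.config) as f:
    config = yaml.safe_load(f)  # nested dict; must contain an 'out_dir' key

gen_grid(args, config)  # one '<config>-<alias>=<value>...yaml' file per grid point
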
Code example #2
File: agg_runs.py Project: oldppd/pytorch_geometric
def agg_runs(dir, metric_best='auto'):
    r'''
    Aggregate over different random seeds of a single experiment

    Args:
        dir (str): Directory of the results, containing 1 experiment
        metric_best (str, optional): The metric for selecting the best
        validation performance. Options: auto, accuracy, auc.

    '''
    results = {'train': None, 'val': None}
    results_best = {'train': None, 'val': None}
    for seed in os.listdir(dir):
        if is_seed(seed):
            dir_seed = os.path.join(dir, seed)

            split = 'val'
            if split in os.listdir(dir_seed):
                dir_split = os.path.join(dir_seed, split)
                fname_stats = os.path.join(dir_split, 'stats.json')
                stats_list = json_to_dict_list(fname_stats)
                if metric_best == 'auto':
                    metric = 'auc' if 'auc' in stats_list[0] else 'accuracy'
                else:
                    metric = metric_best
                performance_np = np.array(  # noqa
                    [stats[metric] for stats in stats_list])
                best_epoch = \
                    stats_list[
                        eval("performance_np.{}()".format(cfg.metric_agg))][
                        'epoch']
                print(best_epoch)

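            # best_epoch, selected on the 'val' split above, is reused to pick
            # the matching stats entry in every split of this seed.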
            for split in os.listdir(dir_seed):
                if is_split(split):
                    dir_split = os.path.join(dir_seed, split)
                    fname_stats = os.path.join(dir_split, 'stats.json')
                    stats_list = json_to_dict_list(fname_stats)
                    stats_best = [
                        stats for stats in stats_list
                        if stats['epoch'] == best_epoch
                    ][0]
                    print(stats_best)
                    stats_list = [[stats] for stats in stats_list]
                    if results[split] is None:
                        results[split] = stats_list
                    else:
                        results[split] = join_list(results[split], stats_list)
                    if results_best[split] is None:
                        results_best[split] = [stats_best]
                    else:
                        results_best[split] += [stats_best]
    results = {k: v for k, v in results.items() if v is not None}  # rm None
    results_best = {k: v
                    for k, v in results_best.items()
                    if v is not None}  # rm None
    for key in results:
        for i in range(len(results[key])):
            results[key][i] = agg_dict_list(results[key][i])
    for key in results_best:
        results_best[key] = agg_dict_list(results_best[key])
    # save aggregated results
    for key, value in results.items():
        dir_out = os.path.join(dir, 'agg', key)
        makedirs_rm_exist(dir_out)
        fname = os.path.join(dir_out, 'stats.json')
        dict_list_to_json(value, fname)

        if cfg.tensorboard_agg:
            if SummaryWriter is None:
                raise ImportError(
                    'Tensorboard support requires `tensorboardX`.')
            writer = SummaryWriter(dir_out)
            dict_list_to_tb(value, writer)
            writer.close()
    for key, value in results_best.items():
        dir_out = os.path.join(dir, 'agg', key)
        fname = os.path.join(dir_out, 'best.json')
        dict_to_json(value, fname)
    logging.info('Results aggregated across runs saved in {}'.format(
        os.path.join(dir, 'agg')))
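
Usage sketch (not part of the original listing): how agg_runs might be called; the directory layout in the comments is inferred from the os.listdir and stats.json reads above, and the run and seed names are illustrative only.

# Illustrative layout expected by agg_runs:
# results/example_run/
#     1/                      # one sub-directory per random seed
#         train/stats.json
#         val/stats.json
#     2/
#         ...
agg_runs('results/example_run', metric_best='auto')
# -> aggregated stats.json / best.json written under results/example_run/agg/<split>/
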
Code example #3
File: agg_runs.py Project: oldppd/pytorch_geometric
def agg_batch(dir, metric_best='auto'):
    r'''
    Aggregate across results from multiple experiments via grid search

    Args:
        dir (str): Directory of the results, containing multiple experiments
        metric_best (str, optional): The metric for selecting the best
        validation performance. Options: auto, accuracy, auc.

    '''
    import pandas as pd
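    # First pass: one row per experiment from its best-validation-epoch stats
    # (agg/<split>/best.json) -> <split>_best.csv.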
    results = {'train': [], 'val': [], 'test': []}
    for run in os.listdir(dir):
        if run != 'agg':
            dict_name = name_to_dict(run)
            dir_run = os.path.join(dir, run, 'agg')
            if os.path.isdir(dir_run):
                for split in os.listdir(dir_run):
                    dir_split = os.path.join(dir_run, split)
                    fname_stats = os.path.join(dir_split, 'best.json')
                    dict_stats = json_to_dict_list(fname_stats)[
                        -1]  # get best val epoch
                    rm_keys(dict_stats,
                            ['lr', 'lr_std', 'eta', 'eta_std', 'params_std'])
                    results[split].append({**dict_name, **dict_stats})
    dir_out = os.path.join(dir, 'agg')
    makedirs_rm_exist(dir_out)
    for key in results:
        if len(results[key]) > 0:
            results[key] = pd.DataFrame(results[key])
            results[key] = results[key].sort_values(list(dict_name.keys()),
                                                    ascending=[True] *
                                                    len(dict_name))
            fname = os.path.join(dir_out, '{}_best.csv'.format(key))
            results[key].to_csv(fname, index=False)

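    # Second pass: one row per experiment from its last-epoch stats
    # (agg/<split>/stats.json[-1]) -> <split>.csv.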
    results = {'train': [], 'val': [], 'test': []}
    for run in os.listdir(dir):
        if run != 'agg':
            dict_name = name_to_dict(run)
            dir_run = os.path.join(dir, run, 'agg')
            if os.path.isdir(dir_run):
                for split in os.listdir(dir_run):
                    dir_split = os.path.join(dir_run, split)
                    fname_stats = os.path.join(dir_split, 'stats.json')
                    dict_stats = json_to_dict_list(fname_stats)[
                        -1]  # get last epoch
                    rm_keys(dict_stats,
                            ['lr', 'lr_std', 'eta', 'eta_std', 'params_std'])
                    results[split].append({**dict_name, **dict_stats})
    dir_out = os.path.join(dir, 'agg')
    for key in results:
        if len(results[key]) > 0:
            results[key] = pd.DataFrame(results[key])
            results[key] = results[key].sort_values(list(dict_name.keys()),
                                                    ascending=[True] *
                                                    len(dict_name))
            fname = os.path.join(dir_out, '{}.csv'.format(key))
            results[key].to_csv(fname, index=False)

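    # Third pass: one row per experiment from the epoch selected by
    # cfg.metric_agg on the chosen metric -> <split>_bestepoch.csv.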
    results = {'train': [], 'val': [], 'test': []}
    for run in os.listdir(dir):
        if run != 'agg':
            dict_name = name_to_dict(run)
            dir_run = os.path.join(dir, run, 'agg')
            if os.path.isdir(dir_run):
                for split in os.listdir(dir_run):
                    dir_split = os.path.join(dir_run, split)
                    fname_stats = os.path.join(dir_split, 'stats.json')
                    dict_stats = json_to_dict_list(
                        fname_stats)  # get best epoch
                    if metric_best == 'auto':
                        metric = 'auc' if 'auc' in dict_stats[0] \
                            else 'accuracy'
                    else:
                        metric = metric_best
                    performance_np = np.array(  # noqa
                        [stats[metric] for stats in dict_stats])
                    dict_stats = dict_stats[eval("performance_np.{}()".format(
                        cfg.metric_agg))]
                    rm_keys(dict_stats,
                            ['lr', 'lr_std', 'eta', 'eta_std', 'params_std'])
                    results[split].append({**dict_name, **dict_stats})
    dir_out = os.path.join(dir, 'agg')
    for key in results:
        if len(results[key]) > 0:
            results[key] = pd.DataFrame(results[key])
            results[key] = results[key].sort_values(list(dict_name.keys()),
                                                    ascending=[True] *
                                                    len(dict_name))
            fname = os.path.join(dir_out, '{}_bestepoch.csv'.format(key))
            results[key].to_csv(fname, index=False)

    print('Results aggregated across models saved in {}'.format(dir_out))
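
Usage sketch (not part of the original listing): agg_batch is assumed to run after agg_runs has aggregated each experiment in a grid-search output directory; the parent directory name is hypothetical.

import os

parent_dir = 'results/example_grid_example'   # hypothetical grid output directory
for run in os.listdir(parent_dir):            # aggregate seeds of each experiment first
    if run != 'agg':
        agg_runs(os.path.join(parent_dir, run))
agg_batch(parent_dir, metric_best='auto')
# -> <split>_best.csv, <split>.csv and <split>_bestepoch.csv under results/example_grid_example/agg/
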
Code example #4
def gen_grid_sample(args, config, config_budget={}, compare_alias_list=[]):
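    # Like gen_grid, but only a sample of the grid is generated: for each design
    # dimension in `compare_alias_list`, roughly `args.sample_num` configurations
    # are sampled over the remaining dimensions (split proportionally across grid
    # chunks) and each sample is then swept over all values of that dimension.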
    task_name = f'{get_fname(args.config)}_grid_{get_fname(args.grid)}'
    fname_start = get_fname(args.config)
    out_dir = f'{args.out_dir}/{task_name}'
    makedirs_rm_exist(out_dir)
    config['out_dir'] = os.path.join(config['out_dir'], task_name)
    outs = load_search_file(args.grid)

    counts = []
    for out in outs:
        vars_grid = [string_to_python(row[2]) for row in out]
        count = 1
        for var in vars_grid:
            count *= len(var)
        counts.append(count)
    counts = np.array(counts)
    print('Total size of each chunk of experiment space:', counts)
    counts = counts / np.sum(counts)
    counts = np.round(counts * args.sample_num)
    counts[0] += args.sample_num - np.sum(counts)
    print('Total sample size of each chunk of experiment space:', counts)

    for i, out in enumerate(outs):
        vars_label = [row[0].split('.') for row in out]
        vars_alias = [row[1] for row in out]
        if i == 0:
            print(f'Variable label: {vars_label}')
            print(f'Variable alias: {vars_alias}')
        vars_grid = [string_to_python(row[2]) for row in out]
        for alias in compare_alias_list:
            alias_id = vars_alias.index(alias)
            vars_grid_select = copy.deepcopy(vars_grid[alias_id])
            vars_grid[alias_id] = [vars_grid[alias_id][0]]
            vars_value = grid2list_sample(vars_grid, counts[i])

            vars_value_new = []
            for vars in vars_value:
                for grid in vars_grid_select:
                    vars[alias_id] = grid
                    vars_value_new.append(copy.deepcopy(vars))
            vars_value = vars_value_new

            vars_grid[alias_id] = vars_grid_select
            for vars in vars_value:
                config_out = config.copy()
                fname_out = fname_start + f'-sample={vars_alias[alias_id]}'
                for id, var in enumerate(vars):
                    if len(vars_label[id]) == 1:
                        config_out[vars_label[id][0]] = var
                    elif len(vars_label[id]) == 2:
                        if vars_label[id][0] in config_out:  # if key1 exist
                            config_out[vars_label[id][0]][vars_label[id]
                                                          [1]] = var
                        else:
                            config_out[vars_label[id][0]] = {
                                vars_label[id][1]: var
                            }
                    else:
                        raise ValueError(
                            'Only 2-level config files are supported')
                    var_repr = str(var).strip("[]").strip("''")
                    fname_out += f'-{vars_alias[id]}={var_repr}'
                if len(config_budget) > 0:
                    config_out = match_baseline_cfg(config_out,
                                                    config_budget,
                                                    verbose=False)
                with open(f'{out_dir}/{fname_out}.yaml', "w") as f:
                    yaml.dump(config_out, f, default_flow_style=False)
            print(f'Chunk {i + 1}/{len(outs)}: '
                  f'Perturbing design dimension {alias}, '
                  f'{len(vars_value)} configurations saved to: {out_dir}')
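
Usage sketch (not part of the original listing): a minimal driver for gen_grid_sample under the same assumptions as the gen_grid sketch, plus a sample_num attribute and an illustrative alias name 'act'.

# Hypothetical driver for gen_grid_sample; attribute names, paths and the
# alias 'act' are assumptions for illustration.
import argparse
import yaml

parser = argparse.ArgumentParser()
parser.add_argument('--config', default='configs/example.yaml')
parser.add_argument('--grid', default='grids/example.txt')
parser.add_argument('--out_dir', default='configs')
parser.add_argument('--sample_num', type=int, default=10)  # total configs to sample
args = parser.parse_args()

with open(args.config) as f:
    config = yaml.safe_load(f)  # nested dict; must contain an 'out_dir' key

gen_grid_sample(args, config, compare_alias_list=['act'])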