def print_results_tables(records, selection_method, latex):
    """Print one results table per dataset, then a table of averages.

    Records are grouped with ``get_grouped_records``; each group is scored
    with ``selection_method.sweep_acc`` and groups whose score is ``None``
    are dropped. Every table has one row per algorithm.

    Args:
        records: the records to report on (wrapped in ``Q`` for querying).
        selection_method: object providing ``sweep_acc(records)`` and a
            ``name`` attribute used in the table headers.
        latex: forwarded unchanged to ``format_mean`` and ``print_table``.
    """
    def attach_sweep_acc(group):
        return {
            **group, 'sweep_acc': selection_method.sweep_acc(group['records'])
        }

    scored_groups = get_grouped_records(records).map(attach_sweep_acc).filter(
        lambda g: g['sweep_acc'] is not None)

    # Algorithms listed in algorithms.ALGORITHMS come first, in that order;
    # any other algorithm found in the records follows.
    found_algs = Q(records).select('args.algorithm').unique()
    known_algs = [n for n in algorithms.ALGORITHMS if n in found_algs]
    other_algs = [n for n in found_algs if n not in algorithms.ALGORITHMS]
    alg_names = known_algs + other_algs

    # Dataset names, sorted.
    dataset_names = Q(records).select('args.dataset').unique().sorted()

    # One table per dataset: rows are algorithms, columns are test envs.
    for dataset in dataset_names:
        env_ids = range(datasets.NUM_ENVIRONMENTS[dataset])

        rows = []
        for algorithm in alg_names:
            cells = []
            for env_id in env_ids:
                env_accs = scored_groups.filter_equals(
                    'dataset, algorithm, test_env',
                    (dataset, algorithm, env_id)).select('sweep_acc')
                cells.append(format_mean(env_accs, latex))
            rows.append(cells)

        env_names = datasets.get_dataset_class(dataset).ENVIRONMENT_NAMES
        labels = ['Algorithm'] + list(env_names)
        title = (f'Dataset: {dataset}, '
                 f'model selection method: {selection_method.name}')
        print_table(rows, title, alg_names, labels, colwidth=20, latex=latex)

    # Averages table: rows are algorithms, columns are datasets. Each cell
    # summarises the per-trial-seed means of 'sweep_acc'.
    avg_rows = []
    for algorithm in alg_names:
        cells = []
        for dataset in dataset_names:
            matching = scored_groups.filter_equals(
                'algorithm, dataset', (algorithm, dataset))
            per_seed_means = matching.group('trial_seed').map(
                lambda _seed, seed_group: seed_group.select('sweep_acc').mean())
            cells.append(format_mean(per_seed_means, latex))
        avg_rows.append(cells)

    avg_labels = ['Algorithm', *dataset_names]
    avg_title = f'Averages, model selection method: {selection_method.name}'
    print_table(avg_rows,
                avg_title,
                alg_names,
                avg_labels,
                colwidth=25,
                latex=latex)
def get_grouped_records(records):
    """Bucket records by (trial_seed, dataset, algorithm, test_env).

    A record is added once for every entry in its ``args.test_envs``, so a
    record with several test envs shows up in several groups.

    Returns a ``Q`` of dicts with keys ``trial_seed``, ``dataset``,
    ``algorithm``, ``test_env`` and ``records`` (a ``Q`` of the group's
    members).
    """
    buckets = collections.defaultdict(list)
    for record in records:
        args = record['args']
        for test_env in args['test_envs']:
            key = (args['trial_seed'],
                   args['dataset'],
                   args['algorithm'],
                   test_env)
            buckets[key].append(record)

    groups = []
    for (trial_seed, dataset, algorithm, test_env), members in buckets.items():
        groups.append({
            'trial_seed': trial_seed,
            'dataset': dataset,
            'algorithm': algorithm,
            'test_env': test_env,
            'records': Q(members),
        })
    return Q(groups)
Beispiel #3
0
def get_grouped_records(records):
    """Group records by their (trial_seed, dataset, algorithm, test_env) key.

    Each record contributes to one group per element of its
    ``args.test_envs``, so it may belong to more than one group. The result
    is a ``Q`` of dicts, one per group, carrying the four key fields plus the
    group's members under ``records`` (itself a ``Q``).
    """
    def group_key(record, test_env):
        args = record["args"]
        return (args["trial_seed"], args["dataset"], args["algorithm"],
                test_env)

    members_by_key = {}
    for record in records:
        for test_env in record["args"]["test_envs"]:
            members_by_key.setdefault(
                group_key(record, test_env), []).append(record)

    field_names = ("trial_seed", "dataset", "algorithm", "test_env")
    return Q([
        {**dict(zip(field_names, key)), "records": Q(members)}
        for key, members in members_by_key.items()
    ])
Beispiel #4
0
def load_records(path):
    """Load every record from ``<path>/<subdir>/results.jsonl``.

    Each entry of ``path`` is treated as a run directory; its
    ``results.jsonl`` is read line by line, one JSON document per line.
    Entries whose results file cannot be opened or read are skipped.

    Args:
        path: directory whose entries are scanned for ``results.jsonl``.

    Returns:
        A ``Q`` wrapping the list of decoded records, in the order read.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    records = []
    for subdir in tqdm.tqdm(os.listdir(path), ncols=80, leave=False):
        results_path = os.path.join(path, subdir, "results.jsonl")
        try:
            with open(results_path, "r") as f:
                for line in f:
                    # Strip whitespace rather than chopping the last
                    # character: a final line without a trailing newline
                    # would otherwise lose its closing brace.
                    line = line.strip()
                    if line:
                        records.append(json.loads(line))
        except IOError:
            # Best effort: entries without a readable results file (e.g.
            # unfinished runs or stray files) are ignored on purpose.
            pass

    return Q(records)
Beispiel #5
0
                last_results_keys = results_keys
            misc.print_row([results[key] for key in results_keys], colwidth=12)

            results.update({'hparams': hparams, 'args': vars(args)})

            epochs_path = os.path.join(args.output_dir, 'results.jsonl')
            with open(epochs_path, 'a') as f:
                f.write(json.dumps(results, sort_keys=True) + "\n")

            algorithm_dict = algorithm.state_dict()
            start_step = step + 1
            checkpoint_vals = collections.defaultdict(lambda: [])

            records = []
            with open(epochs_path, 'r') as f:
                for line in f:
                    records.append(json.loads(line[:-1]))
            records = Q(records)
            scores = records.map(
                model_selection.IIDAccuracySelectionMethod._step_acc)
            if scores[-1] == scores.argmax('val_acc'):
                save_checkpoint('IID_best.pkl')
                algorithm.to(device)

            if args.save_model_every_checkpoint:
                save_checkpoint(f'model_step{step}.pkl')
    save_checkpoint('model.pkl')

    with open(os.path.join(args.output_dir, 'done'), 'w') as f:
        f.write('done')
def _format_row_average(means):
    """Return the "Avg" cell for a row of per-column means.

    Gives "X" when the row is empty or any mean is ``None``; otherwise the
    arithmetic mean formatted to one decimal place.
    """
    if not means or None in means:
        return "X"
    return "{:.1f}".format(sum(means) / len(means))


def todo_rename(records, selection_method, latex):
    """Print one results table per dataset, then a table of averages.

    Records are grouped with ``reporting.get_grouped_records``; each group is
    scored with ``selection_method.sweep_acc`` and groups whose score is
    ``None`` are dropped. Every table has one row per algorithm and a
    trailing "Avg" column.

    Args:
        records: the records to report on (wrapped in ``Q`` for querying).
        selection_method: object providing ``sweep_acc(records)`` and a
            ``name`` attribute used in the table headers.
        latex: when truthy, a ``\\subsubsection`` heading is printed before
            each table; also forwarded to ``format_mean`` and
            ``print_table``.
    """
    grouped_records = reporting.get_grouped_records(records).map(lambda group:
        { **group, "sweep_acc": selection_method.sweep_acc(group["records"]) }
    ).filter(lambda g: g["sweep_acc"] is not None)

    # read algorithm names and sort (predefined order first, then the rest)
    alg_names = Q(records).select("args.algorithm").unique()
    alg_names = ([n for n in algorithms.ALGORITHMS if n in alg_names] +
        [n for n in alg_names if n not in algorithms.ALGORITHMS])

    # read dataset names; keep only those listed in datasets.DATASETS, in
    # that list's order
    dataset_names = Q(records).select("args.dataset").unique().sorted()
    dataset_names = [d for d in datasets.DATASETS if d in dataset_names]

    for dataset in dataset_names:
        if latex:
            print()
            print("\\subsubsection{{{}}}".format(dataset))
        test_envs = range(datasets.num_environments(dataset))

        table = [[None for _ in [*test_envs, "Avg"]] for _ in alg_names]
        for i, algorithm in enumerate(alg_names):
            means = []
            for j, test_env in enumerate(test_envs):
                trial_accs = (grouped_records
                    .filter_equals(
                        "dataset, algorithm, test_env",
                        (dataset, algorithm, test_env)
                    ).select("sweep_acc"))
                # format_mean yields (mean, err, formatted cell); only the
                # mean and the cell text are needed here.
                mean, _, table[i][j] = format_mean(trial_accs, latex)
                means.append(mean)
            table[i][-1] = _format_row_average(means)

        col_labels = [
            "Algorithm",
            *datasets.get_dataset_class(dataset).ENVIRONMENTS,
            "Avg"
        ]
        header_text = (f"Dataset: {dataset}, "
            f"model selection method: {selection_method.name}")
        print_table(table, header_text, alg_names, list(col_labels),
            colwidth=20, latex=latex)

    # Print an "averages" table
    if latex:
        print()
        print("\\subsubsection{Averages}")

    table = [[None for _ in [*dataset_names, "Avg"]] for _ in alg_names]
    for i, algorithm in enumerate(alg_names):
        means = []
        for j, dataset in enumerate(dataset_names):
            # Average 'sweep_acc' within each trial seed first, then
            # summarise across seeds.
            trial_averages = (grouped_records
                .filter_equals("algorithm, dataset", (algorithm, dataset))
                .group("trial_seed")
                .map(lambda trial_seed, group:
                    group.select("sweep_acc").mean()
                )
            )
            mean, _, table[i][j] = format_mean(trial_averages, latex)
            means.append(mean)
        # With no dataset columns the row average is undefined; the helper
        # reports "X" instead of dividing by zero.
        table[i][-1] = _format_row_average(means)

    col_labels = ["Algorithm", *dataset_names, "Avg"]
    header_text = f"Averages, model selection method: {selection_method.name}"
    print_table(table, header_text, alg_names, col_labels, colwidth=25,
        latex=latex)
Beispiel #7
0
def print_results_tables(records, selection_method, latex):
    """Print one results table per dataset, then a table of averages.

    Records are grouped with ``reporting.get_grouped_records`` and each group
    is annotated with ``selection_method.sweep_accs``. Only the first entry
    of ``sweep_accs`` is reported. Cells whose data is missing or malformed
    are shown as NaN rather than aborting the report.

    Args:
        records: the records to report on (wrapped in ``Q`` for querying).
        selection_method: object providing ``sweep_accs(records)`` and a
            ``name`` attribute used in the table headers.
        latex: when truthy, a ``\\subsubsection`` heading is printed before
            each table; also forwarded to ``format_mean`` and
            ``print_table``.
    """
    grouped_records = reporting.get_grouped_records(records).map(lambda group:
        { **group, "sweep_accs": selection_method.sweep_accs(group["records"]) }
    )

    # read algorithm names and sort (predefined order first, then the rest)
    alg_names = Q(records).select("args.algorithm").unique()
    alg_names = ([n for n in algorithms.ALGORITHMS if n in alg_names] +
        [n for n in alg_names if n not in algorithms.ALGORITHMS])

    # read dataset names; keep only those listed in datasets.DATASETS, in
    # that list's order
    dataset_names = Q(records).select("args.dataset").unique().sorted()
    dataset_names = [d for d in datasets.DATASETS if d in dataset_names]

    for dataset in dataset_names:
        if latex:
            print()
            print("\\subsubsection{{{}}}".format(dataset))
        test_envs = range(datasets.num_environments(dataset))

        table = [[None for _ in [*test_envs, "Avg"]] for _ in alg_names]
        for i, algorithm in enumerate(alg_names):
            means = []
            stdevs = []
            for j, test_env in enumerate(test_envs):
                try:
                    # Only the first matching group is consulted.
                    # NOTE(review): presumably sweep_accs already aggregates
                    # over trial seeds - confirm against the selection method.
                    acc = grouped_records.filter_equals(
                            "dataset, algorithm, test_env",
                            (dataset, algorithm, test_env)
                        )[0]['sweep_accs'][0]
                    mean = acc['test_acc']
                    stdev = acc['test_acc_std']
                except Exception:
                    # Best effort: a missing group, empty sweep or missing
                    # key becomes NaN. Unlike a bare ``except``, this lets
                    # KeyboardInterrupt / SystemExit propagate.
                    mean = float('nan')
                    stdev = float('nan')
                means.append(mean)
                stdevs.append(stdev)
                _, _, table[i][j] = format_mean(mean, stdev, latex)

            avg_mean = np.mean(means)
            # Root-sum-of-squares of the per-env stdevs divided by their
            # count.
            avg_stdev = np.sqrt(np.sum(np.array(stdevs)**2)) / len(stdevs)
            _, _, table[i][-1] = format_mean(avg_mean, avg_stdev, latex)

        col_labels = [
            "Algorithm",
            *datasets.get_dataset_class(dataset).ENVIRONMENTS,
            "Avg"
        ]
        header_text = (f"Dataset: {dataset}, "
            f"model selection method: {selection_method.name}")
        print_table(table, header_text, alg_names, list(col_labels),
            colwidth=20, latex=latex)

    # Print an "averages" table
    if latex:
        print()
        print("\\subsubsection{Averages}")

    table = [[None for _ in [*dataset_names, "Avg"]] for _ in alg_names]
    for i, algorithm in enumerate(alg_names):
        means = []
        for j, dataset in enumerate(dataset_names):
            try:
                mean = (grouped_records
                    .filter_equals("algorithm, dataset", (algorithm, dataset))
                    .select(lambda x: x['sweep_accs'][0]['test_acc'])
                    .mean()
                )
            except Exception:
                # Same best-effort policy as the per-dataset tables.
                mean = float('nan')
            # This table scales the mean by 100 before formatting.
            mean *= 100.
            table[i][j] = "{:.1f}".format(mean)
            means.append(mean)
        # With no dataset columns the row average is undefined; show NaN
        # instead of dividing by zero.
        row_avg = sum(means) / len(means) if means else float('nan')
        table[i][-1] = "{:.1f}".format(row_avg)

    col_labels = ["Algorithm", *dataset_names, "Avg"]
    header_text = f"Averages, model selection method: {selection_method.name}"
    print_table(table, header_text, alg_names, col_labels, colwidth=25,
        latex=latex)