if __name__ == "__main__":
    # Ask numpy to print small floats in fixed-point form instead of
    # scientific notation.
    np.set_printoptions(suppress=True)

    # Command-line interface: one results directory plus the
    # (dataset, algorithm, test_env) triple to report on.
    parser = argparse.ArgumentParser(
        description="Domain generalization testbed"
    )
    for flag in ("--input_dir", "--dataset", "--algorithm"):
        parser.add_argument(flag, required=True)
    parser.add_argument("--test_env", type=int, required=True)
    args = parser.parse_args()

    records = reporting.load_records(args.input_dir)
    print("Total records:", len(records))

    def _is_requested(r):
        """Return whether grouped record `r` matches the CLI selection."""
        return (
            r["dataset"] == args.dataset
            and r["algorithm"] == args.algorithm
            and r["test_env"] == args.test_env
        )

    # Group the raw records, then keep only the groups for the requested
    # dataset / algorithm / test environment.
    records = reporting.get_grouped_records(records).filter(_is_requested)

    # Model-selection strategies to report, in the order they are printed.
    SELECTION_METHODS = [
        model_selection.IIDAccuracySelectionMethod,
        model_selection.LeaveOneOutSelectionMethod,
        model_selection.OracleSelectionMethod,
    ]

    for selection_method in SELECTION_METHODS:
        print(f'Model selection: {selection_method.name}')
def todo_rename(records, selection_method, latex):
    """Print per-dataset accuracy tables, then a cross-dataset averages table.

    Each grouped record is annotated with
    ``selection_method.sweep_acc(group["records"])``; groups for which that
    call returns ``None`` are dropped.  Cells are produced by
    ``format_mean``; a row's "Avg" cell is "X" when any mean in the row is
    ``None``.

    Args:
        records: Run records; passed to ``reporting.get_grouped_records``
            and wrapped in ``Q`` to read algorithm and dataset names.
        selection_method: Object providing ``sweep_acc(records)`` and a
            ``name`` attribute (used in the table headers).
        latex: When truthy, LaTeX subsubsection headings are printed and
            the flag is forwarded to ``format_mean`` and ``print_table``.

    NOTE(review): this function calls ``format_mean`` with two arguments
    while ``print_results_tables`` in this file calls it with three --
    confirm which signature is current.  The function name also looks like
    a placeholder.
    """

    def _with_sweep_acc(group):
        # Copy the group and attach the accuracy picked by the selection
        # method for this group's records.
        return {
            **group,
            "sweep_acc": selection_method.sweep_acc(group["records"]),
        }

    def _has_sweep_acc(g):
        return g["sweep_acc"] is not None

    def _seed_mean(trial_seed, group):
        # Two parameters on purpose: called with (trial_seed, group) pairs.
        return group.select("sweep_acc").mean()

    def _row_average(row_means):
        # A missing mean anywhere in the row invalidates the row average.
        if None in row_means:
            return "X"
        return "{:.1f}".format(sum(row_means) / len(row_means))

    grouped_records = (
        reporting.get_grouped_records(records)
        .map(_with_sweep_acc)
        .filter(_has_sweep_acc)
    )

    # Algorithms: those listed in algorithms.ALGORITHMS first (in that
    # order), followed by any others found in the records.
    seen_algs = Q(records).select("args.algorithm").unique()
    listed_algs = [n for n in algorithms.ALGORITHMS if n in seen_algs]
    extra_algs = [n for n in seen_algs if n not in algorithms.ALGORITHMS]
    alg_names = listed_algs + extra_algs

    # Datasets: only those listed in datasets.DATASETS, in that order.
    seen_datasets = Q(records).select("args.dataset").unique().sorted()
    dataset_names = [d for d in datasets.DATASETS if d in seen_datasets]

    # One table per dataset: rows are algorithms, columns are test envs.
    for dataset in dataset_names:
        if latex:
            print()
            print("\\subsubsection{{{}}}".format(dataset))
        test_envs = range(datasets.num_environments(dataset))

        table = []
        for algorithm in alg_names:
            cells = []
            means = []
            for test_env in test_envs:
                matching = grouped_records.filter_equals(
                    "dataset, algorithm, test_env",
                    (dataset, algorithm, test_env),
                )
                trial_accs = matching.select("sweep_acc")
                mean, _err, cell = format_mean(trial_accs, latex)
                cells.append(cell)
                means.append(mean)
            cells.append(_row_average(means))
            table.append(cells)

        col_labels = [
            "Algorithm",
            *datasets.get_dataset_class(dataset).ENVIRONMENTS,
            "Avg",
        ]
        header_text = (
            f"Dataset: {dataset}, "
            f"model selection method: {selection_method.name}"
        )
        print_table(
            table,
            header_text,
            alg_names,
            list(col_labels),
            colwidth=20,
            latex=latex,
        )

    # Averages table: rows are algorithms, columns are datasets.
    if latex:
        print()
        print("\\subsubsection{Averages}")

    table = []
    for algorithm in alg_names:
        cells = []
        means = []
        for dataset in dataset_names:
            trial_averages = (
                grouped_records
                .filter_equals("algorithm, dataset", (algorithm, dataset))
                .group("trial_seed")
                .map(_seed_mean)
            )
            mean, _err, cell = format_mean(trial_averages, latex)
            cells.append(cell)
            means.append(mean)
        cells.append(_row_average(means))
        table.append(cells)

    col_labels = ["Algorithm", *dataset_names, "Avg"]
    header_text = f"Averages, model selection method: {selection_method.name}"
    print_table(
        table,
        header_text,
        alg_names,
        col_labels,
        colwidth=25,
        latex=latex,
    )
def print_results_tables(records, selection_method, latex):
    """Print a results table for each dataset, then an averages table.

    Every grouped record is annotated with
    ``selection_method.sweep_accs(group["records"])``; only the first entry
    of that result is read (presumably the top-ranked one -- confirm
    against the selection method).  Combinations of
    (dataset, algorithm, test_env) that cannot be looked up are reported as
    NaN instead of aborting the report.

    Args:
        records: Run records; passed to ``reporting.get_grouped_records``
            and wrapped in ``Q`` to read algorithm and dataset names.
        selection_method: Object providing ``sweep_accs(records)`` and a
            ``name`` attribute (used in the table headers).
        latex: When truthy, LaTeX subsubsection headings are printed and
            the flag is forwarded to ``format_mean`` and ``print_table``.
    """
    grouped_records = reporting.get_grouped_records(records).map(
        lambda group: {
            **group,
            "sweep_accs": selection_method.sweep_accs(group["records"]),
        }
    )

    # Algorithms: those listed in algorithms.ALGORITHMS first (in that
    # order), followed by any others found in the records.
    alg_names = Q(records).select("args.algorithm").unique()
    alg_names = ([n for n in algorithms.ALGORITHMS if n in alg_names] +
                 [n for n in alg_names if n not in algorithms.ALGORITHMS])

    # Datasets: only those listed in datasets.DATASETS, in that order.
    dataset_names = Q(records).select("args.dataset").unique().sorted()
    dataset_names = [d for d in datasets.DATASETS if d in dataset_names]

    # One table per dataset: rows are algorithms, columns are test envs.
    for dataset in dataset_names:
        if latex:
            print()
            print("\\subsubsection{{{}}}".format(dataset))
        test_envs = range(datasets.num_environments(dataset))

        table = [[None for _ in [*test_envs, "Avg"]] for _ in alg_names]
        for i, algorithm in enumerate(alg_names):
            means = []
            stdevs = []
            for j, test_env in enumerate(test_envs):
                try:
                    acc = grouped_records.filter_equals(
                        "dataset, algorithm, test_env",
                        (dataset, algorithm, test_env)
                    )[0]['sweep_accs'][0]
                    mean = acc['test_acc']
                    stdev = acc['test_acc_std']
                except Exception:
                    # Best effort: a missing group, an empty sweep result or
                    # a missing key all become NaN.  `Exception` rather than
                    # a bare `except` so Ctrl-C / SystemExit still propagate.
                    mean = float('nan')
                    stdev = float('nan')
                means.append(mean)
                stdevs.append(stdev)
                _, _, table[i][j] = format_mean(mean, stdev, latex)

            avg_mean = np.mean(means)
            # sqrt(sum(s_k^2)) / n: the stdev of the mean when the
            # per-environment values are treated as independent
            # (assumption -- confirm).
            avg_stdev = np.sqrt(np.sum(np.array(stdevs)**2)) / len(stdevs)
            _, _, table[i][-1] = format_mean(avg_mean, avg_stdev, latex)

        col_labels = [
            "Algorithm",
            *datasets.get_dataset_class(dataset).ENVIRONMENTS,
            "Avg"
        ]
        header_text = (f"Dataset: {dataset}, "
                       f"model selection method: {selection_method.name}")
        print_table(table, header_text, alg_names, list(col_labels),
                    colwidth=20, latex=latex)

    # Averages table: rows are algorithms, columns are datasets.
    if latex:
        print()
        print("\\subsubsection{Averages}")

    table = [[None for _ in [*dataset_names, "Avg"]] for _ in alg_names]
    for i, algorithm in enumerate(alg_names):
        means = []
        for j, dataset in enumerate(dataset_names):
            try:
                mean = (grouped_records
                        .filter_equals("algorithm, dataset",
                                       (algorithm, dataset))
                        .select(lambda x: x['sweep_accs'][0]['test_acc'])
                        .mean())
            except Exception:
                # Same best-effort policy as above: no usable records for
                # this (algorithm, dataset) pair yields NaN, while
                # KeyboardInterrupt / SystemExit are not swallowed.
                mean = float('nan')
            # Scale for display as a percentage (presumably test_acc is a
            # fraction in [0, 1] -- confirm).
            mean *= 100.
            table[i][j] = "{:.1f}".format(mean)
            means.append(mean)
        # No dataset columns at all: report NaN rather than dividing by zero.
        overall = sum(means) / len(means) if means else float('nan')
        table[i][-1] = "{:.1f}".format(overall)

    col_labels = ["Algorithm", *dataset_names, "Avg"]
    header_text = f"Averages, model selection method: {selection_method.name}"
    print_table(table, header_text, alg_names, col_labels, colwidth=25,
                latex=latex)