def _sort_by_column(frame, column):
    """Return ``frame`` sorted in ascending order by ``column``.

    Newer pandas releases provide ``DataFrame.sort_values`` in place of
    ``DataFrame.sort``; use whichever one the installed pandas offers so the
    script runs on both.
    """
    if hasattr(frame, "sort_values"):
        return frame.sort_values(column)
    return frame.sort(column)


def main():
    """Summarize the macro-F1 results of the ``bayes*reuse*`` runs.

    Command line: ``%prog exp_dir_test_fold_dir`` (the ``-t`` option is
    accepted but not used by this summary).

    For every directory under the given experiment directory that matches
    ``bayes*reuse*``, whose trailing ``_<number>`` is at most 40 and whose
    basename does not contain ``"1_"``, the mean of the ``overall`` column is
    read from the test, validation and masked-validation macro-F1 CSV files
    in its ``results`` subdirectory.  The collected rows are written to a
    ``summary`` CSV in the experiment directory, printed sorted by validation
    score, and the best row by masked-validation and by validation score is
    printed.

    Runs whose result files are missing or unreadable are skipped with a
    warning on stderr.
    """
    import sys

    usage = "%prog exp_dir_test_fold_dir"
    parser = OptionParser(usage=usage)
    parser.add_option("-t", dest="test_fold", default=0,
                      help="Test fold; default=%default")
    (_options, args) = parser.parse_args()
    if not args:
        # Exit with the usage message instead of an IndexError traceback.
        parser.error("missing required argument: exp_dir_test_fold_dir")
    exp_dir = args[0]

    results = pd.DataFrame(columns=("masked", "test", "valid", "dir"))
    run_dirs = glob.glob(os.path.join(exp_dir, "bayes*reuse*"))
    for i, run_dir in enumerate(run_dirs):
        run_name = fh.get_basename(run_dir)
        try:
            run_num = int(run_name.split("_")[-1])
        except ValueError:
            # Matches the glob but has no numeric run suffix: not a run.
            continue
        if run_num > 40 or "1_" in run_name:
            continue

        results_dir = os.path.join(run_dir, "results")
        test_file = fh.make_filename(results_dir, "test_macro_f1", "csv")
        valid_file = fh.make_filename(results_dir, "valid_cv_macro_f1", "csv")
        masked_valid_file = fh.make_filename(
            results_dir, "masked_valid_cv_macro_f1", "csv")
        try:
            # header=0: the first line holds the column names ("overall" is
            # looked up by name below).  A boolean header is rejected by
            # current pandas.
            test = pd.read_csv(test_file, header=0, index_col=0)
            valid = pd.read_csv(valid_file, header=0, index_col=0)
            masked_valid = pd.read_csv(masked_valid_file, header=0,
                                       index_col=0)
            # Compute the full row before touching `results` so a failure
            # cannot leave a half-filled row behind.
            row = [
                masked_valid["overall"].mean(),
                test["overall"].mean(),
                valid["overall"].mean(),
                run_name,
            ]
        except (IOError, OSError, KeyError, ValueError) as err:
            # Best effort: incomplete runs are skipped, but not silently.
            sys.stderr.write("skipping %s: %s\n" % (run_name, err))
            continue
        # Values are listed in the same order as the frame's columns.
        results.loc[i] = row

    results.to_csv(fh.make_filename(exp_dir, "summary", "csv"),
                   columns=results.columns)

    by_valid = _sort_by_column(results, "valid")
    print(by_valid)
    if results.empty:
        # No run was collected, so there is no "best" row to report.
        return
    print("best by masked")
    print(_sort_by_column(results, "masked").values[-1, :])
    print("best by valid")
    print(by_valid.values[-1, :])
def main():
    """Gather test-set metrics for one experiment into a single CSV.

    For each metric file name, all matching result files under
    ``defines.exp_dir/*/test_fold_0/<exp_name>/results/`` are handed to
    ``gather_results`` together with the table and a row label (presumably
    it fills that row of the table in place; see its definition).  Then, for
    every raw label CSV whose dataset name is a column of the table, the
    number of rows and columns of that dataset's labels are recorded.  The
    table is finally written to a CSV named after the experiment.
    """
    exp_name = 'bayes_opt_LR_alphas_reuse'
    exp_dir = defines.exp_dir
    summary = pd.DataFrame()

    # (result file name, row label) pairs, processed in this order.
    metric_rows = (
        ('test_acc.csv', 'model accuracy'),
        ('test_micro_f1.csv', 'model micro f1'),
        ('test_macro_f1.csv', 'model macro f1'),
        ('test_pp.csv', 'model percent perfect'),
    )
    for basename, rowname in metric_rows:
        pattern = os.path.join(exp_dir, '*', 'test_fold_0', exp_name,
                               'results', basename)
        gather_results(summary, glob.glob(pattern), rowname)

    label_pattern = os.path.join(defines.data_raw_labels_dir, '*.csv')
    for label_path in glob.glob(label_pattern):
        dataset = fh.get_basename(label_path)
        # Labels are loaded for every dataset, before the column check.
        codes = labels.get_dataset_labels(dataset)
        if dataset not in summary.columns:
            continue
        n_responses, n_labels = codes.shape[0], codes.shape[1]
        summary.loc['Number of responses', dataset] = n_responses
        summary.loc['Number of labels', dataset] = n_labels

    destination = fh.make_filename('/Users/dcard/Dropbox/CMU/DAP/results/',
                                   exp_name, 'csv')
    summary.to_csv(destination)