Example #1
0
def _sort_by(frame, column):
    """Return ``frame`` sorted in ascending order of ``column``.

    ``DataFrame.sort`` was removed in pandas 0.20 in favour of
    ``DataFrame.sort_values``; the old method is only used as a fallback on
    pandas versions that do not provide ``sort_values``.
    """
    if hasattr(frame, "sort_values"):
        return frame.sort_values(column)
    return frame.sort(column)


def main():
    """Summarise macro-F1 results of the ``bayes*reuse*`` runs in a directory.

    Command line: ``%prog exp_dir_test_fold_dir [-t TEST_FOLD]``.

    For every sub-directory of the given directory matching ``bayes*reuse*``
    whose trailing ``_<number>`` is at most 40 and whose basename does not
    contain ``"1_"``, the mean of the ``"overall"`` column is read from the
    test, validation and masked-validation macro-F1 csv files in its
    ``results`` directory. Runs whose files cannot be read are skipped.

    The collected table is written to the csv named by
    ``fh.make_filename(exp_dir, "summary", "csv")`` and printed sorted by
    validation score, followed by the last (highest-scoring) row when sorted
    by masked-validation score and by validation score.
    """
    usage = "%prog exp_dir_test_fold_dir"
    parser = OptionParser(usage=usage)

    # The option is kept so existing command lines still parse; its value is
    # not used anywhere below.
    parser.add_option("-t", dest="test_fold", default=0, help="Test fold; default=%default")

    (options, args) = parser.parse_args()
    if not args:
        # Exit with a usage message instead of an IndexError on args[0].
        parser.error("missing required argument: exp_dir_test_fold_dir")
    exp_dir = args[0]

    results = pd.DataFrame(columns=("masked", "test", "valid", "dir"))

    run_dirs = glob.glob(os.path.join(exp_dir, "bayes*reuse*"))
    for i, run_dir in enumerate(run_dirs):
        run_name = fh.get_basename(run_dir)
        run_num = int(run_name.split("_")[-1])

        if run_num > 40 or "1_" in run_name:
            continue

        results_dir = os.path.join(run_dir, "results")
        test_file = fh.make_filename(results_dir, "test_macro_f1", "csv")
        valid_file = fh.make_filename(results_dir, "valid_cv_macro_f1", "csv")
        masked_valid_file = fh.make_filename(results_dir, "masked_valid_cv_macro_f1", "csv")

        try:
            # header=0: the first row holds the column names ("overall" is
            # looked up by name below). Current pandas rejects a boolean
            # header argument.
            test = pd.read_csv(test_file, header=0, index_col=0)
            valid = pd.read_csv(valid_file, header=0, index_col=0)
            masked_valid = pd.read_csv(masked_valid_file, header=0, index_col=0)

            # Compute every score before touching `results`, so a failure
            # cannot leave a partially filled row behind.
            masked_score = masked_valid["overall"].mean()
            test_score = test["overall"].mean()
            valid_score = valid["overall"].mean()
        except (IOError, OSError, KeyError, ValueError):
            # Missing/unreadable/unparsable file or no "overall" column:
            # skip this run. Anything else (including Ctrl-C) propagates.
            continue

        # results.loc[run_num, 'iteration'] = run_num
        results.loc[i, "masked"] = masked_score
        results.loc[i, "test"] = test_score
        results.loc[i, "valid"] = valid_score
        results.loc[i, "dir"] = run_name

    results.to_csv(fh.make_filename(exp_dir, "summary", "csv"), columns=results.columns)

    if results.empty:
        # Nothing to rank; indexing the last row below would raise IndexError.
        print("no results found")
        return

    by_valid = _sort_by(results, "valid")
    by_masked = _sort_by(results, "masked")

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(by_valid)

    print("best by masked")
    print(by_masked.values[-1, :])

    print("best by valid")
    print(by_valid.values[-1, :])
Example #2
0
def main(exp_name='bayes_opt_LR_alphas_reuse',
         output_dir='/Users/dcard/Dropbox/CMU/DAP/results/'):
    """Collect per-dataset test metrics for one experiment into a single csv.

    For each metric file name, the matching result files under
    ``defines.exp_dir/*/test_fold_0/<exp_name>/results/`` are passed to
    ``gather_results`` together with the row label for that metric
    (presumably it fills the corresponding row of the table in place;
    confirm against its definition). For every csv in
    ``defines.data_raw_labels_dir`` whose basename is already a column of the
    table, the two dimensions of its label matrix are added as the
    'Number of responses' and 'Number of labels' rows.

    Args:
        exp_name: Name of the experiment sub-directory to read; also used as
            the base name of the output file.
        output_dir: Directory in which the output csv is written. The default
            is the original author's local path; pass another directory when
            running elsewhere.
    """
    exp_dir = defines.exp_dir
    df = pd.DataFrame()

    # (result file name, row label) pairs, kept together so they cannot drift
    # out of step.
    metrics = [
        ('test_acc.csv', 'model accuracy'),
        ('test_micro_f1.csv', 'model micro f1'),
        ('test_macro_f1.csv', 'model macro f1'),
        ('test_pp.csv', 'model percent perfect'),
    ]

    for basename, rowname in metrics:
        files = glob.glob(os.path.join(exp_dir, '*', 'test_fold_0', exp_name, 'results', basename))
        gather_results(df, files, rowname)

    label_files = glob.glob(os.path.join(defines.data_raw_labels_dir, '*.csv'))
    for label_file in label_files:
        dataset = fh.get_basename(label_file)
        if dataset in df.columns:
            # Load labels only for datasets that actually have results.
            codes = labels.get_dataset_labels(dataset)
            df.loc['Number of responses', dataset] = codes.shape[0]
            df.loc['Number of labels', dataset] = codes.shape[1]

    output_filename = fh.make_filename(output_dir, exp_name, 'csv')
    df.to_csv(output_filename)