Ejemplo n.º 1
0
def main(output_dir='/Users/dcard/Dropbox/CMU/DAP/results/'):
    """Build a per-dataset summary table for one experiment and save it as CSV.

    For each test metric, the matching result files are located under
    ``defines.exp_dir/*/test_fold_0/<exp_name>/results/`` and handed to
    ``gather_results`` together with the row label to use. The return value
    of ``gather_results`` is not used, so it is expected to fill ``df`` in
    place (presumably one column per dataset -- confirm against its
    definition).

    Afterwards, for every raw label CSV whose dataset name is already a
    column of the table, the shape of that dataset's label matrix is recorded
    in the rows 'Number of responses' (``shape[0]``) and 'Number of labels'
    (``shape[1]``).

    Args:
        output_dir: Directory passed to ``fh.make_filename`` to build the
            output path. Defaults to the location that was previously
            hard-coded, so existing ``main()`` calls behave as before.
    """
    exp_dir = defines.exp_dir
    exp_name = 'bayes_opt_LR_alphas_reuse'
    df = pd.DataFrame()

    # (result file name, row label) pairs; keeping them together means the
    # two can never drift out of step the way two parallel lists can.
    metrics = [
        ('test_acc.csv', 'model accuracy'),
        ('test_micro_f1.csv', 'model micro f1'),
        ('test_macro_f1.csv', 'model macro f1'),
        ('test_pp.csv', 'model percent perfect'),
    ]

    for basename, rowname in metrics:
        pattern = os.path.join(exp_dir, '*', 'test_fold_0', exp_name, 'results', basename)
        gather_results(df, glob.glob(pattern), rowname)

    label_files = glob.glob(os.path.join(defines.data_raw_labels_dir, '*.csv'))
    for label_file in label_files:
        dataset = fh.get_basename(label_file)
        # Skip datasets without results before loading their labels; the
        # loaded labels would be discarded anyway.
        if dataset not in df.columns:
            continue
        codes = labels.get_dataset_labels(dataset)
        df.loc['Number of responses', dataset] = codes.shape[0]
        df.loc['Number of labels', dataset] = codes.shape[1]

    output_filename = fh.make_filename(output_dir, exp_name, 'csv')
    df.to_csv(output_filename)
Ejemplo n.º 2
0
def find_most_errors(dataset, filename):
    """Print, for every row of a predictions CSV, its total absolute error.

    The predictions are read from ``filename`` and compared with the labels
    returned by ``labels.get_dataset_labels(dataset)``. For each index value
    of the predictions, the sum of absolute differences between the predicted
    row and the label row with the same index is printed, one value per line.

    Despite the name, nothing is ranked or returned; the caller has to read
    the printed totals to see which rows have the most errors.

    Args:
        dataset: Dataset name passed to ``labels.get_dataset_labels``.
        filename: Path of the predictions CSV.
    """
    # NOTE(review): read_csv is called without index_col, so the predictions
    # get a default integer index; this assumes the label frame can be looked
    # up by those same index values and shares column names -- TODO confirm.
    predicted = pd.read_csv(filename)
    df_labels = labels.get_dataset_labels(dataset)

    for row_id in predicted.index:
        row_error = np.abs(predicted.loc[row_id, :] - df_labels.loc[row_id, :])
        # Single-argument call form prints identically on Python 2 and 3.
        print(np.sum(row_error))