def main():
    """Collect test-set metrics across experiment result dirs into one summary CSV.

    Reads per-dataset metric files produced under
    ``<exp_dir>/<dataset>/test_fold_0/<exp_name>/results/``, adds label-matrix
    dimensions for each dataset, and writes the combined table to a CSV file.
    Side effects only (writes a file); returns None.
    """
    exp_dir = defines.exp_dir
    exp_name = 'bayes_opt_LR_alphas_reuse'
    df = pd.DataFrame()

    metric_files = ['test_acc.csv', 'test_micro_f1.csv',
                    'test_macro_f1.csv', 'test_pp.csv']
    metric_rows = ['model accuracy', 'model micro f1',
                   'model macro f1', 'model percent perfect']
    # One summary row per metric, gathered across every dataset's results dir.
    for basename, rowname in zip(metric_files, metric_rows):
        matches = glob.glob(os.path.join(exp_dir, '*', 'test_fold_0',
                                         exp_name, 'results', basename))
        gather_results(df, matches, rowname)

    # Annotate each dataset column with the dimensions of its label matrix.
    label_files = glob.glob(os.path.join(defines.data_raw_labels_dir, '*.csv'))
    for label_file in label_files:
        dataset = fh.get_basename_wo_ext(label_file)
        codes = label_reader.get_dataset_labels(dataset)
        # Only datasets that actually produced results get annotated.
        if dataset in df.columns:
            df.loc['Number of responses', dataset] = codes.shape[0]
            df.loc['Number of labels', dataset] = codes.shape[1]

    output_dir = '/Users/dcard/Dropbox/CMU/DAP/results/'
    output_filename = fh.make_filename(output_dir, exp_name, 'csv')
    df.to_csv(output_filename)
def find_most_errors(dataset, filename):
    """Print the total absolute label error for each predicted response.

    :param dataset: dataset name passed to label_reader to load true labels
    :param filename: path to a CSV of predicted labels (read via pd.read_csv)

    For each row index in the predictions, prints the sum of absolute
    differences between the predicted and true label vectors.  Returns None.
    """
    predicted = pd.read_csv(filename)
    df_labels = label_reader.get_dataset_labels(dataset)
    for i in predicted.index:
        # NOTE(review): assumes `predicted` and `df_labels` share the same
        # row index and column order -- confirm against whatever writes
        # `filename`; a mismatch would silently produce NaN sums.
        # Parenthesized print works identically on Python 2 and 3
        # (the original bare `print` statement is Python-2-only syntax).
        print(np.sum(np.abs(predicted.loc[i, :] - df_labels.loc[i, :])))