def plot_results_by_revelation_and_max_answer_time(
        experiment_run='clicktionary',
        exclude_workerids=None,
        fit_line=True):
    """Plot CNN and human accuracy by revelation, one human curve per
    max answer time, and save the figure as PNG and PDF.

    The CNN results for the experiment's ``set_index`` are drawn first in
    black. Human data is then loaded for ``experiment_run`` and one curve
    is drawn per value in ``max_answer_times`` (ascending order), each in
    its own cubehelix colour.

    Args:
        experiment_run: Experiment name passed to ``get_settings``,
            ``Data.load`` and used in the output file names.
        exclude_workerids: Accepted but not used by this function.
        fit_line: Forwarded to every ``do_plot`` call.
    """
    # Experiment settings
    settings = get_settings(experiment_run)

    # CNN curve
    cnn_results = get_cnn_results_by_revelation(settings['set_index'])
    do_plot(cnn_results, 'black', 'CNN', fit_line=fit_line)

    # Human curves, one per max answer time
    human_data = Data()
    human_data.load(experiment_run=experiment_run)
    answer_times = sorted(human_data.max_answer_times)
    palette = sns.cubehelix_palette(len(answer_times))
    ax = None
    for answer_time, curve_color in zip(answer_times, palette):
        summary = human_data.get_summary_by_revelation_and_max_answer_time(
            max_answer_time=answer_time)
        revs, scores, subjects, ims = summary
        combined = combine_revs_and_scores(revs, scores, subjects, ims)
        label = 'Human %dms' % answer_time
        # Reuse the axes returned by the previous call so that all human
        # curves are passed the same ``ax``.
        _, ax = do_plot(combined, curve_color, label,
                        fit_line=fit_line, ax=ax)

    heading = ('Accuracy by image revelation and max answer time (n=%d)\n'
               % human_data.n_subjects)
    plt.title(heading + config.get_experiment_desc(experiment_run))
    plt.legend()

    # Both output files share the same path stem.
    out_stem = os.path.join(
        config.plot_path,
        'perf_by_revelation_and_mat_%s' % experiment_run)
    png_path = out_stem + '.png'
    plt.savefig(png_path)
    plt.savefig(out_stem + '.pdf')
    print('Saved to: %s' % png_path)
def load_multi(self, set_indexes, set_name, bootstrap_size=None):
    """Load ground truth and participant records for several set indexes.

    For every entry of ``set_indexes`` the ground truth is loaded and the
    participant records returned by ``load_participant_json`` are
    accumulated. Optionally the accumulated records are resampled with
    replacement before each one is passed to ``load_subject``.

    Args:
        set_indexes: Iterable of set indexes handed one by one to
            ``load_ground_truth``.
        set_name: Name passed to ``get_settings``, ``load_ground_truth``
            and ``load_participant_json``.
        bootstrap_size: When not None, number of participant records to
            draw (with replacement) from the accumulated records. Progress
            output is suppressed in this mode.
    """
    self.p = get_settings(set_name)
    verbose = bootstrap_size is None

    r = []
    for set_index in set_indexes:
        self.load_ground_truth(set_index, set_name)
        r += self.load_participant_json(set_name, verbose=verbose)

    if bootstrap_size is not None:
        # Bootstrap: resample participant records with replacement.
        idcs = np.random.choice(len(r), size=bootstrap_size, replace=True)
        r = [r[i] for i in idcs]

    # Element 0 of each record is used as the subject identifier.
    self.subject_ids = np.unique([subj[0] for subj in r])
    if verbose:
        print('%d unique subjects' % (len(self.subject_ids)))

    for subj in r:
        # Element 3 of each record is a JSON document describing the subject.
        self.load_subject(json.loads(subj[3]))

    if verbose:
        # Guard the percentage: with no trials loaded the original
        # expression raised ZeroDivisionError.
        if self.n_trials:
            timeout_pct = float(self.n_timeouts) / self.n_trials * 100
        else:
            timeout_pct = 0.0
        print('%d trials, %d timeouts (%.1f%%)' % (
            self.n_trials, self.n_timeouts, timeout_pct))
def plot_results_by_revelation(
        experiment_run='clicktionary',
        exclude_workerids=None,
        human_fit_line=['linear'],
        cnn_fit_line=['linear'],
        human_color_pallete='Set1',
        cnn_colorpallete='Greys',
        human_labels=None,
        cnn_labels='VGG16 performance on clicktionary maps',
        cnn_index=None,
        human_ci=95,
        cnn_ci=95,
        data_filter='',
        max_val=200):
    """Plot human and CNN accuracy by image revelation and save the figure.

    Args:
        experiment_run: A single experiment name, or a list of names to
            overlay on one figure.
        exclude_workerids: Forwarded to ``get_human_results_by_revaluation``.
        human_fit_line: Fit-line spec(s) for the human curves. With a list
            of experiments, one entry is used per experiment (the list is
            repeated if it is too short); with a single experiment the
            value is passed to ``do_plot`` unchanged.
        cnn_fit_line: Same as ``human_fit_line`` for the CNN curves.
        human_color_pallete: seaborn palette name for human curves (only
            used when ``experiment_run`` is a list).
        cnn_colorpallete: seaborn palette name for CNN curves (only used
            when ``cnn_index`` is a list).
        human_labels: Optional per-experiment legend labels; when None the
            labels are derived from the digits in each experiment name.
        cnn_labels: Legend label for the CNN curve. When ``cnn_index`` is a
            list this is zipped with it, so it should then be a sequence of
            labels (a plain string would be consumed character by character).
        cnn_index: Set index, or list of set indexes, for the CNN results.
            Defaults to the ``set_index`` of the (last) experiment.
        human_ci: Confidence interval passed to ``do_plot`` for human
            curves in the multi-experiment case.
        cnn_ci: Confidence interval passed to ``do_plot`` for CNN curves.
        data_filter: Forwarded to ``get_human_results_by_revaluation``.
        max_val: Forwarded to ``do_plot``.

    Returns:
        Tuple ``(human_dfs, cnn_dfs, human_ims, None)``. ``human_dfs`` is a
        dict keyed by experiment name, except when exactly two experiments
        were processed, in which case it is a list of two dicts mapping
        each revelation value to an array of per-subject mean correctness.
    """
    # NOTE: the list defaults above are part of the public signature; they
    # are never mutated in place (np.repeat returns a new array).
    human_dfs = {}
    human_ims = {}
    human_stats = {}
    cnn_dfs = {}
    ax = None
    cnn_means = None

    if isinstance(experiment_run, list):
        colors = sns.color_palette(human_color_pallete, len(experiment_run))
        if len(human_fit_line) < len(experiment_run):
            human_fit_line = np.repeat(human_fit_line, len(experiment_run))
            print('Expanding fit_lines to match size of experiment_run')
        for idx, (exp, color, fl) in enumerate(
                zip(experiment_run, colors, human_fit_line)):
            p = get_settings(exp)
            set_index = p['set_index']
            data_human, ims, stats = get_human_results_by_revaluation(
                exp,
                off=0,
                is_inverted_rev=False,
                log_scale=p['log_scale_revelations'],
                exclude_workerids=exclude_workerids,
                data_filter=data_filter)
            if human_labels is None:
                # Derive the label from the non-alphabetic chunks of the
                # experiment name (first chunk: image time, second:
                # response time).
                exp_params = [
                    x for x in re.split('[A-Za-z]+', exp) if len(x) > 0
                ]
                title = 'Human image time: %s | response time: %s' % (
                    exp_params[0], exp_params[1])
            else:
                title = human_labels[idx]
            human_dfs[exp] = data_human
            human_ims[exp] = ims
            human_stats[exp] = stats
            _, ax = do_plot(data_human,
                            color,
                            title,
                            log_scale=p['log_scale_revelations'],
                            max_val=max_val,
                            fit_line=fl,
                            ci=human_ci,
                            ax=ax)
        plt.title('Accuracy by log-spaced image feature revelation\n')
        # From here on the name is only used to build output file names.
        experiment_run = '_'.join(experiment_run)
    else:
        p = get_settings(experiment_run)
        # Fixed: a trailing comma used to turn this into a 1-tuple.
        set_index = p['set_index']
        data_human, ims, stats = get_human_results_by_revaluation(
            experiment_run,
            off=0,
            is_inverted_rev=False,
            log_scale=p['log_scale_revelations'],
            exclude_workerids=exclude_workerids,
            data_filter=data_filter)
        human_dfs[experiment_run] = data_human
        # Fixed: these used the undefined name ``exp`` (NameError).
        human_ims[experiment_run] = ims
        human_stats[experiment_run] = stats
        plt.title('Accuracy by image revelation\n' + p['desc'])
        _, ax = do_plot(data_human,
                        '#fc8d59',
                        'Human',
                        log_scale=p['log_scale_revelations'],
                        max_val=max_val,
                        fit_line=human_fit_line,
                        ax=ax)

    if len(human_stats) == 2:
        # Two experiments: compare them per revelation level using the
        # per-subject mean correctness.
        dfs = [
            pd.DataFrame(x, columns=['Revelation', 'correctness', 'subject'])
            for x in human_dfs.values()
        ]
        human_dfs = []
        for it_df in dfs:
            per_revelation = {}
            for rev in it_df['Revelation'].unique():
                subject_means = it_df[it_df['Revelation'] == rev].groupby(
                    'subject').mean()['correctness']
                # ``.values`` replaces ``.as_matrix()``, which newer pandas
                # releases no longer provide.
                per_revelation[rev] = subject_means.values
            human_dfs += [per_revelation]
        p_values = measure_p_value_2_sample(*human_dfs)
        print('Disabled human/cnn stats until we get synthetic subjects')
        print(p_values)

    # CNN is always the same
    if 'cnn_class_file' in p.keys():
        filter_class_file = p['cnn_class_file']
    else:
        filter_class_file = None
    if cnn_index is None:
        cnn_index = set_index

    if isinstance(cnn_index, list):
        colors = sns.color_palette(cnn_colorpallete, len(cnn_index))
        if len(cnn_fit_line) < len(cnn_index):
            # Fixed: expand to the number of CNN curves; ``experiment_run``
            # may already be a joined string at this point.
            cnn_fit_line = np.repeat(cnn_fit_line, len(cnn_index))
            print('Expanding fit_lines to match size of experiment_run')
        for idx, (si, color, la, fl) in enumerate(
                zip(cnn_index, colors, cnn_labels, cnn_fit_line)):
            data_cnn = get_cnn_results_by_revelation(
                si, filter_class_file=filter_class_file, off=float(idx) / 2)
            if p['log_scale_revelations']:
                data_cnn = apply_log_scale(data_human,
                                           data_cnn,
                                           off=float(idx * 2))
            # NOTE(review): every iteration writes the same key, so only
            # the last CNN result is returned; kept as-is because callers
            # may rely on this key -- confirm whether ``si`` was intended.
            cnn_dfs[cnn_index[0]] = data_cnn
            cnn_means, ax = do_plot(data_cnn,
                                    color,
                                    la,
                                    log_scale=p['log_scale_revelations'],
                                    max_val=max_val,
                                    fit_line=fl,
                                    ci=cnn_ci,
                                    ax=ax)
    else:
        data_cnn = get_cnn_results_by_revelation(
            set_index, filter_class_file=filter_class_file)
        if p['log_scale_revelations']:
            data_cnn = apply_log_scale(data_human, data_cnn)
        # NOTE(review): the key is the literal string 'set_index', not the
        # variable; kept for backward compatibility with callers.
        cnn_dfs['set_index'] = data_cnn
        cnn_means, ax = do_plot(data_cnn,
                                '#91bfdb',
                                cnn_labels,
                                log_scale=p['log_scale_revelations'],
                                # Fixed: honour the parameter instead of a
                                # hard-coded 200 (which is its default).
                                max_val=max_val,
                                fit_line=cnn_fit_line,
                                ci=cnn_ci,
                                ax=ax)

    if human_stats.keys() and cnn_means is not None:
        # Computed but currently neither printed nor returned.
        p_values = {
            k: measure_p_value_human_v_cnn(v, cnn_means)
            for k, v in human_stats.items()
        }
        # print 'Disabled human/cnn stats until we get synthetic subjects'
        # print p_values

    # Count per bin # of people that exceed machines.
    plt.legend(loc='upper left')
    png_path = os.path.join(config.plot_path,
                            'perf_by_revelation_%s.png' % experiment_run)
    plt.savefig(png_path)
    plt.savefig(
        os.path.join(config.plot_path,
                     'perf_by_revelation_%s.pdf' % experiment_run))
    # Fixed: the message used to name a '..._and_mat_...' file that this
    # function never writes.
    print('Saved to: %s' % png_path)
    return human_dfs, cnn_dfs, human_ims, None
def plot_results_by_revaluation_by_class(
        experiment_run,
        class_file,
        exclude_workerids=None,
        is_inverted_rev=False,
        human_ci=66.6,
        cnn_ci=66.6,
        human_fit_line=[''],  # ['linear','logistic'],
        cnn_fit_line=[''],  # ['linear','logistic'],
        human_colorpallete='Set2',
        cnn_colorpallete='Greys',
        fit_line='linear',
        data_filter=''):
    """Plot human accuracy by revelation per class group, plus the CNN curve.

    The class file is read from the clicktionary ``causal_experiment``
    directory. Each non-blank line must be ``<classname> <category>`` with
    an integer category; class names sharing a category are plotted as one
    human curve. The figure is saved as a PNG under ``config.plot_path``.

    Args:
        experiment_run: Experiment name passed to ``get_settings`` and used
            in the output file name.
        class_file: File name, relative to the ``causal_experiment``
            directory, of the class/category listing.
        exclude_workerids: Forwarded to ``get_human_results_by_revaluation``.
        is_inverted_rev: Forwarded to ``get_human_results_by_revaluation``.
        human_ci: Confidence interval passed to ``do_plot`` for human curves.
        cnn_ci: Confidence interval passed to ``do_plot`` for the CNN curve.
        human_fit_line: Fit-line spec passed to ``do_plot`` for human curves.
        cnn_fit_line: Fit-line spec passed to ``do_plot`` for the CNN curve.
        human_colorpallete: seaborn palette name for the human curves.
        cnn_colorpallete: seaborn palette name; its first colour is used
            for the CNN curve.
        fit_line: Accepted but not used by this function.
        data_filter: Forwarded to ``get_human_results_by_revaluation``.
    """
    p = get_settings(experiment_run)
    set_index = p['set_index']

    # Parse class file: exps[cat] is the set of class names in category cat.
    exps = []
    fn_full = os.path.join(
        '/media/data_cifs/clicktionary/causal_experiment', class_file)
    # ``with`` closes the handle; it used to be left open.
    with open(fn_full, 'rt') as class_fh:
        lines = class_fh.read().splitlines()
    for line in lines:
        if not line.strip():
            # Tolerate blank lines instead of failing on the unpack below.
            continue
        classname, cat = line.split(' ')
        cat = int(cat)
        # Grow until ``cat`` is a valid index, so categories may appear in
        # any order or skip numbers without raising IndexError.
        while len(exps) <= cat:
            exps.append(set())
        exps[cat].add(classname)

    # Plot Human
    colors = sns.color_palette(human_colorpallete, len(exps))
    data_human = [
        get_human_results_by_revaluation(
            experiment_run=experiment_run,
            filename_filter=class_names,
            off=0,
            is_inverted_rev=is_inverted_rev,
            log_scale=p['log_scale_revelations'],
            exclude_workerids=exclude_workerids,
            data_filter=data_filter,
            return_ims=False)
        for class_names in exps
    ]
    ax = None
    for group_data, color, class_names in zip(data_human, colors, exps):
        # The legend label is the list of class names in the group.
        _, ax = do_plot(group_data,
                        color,
                        list(class_names),
                        log_scale=p['log_scale_revelations'],
                        ci=human_ci,
                        fit_line=human_fit_line,
                        ax=ax)

    # Plot CNN
    if 'cnn_class_file' in p.keys():
        filter_class_file = p['cnn_class_file']
    else:
        filter_class_file = None
    print(set_index)
    data_cnn = get_cnn_results_by_revelation(
        set_index, filter_class_file=filter_class_file)
    if p['log_scale_revelations']:
        # The first human group is passed as the reference data.
        data_cnn = apply_log_scale(data_human[0], data_cnn)
    cnn_color = sns.color_palette(cnn_colorpallete, len(exps))[0]
    _, ax = do_plot(data_cnn,
                    cnn_color,
                    'CNN',
                    log_scale=p['log_scale_revelations'],
                    ci=cnn_ci,
                    fit_line=cnn_fit_line,
                    ax=ax)

    plt.title('Accuracy by image revelation')
    plt.legend()
    plt.savefig(
        os.path.join(config.plot_path,
                     'perf_by_revelation_by_class_%s.png' % experiment_run))
def load(self, experiment_run, exclude_workerids=['']):
    """Load settings, ground truth and participants for one experiment.

    Args:
        experiment_run: Experiment name passed to ``get_settings`` and
            ``load_participants``.
        exclude_workerids: Worker ids forwarded to ``load_participants``.
            The default list is only passed along, never modified here.
    """
    self.p = get_settings(experiment_run)
    # Fixed: the settings live on ``self.p``; the bare name ``p`` is not
    # defined in this method and raised NameError.
    set_index, set_name = self.p['set_index'], self.p['set_name']
    self.load_ground_truth(set_index, set_name)
    self.load_participants(experiment_run, exclude_workerids)