コード例 #1
0
def plot_results_by_revelation_and_max_answer_time(
        experiment_run='clicktionary', exclude_workerids=None, fit_line=True):
    """Plot CNN and human accuracy by revelation, one human curve per max answer time.

    The CNN curve is drawn first in black. Human data for ``experiment_run``
    is then loaded once and plotted separately for every value found in
    ``max_answer_times`` (ascending), each with its own cubehelix colour.
    The figure is written to ``config.plot_path`` as both PNG and PDF.

    Args:
        experiment_run: Name of the experiment; used to look up settings,
            to load the human data and to build the output file names.
        exclude_workerids: Optional worker ids forwarded to ``Data.load``.
            When ``None`` the loader's own default is used.
        fit_line: Passed through unchanged to every ``do_plot`` call.
    """
    # Get exp data
    settings = get_settings(experiment_run)
    set_index = settings['set_index']

    # Plot CNN results
    data_cnn = get_cnn_results_by_revelation(set_index)
    do_plot(data_cnn, 'black', 'CNN', fit_line=fit_line)

    # Plot human results
    data_human_all = Data()
    if exclude_workerids is None:
        data_human_all.load(experiment_run=experiment_run)
    else:
        # Previously this argument was accepted but silently dropped.
        data_human_all.load(experiment_run=experiment_run,
                            exclude_workerids=exclude_workerids)

    mats = sorted(data_human_all.max_answer_times)
    colors = sns.cubehelix_palette(len(mats))
    ax = None
    for max_answer_time, color in zip(mats, colors):
        revs, scores, subjects, ims = data_human_all.get_summary_by_revelation_and_max_answer_time(
            max_answer_time=max_answer_time)
        data_human = combine_revs_and_scores(revs, scores, subjects, ims)
        # Feed the returned axes back in so the next curve reuses them.
        _, ax = do_plot(data_human,
                        color,
                        'Human %dms' % max_answer_time,
                        fit_line=fit_line,
                        ax=ax)

    plt.title('Accuracy by image revelation and max answer time (n=%d)\n' %
              data_human_all.n_subjects +
              config.get_experiment_desc(experiment_run))
    plt.legend()

    # Build the output location once; only the extension differs.
    out_base = os.path.join(config.plot_path,
                            'perf_by_revelation_and_mat_%s' % experiment_run)
    for extension in ('.png', '.pdf'):
        plt.savefig(out_base + extension)
    print('Saved to: %s' % (out_base + '.png'))
コード例 #2
0
 def load_multi(self, set_indexes, set_name, bootstrap_size=None):
     """Load participant records for several set indexes, optionally bootstrapped.

     For every entry of ``set_indexes`` the ground truth is loaded and the
     records returned by ``load_participant_json`` are appended to one list.
     If ``bootstrap_size`` is given, that many records are drawn from the
     list with replacement before the subjects are loaded.

     Args:
         set_indexes: Iterable of set indexes passed to ``load_ground_truth``.
         set_name: Name used for the settings lookup and the loaders.
         bootstrap_size: Number of records to resample with replacement, or
             ``None`` to use every record once. Progress output is printed
             only when this is ``None``.
     """
     self.p = get_settings(set_name)
     verbose = bootstrap_size is None

     r = []
     for set_index in set_indexes:
         self.load_ground_truth(set_index, set_name)
         r += self.load_participant_json(set_name, verbose=verbose)

     if bootstrap_size is not None:
         # An int population is sampled as if it were np.arange(len(r)).
         idcs = np.random.choice(len(r), size=bootstrap_size, replace=True)
         r = [r[i] for i in idcs]

     # Element 0 of each record is used as the subject id.
     self.subject_ids = np.unique([subj[0] for subj in r])
     if verbose:
         print('%d unique subjects' % (len(self.subject_ids)))

     for subj in r:
         # Element 3 of each record is a JSON document with the subject data.
         self.load_subject(json.loads(subj[3]))

     if verbose:
         # Guard the percentage: with no trials the division used to raise.
         if self.n_trials:
             timeout_pct = float(self.n_timeouts) / self.n_trials * 100
         else:
             timeout_pct = 0.0
         print('%d trials, %d timeouts (%.1f%%)' % (self.n_trials, self.n_timeouts, timeout_pct))
コード例 #3
0
def plot_results_by_revelation(
        experiment_run='clicktionary',
        exclude_workerids=None,
        human_fit_line=['linear'],
        cnn_fit_line=['linear'],
        human_color_pallete='Set1',
        cnn_colorpallete='Greys',
        human_labels=None,
        cnn_labels='VGG16 performance on clicktionary maps',
        cnn_index=None,
        human_ci=95,
        cnn_ci=95,
        data_filter='',
        max_val=200):
    """Plot human and CNN accuracy by image revelation and save the figure.

    Human results are plotted for one experiment run or for a list of runs,
    followed by the CNN results for one set index or a list of them. The
    figure is saved to ``config.plot_path`` as PNG and PDF.

    Args:
        experiment_run: A run name, or a list of run names. A list is joined
            with ``'_'`` to build the output file name.
        exclude_workerids: Forwarded to ``get_human_results_by_revaluation``.
        human_fit_line: ``fit_line`` value(s) for the human curves. With a
            list of runs one entry is used per run (the list is expanded with
            ``np.repeat`` when shorter); with a single run it is passed as is.
        cnn_fit_line: Same as ``human_fit_line`` but for the CNN curves.
        human_color_pallete: Seaborn palette name for multiple human curves.
        cnn_colorpallete: Seaborn palette name for multiple CNN curves.
        human_labels: Optional per-run legend labels. When ``None`` and a list
            of runs is given, labels are built from the numeric parts of
            each run name.
        cnn_labels: Legend label for the CNN curve, or a sequence of labels
            when ``cnn_index`` is a list. A single string is reused for every
            CNN curve.
        cnn_index: Set index (or list of set indexes) for the CNN results.
            Defaults to the ``set_index`` of the (last) experiment's settings.
        human_ci: ``ci`` passed to ``do_plot`` for the multi-run human curves.
        cnn_ci: ``ci`` passed to ``do_plot`` for the CNN curves.
        data_filter: Forwarded to ``get_human_results_by_revaluation``.
        max_val: ``max_val`` passed to ``do_plot``.

    Returns:
        Tuple ``(human_dfs, cnn_dfs, human_ims, None)``. ``human_dfs`` maps
        run name to its data, except when exactly two runs were plotted: it
        is then a list with one ``{revelation: per-subject mean correctness}``
        dict per run (the structure handed to ``measure_p_value_2_sample``).

    Raises:
        ValueError: If ``experiment_run`` is an empty list.
    """
    # NOTE: the list defaults above are never mutated in place (np.repeat
    # returns a new array), so sharing them across calls is harmless.
    human_dfs = {}
    human_ims = {}
    human_stats = {}
    cnn_dfs = {}
    ax = None
    cnn_means = None

    if isinstance(experiment_run, list):
        if not experiment_run:
            raise ValueError('experiment_run must contain at least one run')
        colors = sns.color_palette(human_color_pallete, len(experiment_run))
        if len(human_fit_line) < len(experiment_run):
            human_fit_line = np.repeat(human_fit_line, len(experiment_run))
            print('Expanding fit_lines to match size of experiment_run')
        for idx, (exp, color,
                  fl) in enumerate(zip(experiment_run, colors,
                                       human_fit_line)):
            p = get_settings(exp)
            set_index = p['set_index']
            data_human, ims, stats = get_human_results_by_revaluation(
                exp,
                off=0,
                is_inverted_rev=False,
                log_scale=p['log_scale_revelations'],
                exclude_workerids=exclude_workerids,
                data_filter=data_filter)
            if human_labels is None:
                # Derive the label from the non-alphabetic chunks of the run
                # name; the first two are used as image and response time.
                exp_params = [
                    x for x in re.split(r'[A-Za-z]+', exp) if len(x) > 0
                ]
                title = 'Human image time: %s | response time: %s' % (
                    exp_params[0], exp_params[1])
            else:
                title = human_labels[idx]
            human_dfs[exp] = data_human
            human_ims[exp] = ims
            human_stats[exp] = stats
            _, ax = do_plot(data_human,
                            color,
                            title,
                            log_scale=p['log_scale_revelations'],
                            max_val=max_val,
                            fit_line=fl,
                            ci=human_ci,
                            ax=ax)

        plt.title('Accuracy by log-spaced image feature revelation\n')
        experiment_run = '_'.join(experiment_run)
    else:
        p = get_settings(experiment_run)
        # Fixed: a trailing comma used to turn this into a 1-tuple.
        set_index = p['set_index']
        data_human, ims, stats = get_human_results_by_revaluation(
            experiment_run,
            off=0,
            is_inverted_rev=False,
            log_scale=p['log_scale_revelations'],
            exclude_workerids=exclude_workerids,
            data_filter=data_filter)
        # Fixed: these were keyed by `exp`, which is undefined in this branch.
        human_dfs[experiment_run] = data_human
        human_ims[experiment_run] = ims
        human_stats[experiment_run] = stats
        plt.title('Accuracy by image revelation\n' + p['desc'])
        # NOTE(review): human_ci is not passed here, unlike the multi-run
        # branch -- confirm whether that is intentional.
        _, ax = do_plot(data_human,
                        '#fc8d59',
                        'Human',
                        log_scale=p['log_scale_revelations'],
                        max_val=max_val,
                        fit_line=human_fit_line,
                        ax=ax)

    if len(human_stats) == 2:
        dfs = [
            pd.DataFrame(x, columns=['Revelation', 'correctness', 'subject'])
            for x in human_dfs.values()
        ]
        # From here on human_dfs is a list of per-revelation dicts; this is
        # also what gets returned for the two-run case.
        human_dfs = []
        for it_df in dfs:
            # `.values` replaces `.as_matrix()`, which newer pandas removed.
            human_dfs.append({
                x: it_df[it_df['Revelation'] == x].groupby('subject').mean()
                ['correctness'].values
                for x in it_df['Revelation'].unique()
            })
        p_values = measure_p_value_2_sample(*human_dfs)
        print('Disabled human/cnn stats until we get synthetic subjects')
        print(p_values)

    # CNN is always the same
    if 'cnn_class_file' in p.keys():
        filter_class_file = p['cnn_class_file']
    else:
        filter_class_file = None
    if cnn_index is None:
        cnn_index = set_index
    if isinstance(cnn_index, list):
        colors = sns.color_palette(cnn_colorpallete, len(cnn_index))
        if len(cnn_fit_line) < len(cnn_index):
            # Fixed: was repeated by len(experiment_run), which at this point
            # may be the joined run-name string rather than a count of CNNs.
            cnn_fit_line = np.repeat(cnn_fit_line, len(cnn_index))
            print('Expanding fit_lines to match size of cnn_index')
        if isinstance(cnn_labels, str):
            # A bare string would be zipped character by character below.
            cnn_labels = [cnn_labels] * len(cnn_index)
        for idx, (si, color, la, fl) in enumerate(
                zip(cnn_index, colors, cnn_labels, cnn_fit_line)):
            data_cnn = get_cnn_results_by_revelation(
                si, filter_class_file=filter_class_file, off=float(idx) / 2)
            if p['log_scale_revelations']:
                data_cnn = apply_log_scale(data_human,
                                           data_cnn,
                                           off=float(idx * 2))
            # Fixed: every result used to be stored under cnn_index[0], so
            # only the last CNN survived.
            cnn_dfs[si] = data_cnn
            cnn_means, ax = do_plot(data_cnn,
                                    color,
                                    la,
                                    log_scale=p['log_scale_revelations'],
                                    max_val=max_val,
                                    fit_line=fl,
                                    ci=cnn_ci,
                                    ax=ax)
    else:
        data_cnn = get_cnn_results_by_revelation(
            cnn_index, filter_class_file=filter_class_file)
        if p['log_scale_revelations']:
            data_cnn = apply_log_scale(data_human, data_cnn)
        # NOTE(review): the key is the literal string 'set_index', kept as is
        # because callers may look it up by that name -- confirm.
        cnn_dfs['set_index'] = data_cnn
        cnn_means, ax = do_plot(data_cnn,
                                '#91bfdb',
                                cnn_labels,
                                log_scale=p['log_scale_revelations'],
                                max_val=max_val,  # was hard-coded to 200
                                fit_line=cnn_fit_line,
                                ci=cnn_ci,
                                ax=ax)

    if human_stats and cnn_means is not None:
        # The result is currently not reported; the call is kept so that the
        # comparison still runs as before.
        p_values = {
            k: measure_p_value_human_v_cnn(v, cnn_means)
            for k, v in human_stats.items()
        }

    plt.legend(loc='upper left')
    out_png = os.path.join(config.plot_path,
                           'perf_by_revelation_%s.png' % experiment_run)
    out_pdf = os.path.join(config.plot_path,
                           'perf_by_revelation_%s.pdf' % experiment_run)
    plt.savefig(out_png)
    plt.savefig(out_pdf)
    # Fixed: the message used to name a different file than the one written.
    print('Saved to: %s' % out_png)
    return human_dfs, cnn_dfs, human_ims, None
コード例 #4
0
def plot_results_by_revaluation_by_class(
        experiment_run,
        class_file,
        exclude_workerids=None,
        is_inverted_rev=False,
        human_ci=66.6,
        cnn_ci=66.6,
        human_fit_line=[''],  # ['linear','logistic'],
        cnn_fit_line=[''],  # ['linear','logistic'],
        human_colorpallete='Set2',
        cnn_colorpallete='Greys',
        fit_line='linear',
        data_filter=''):
    """Plot human accuracy by revelation per class group, plus the CNN curve.

    ``class_file`` is read from the causal-experiment directory. Each
    non-blank line is ``<classname> <category>`` separated by a single
    space; class names sharing a category id form one group and get one
    human curve. The figure is saved as PNG under ``config.plot_path``.

    Args:
        experiment_run: Run name used for settings, data and the file name.
        class_file: File name of the class/category listing.
        exclude_workerids: Forwarded to ``get_human_results_by_revaluation``.
        is_inverted_rev: Forwarded to ``get_human_results_by_revaluation``.
        human_ci: ``ci`` passed to ``do_plot`` for the human curves.
        cnn_ci: ``ci`` passed to ``do_plot`` for the CNN curve.
        human_fit_line: ``fit_line`` passed to ``do_plot`` for human curves.
        cnn_fit_line: ``fit_line`` passed to ``do_plot`` for the CNN curve.
        human_colorpallete: Seaborn palette name for the human curves.
        cnn_colorpallete: Seaborn palette name for the CNN curve.
        fit_line: Not used by this function; kept for compatibility.
        data_filter: Forwarded to ``get_human_results_by_revaluation``.
    """
    p = get_settings(experiment_run)
    set_index = p['set_index']

    # Parse class file
    exps = []
    fn_full = os.path.join(
        '/media/data_cifs/clicktionary/causal_experiment', class_file)
    # Use a context manager so the handle is closed deterministically.
    with open(fn_full, 'rt') as class_fh:
        lines = class_fh.read().splitlines()
    for line in lines:
        if not line.strip():
            # Tolerate blank lines instead of failing on the unpack below.
            continue
        classname, cat = line.split(' ')
        cat = int(cat)
        # Grow until index `cat` exists; a single append was not enough when
        # category ids skip a value.
        while len(exps) <= cat:
            exps.append(set())
        exps[cat].add(classname)

    # Plot Human
    colors = sns.color_palette(human_colorpallete, len(exps))
    data_human = [
        get_human_results_by_revaluation(experiment_run=experiment_run,
                                         filename_filter=class_group,
                                         off=0,
                                         is_inverted_rev=is_inverted_rev,
                                         log_scale=p['log_scale_revelations'],
                                         exclude_workerids=exclude_workerids,
                                         data_filter=data_filter,
                                         return_ims=False)
        for class_group in exps
    ]
    ax = None
    for group_data, color, class_group in zip(data_human, colors, exps):
        _, ax = do_plot(group_data,
                        color,
                        list(class_group),
                        log_scale=p['log_scale_revelations'],
                        ci=human_ci,
                        fit_line=human_fit_line,
                        ax=ax)

    # Plot CNN
    if 'cnn_class_file' in p.keys():
        filter_class_file = p['cnn_class_file']
    else:
        filter_class_file = None
    print(set_index)
    data_cnn = get_cnn_results_by_revelation(
        set_index, filter_class_file=filter_class_file)
    if p['log_scale_revelations']:
        data_cnn = apply_log_scale(data_human[0], data_cnn)
    # Only the first colour of the CNN palette is used.
    cnn_color = sns.color_palette(cnn_colorpallete, len(exps))[0]
    _, ax = do_plot(data_cnn,
                    cnn_color,
                    'CNN',
                    log_scale=p['log_scale_revelations'],
                    ci=cnn_ci,
                    fit_line=cnn_fit_line,
                    ax=ax)
    plt.title('Accuracy by image revelation')
    plt.legend()
    plt.savefig(
        os.path.join(config.plot_path,
                     'perf_by_revelation_by_class_%s.png' % experiment_run))
コード例 #5
0
 def load(self, experiment_run, exclude_workerids=['']):
     """Load ground truth and participant data for one experiment run.

     The settings for ``experiment_run`` are stored on ``self.p``; their
     ``set_index`` and ``set_name`` entries select the ground truth.

     Args:
         experiment_run: Name of the run to look up and load.
         exclude_workerids: Forwarded to ``load_participants``. The default
             list is only passed on here, never modified.
     """
     self.p = get_settings(experiment_run)
     # Fixed: read from self.p; the bare name `p` is not defined here.
     set_index, set_name = self.p['set_index'], self.p['set_name']
     self.load_ground_truth(set_index, set_name)
     self.load_participants(experiment_run, exclude_workerids)