def compute_bootstrap(self, nb_bootstrap_samples=1000):
        print "Computing bootstrap..."

        self.dataset['bootstrap_nitems_pval'] = np.nan*np.empty(self.dataset['n_items_size'])
        self.dataset['bootstrap_nitems'] = np.empty(self.dataset['n_items_size'], dtype=np.object)
        self.dataset['bootstrap_subject_nitems'] = np.empty((self.dataset['subject_size'], self.dataset['n_items_size']), dtype=np.object)
        self.dataset['bootstrap_subject_nitems_pval'] = np.nan*np.empty((self.dataset['subject_size'], self.dataset['n_items_size']))


        for n_items_i, n_items in enumerate(np.unique(self.dataset['n_items'])):
            if n_items > 1:
                for subject_i, subject in enumerate(np.unique(self.dataset['subject'])):
                    print "Nitems %d, subject %d" % (n_items, subject)

                    # Bootstrap per subject and nitems
                    ids_filter = (self.dataset['subject'] == subject).flatten() & (self.dataset['n_items'] == n_items).flatten()

                    # Compute bootstrap if required
                    bootstrap = em_circmixtmodel.bootstrap_nontarget_stat(
                        self.dataset['response'][ids_filter, 0],
                        self.dataset['item_angle'][ids_filter, 0],
                        self.dataset['item_angle'][ids_filter, 1:n_items],
                        nb_bootstrap_samples=nb_bootstrap_samples)
                    self.dataset['bootstrap_subject_nitems'][subject_i, n_items_i] = bootstrap
                    self.dataset['bootstrap_subject_nitems_pval'][subject_i, n_items_i] = bootstrap['p_value']

                    print self.dataset['bootstrap_subject_nitems_pval'][:, n_items_i]

                print "Nitems %d, all subjects" % (n_items)

                # Data collapsed across subjects
                ids_filter = (self.dataset['n_items'] == n_items).flatten()

                bootstrap = em_circmixtmodel.bootstrap_nontarget_stat(
                    self.dataset['response'][ids_filter, 0],
                    self.dataset['item_angle'][ids_filter, 0],
                    self.dataset['item_angle'][ids_filter, 1:n_items],
                    nb_bootstrap_samples=nb_bootstrap_samples)
                self.dataset['bootstrap_nitems'][n_items_i] = bootstrap
                self.dataset['bootstrap_nitems_pval'][n_items_i] = bootstrap['p_value']

                print self.dataset['bootstrap_nitems_pval']
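
# Hedged usage sketch (assumption: compute_bootstrap is a method of a wrapper
# class exposing a Bays09-style self.dataset dict; the class and variable names
# below are illustrative, not taken from this codebase):
#
#   fitter = ExperimentalDatasetWrapper(dataset)
#   fitter.compute_bootstrap(nb_bootstrap_samples=1000)
#   print fitter.dataset['bootstrap_nitems_pval']
#   print fitter.dataset['bootstrap_subject_nitems_pval']
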
def plots_boostrap(data_pbs, generator_module=None):
    '''
        Reload bootstrap samples, plot their histograms, fit empirical CDFs and save them for quicker re-use later.
    '''
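
    # The ECDFs fitted below are cached and later passed back to
    # em_circularmixture.bootstrap_nontarget_stat() as nontarget_bootstrap_ecdf.
    # A minimal sketch of the intended p-value computation (an assumption about
    # that helper, not code taken from it):
    #
    #   ecdf = stmodsdist.empirical_distribution.ECDF(null_bootstrap_samples)
    #   p_value = 1.0 - ecdf(observed_nontarget_mixture_weight)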

    #### SETUP
    #
    savefigs = True
    savedata = True

    load_fit_bootstrap = True
    plots_hist_cdf = False
    estimate_bootstrap = True

    should_fit_bootstrap = True
    # caching_bootstrap_filename = None
    caching_bootstrap_filename = os.path.join(generator_module.pbs_submission_infos['simul_out_dir'], 'outputs', 'cache_bootstrap_bays09.pickle')

    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", generator_module.dict_parameters_range.keys()

    result_bootstrap_nitems_samples = np.squeeze(data_pbs.dict_arrays['result_bootstrap_nitems_samples']['results'])
    result_bootstrap_subject_nitems_samples = np.squeeze(data_pbs.dict_arrays['result_bootstrap_subject_nitems_samples']['results'])


    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    dataset = load_experimental_data.load_data_bays09(fit_mixture_model=True)

    if load_fit_bootstrap:
        if caching_bootstrap_filename is not None:

            if os.path.exists(caching_bootstrap_filename):
                # Got file, open it and try to use its contents
                try:
                    with open(caching_bootstrap_filename, 'rb') as file_in:
                        # Load and assign values
                        cached_data = pickle.load(file_in)
                        bootstrap_nitems_samples = cached_data['bootstrap_nitems_samples']
                        bootstrap_subject_nitems_samples = cached_data['bootstrap_subject_nitems_samples']
                        should_fit_bootstrap = False

                except IOError:
                    print "Error while loading ", caching_bootstrap_filename, "falling back to computing the EM fits"

        if should_fit_bootstrap:

            bootstrap_nitems_samples = dict()
            bootstrap_subject_nitems_samples = dict()

            # Fit ECDF
            for n_items_i, n_items in enumerate(np.unique(dataset['n_items'])):
                if n_items > 1:
                    print "Nitems %d, all subjects" % (n_items)
                    current_ecdf_allitems = stmodsdist.empirical_distribution.ECDF(utils.dropnan(result_bootstrap_nitems_samples[n_items_i]))

                    # Store in a dict(n_items_i) -> {ECDF object, n_items}
                    bootstrap_nitems_samples[n_items_i] = dict(ecdf=current_ecdf_allitems, n_items=n_items)

                    for subject_i, subject in enumerate(np.unique(dataset['subject'])):
                        print "Nitems %d, subject %d" % (n_items, subject)

                        current_ecdf_subj_items = stmodsdist.empirical_distribution.ECDF(utils.dropnan(result_bootstrap_subject_nitems_samples[subject_i, n_items_i]))

                        if n_items_i not in bootstrap_subject_nitems_samples:
                            bootstrap_subject_nitems_samples[n_items_i] = dict()
                        bootstrap_subject_nitems_samples[n_items_i][subject_i] = dict(ecdf=current_ecdf_subj_items, n_items=n_items, subject=subject)

            # Save everything to a file, for faster later plotting
            if caching_bootstrap_filename is not None:
                try:
                    with open(caching_bootstrap_filename, 'wb') as filecache_out:
                        data_bootstrap = dict(bootstrap_nitems_samples=bootstrap_nitems_samples, bootstrap_subject_nitems_samples=bootstrap_subject_nitems_samples)
                        pickle.dump(data_bootstrap, filecache_out, protocol=2)
                except IOError:
                    print "Error writing out to caching file ", caching_bootstrap_filename


    if plots_hist_cdf:
        ## Plots now
        for n_items_i, n_items in enumerate(np.unique(dataset['n_items'])):
            if n_items > 1:
                for subject_i, subject in enumerate(np.unique(dataset['subject'])):

                    # Histogram of samples, for subject/nitems
                    _, axes = plt.subplots(ncols=2, figsize=(12, 6))
                    axes[0].hist(utils.dropnan(result_bootstrap_subject_nitems_samples[subject_i, n_items_i]), bins=100, normed=True)
                    axes[0].set_xlim([0.0, 1.0])
                    # ECDF now
                    axes[1].plot(bootstrap_subject_nitems_samples[n_items_i][subject_i]['ecdf'].x, bootstrap_subject_nitems_samples[n_items_i][subject_i]['ecdf'].y, linewidth=2)
                    axes[1].set_xlim([0.0, 1.0])

                    if savefigs:
                        dataio.save_current_figure('histecdf_bootstrap_nitems%d_subject%d_{label}_{unique_id}.pdf' % (n_items, subject))

                # Same for data collapsed across subjects
                # Histogram of samples, collapsed over subjects for this nitems
                _, axes = plt.subplots(ncols=2, figsize=(12, 6))
                axes[0].hist(utils.dropnan(result_bootstrap_nitems_samples[n_items_i]), bins=100, normed=True)
                axes[0].set_xlim([0.0, 1.0])
                # ECDF now
                axes[1].plot(bootstrap_nitems_samples[n_items_i]['ecdf'].x, bootstrap_nitems_samples[n_items_i]['ecdf'].y, linewidth=2)
                axes[1].set_xlim([0.0, 1.0])

                if savefigs:
                    dataio.save_current_figure('histecdf_bootstrap_nitems%d_{label}_{unique_id}.pdf' % (n_items))

    if estimate_bootstrap:
        # Compute bootstrap p-value
        result_pvalue_bootstrap_nitems = np.empty(dataset['n_items_size'])*np.nan
        result_pvalue_bootstrap_subject_nitems_samples = np.empty((dataset['n_items_size'], dataset['subject_size']))*np.nan


        for n_items_i, n_items in enumerate(np.unique(dataset['n_items'])):
            if n_items > 1:
                print "Nitems %d, all subjects" % (n_items)
                # Data collapsed across subjects
                ids_filtered = (dataset['n_items'] == n_items).flatten()

                bootstrap = em_circularmixture.bootstrap_nontarget_stat(
                    dataset['response'][ids_filtered, 0],
                    dataset['item_angle'][ids_filtered, 0],
                    dataset['item_angle'][ids_filtered, 1:n_items],
                    nontarget_bootstrap_ecdf=bootstrap_nitems_samples[n_items_i]['ecdf'])

                result_pvalue_bootstrap_nitems[n_items_i] = bootstrap['p_value']
                print "p_val:", result_pvalue_bootstrap_nitems

                for subject_i, subject in enumerate(np.unique(dataset['subject'])):
                    print "Nitems %d, subject %d" % (n_items, subject)

                    # Bootstrap per subject and nitems
                    ids_filtered = (dataset['subject'] == subject).flatten() & (dataset['n_items'] == n_items).flatten()

                    # Get pvalue
                    bootstrap = em_circularmixture.bootstrap_nontarget_stat(
                        dataset['response'][ids_filtered, 0],
                        dataset['item_angle'][ids_filtered, 0],
                        dataset['item_angle'][ids_filtered, 1:n_items],
                        nontarget_bootstrap_ecdf=bootstrap_subject_nitems_samples[n_items_i][subject_i]['ecdf'])
                    result_pvalue_bootstrap_subject_nitems_samples[n_items_i, subject_i] = bootstrap['p_value']

                    print "p_val:", result_pvalue_bootstrap_subject_nitems_samples[n_items_i, subject_i]

        signif_level = 0.05
        result_signif_nitems = result_pvalue_bootstrap_nitems < signif_level
        result_num_signif_subject_nitems = np.sum(result_pvalue_bootstrap_subject_nitems_samples < signif_level, axis=1)
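        # result_num_signif_subject_nitems[n_items_i] counts how many subjects
        # have a bootstrap p-value below signif_level for that set size.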
        print "Summary:"
        print "Collapsed subjects:", result_signif_nitems
        print "Per subjects (%d total): %s" % (dataset['subject_size'], result_num_signif_subject_nitems)


    all_args = data_pbs.loaded_data['args_list']
    variables_to_save = ['nb_repetitions', 'signif_level']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)

        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='bootstrap_nontargets')


    plt.show()


    return locals()
def launcher_do_nontarget_bootstrap(args):
    '''
        Compute a bootstrap estimate, using outputs from the model run earlier
    '''
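
    # Overview (inferred from how the return values are used below): the
    # bootstrap_nontarget_stat() helpers resample the data, refit the circular
    # mixture model on each resample, and return a dict containing at least a
    # 'nontarget_bootstrap_samples' array (the null distribution of the
    # non-target mixture weight); the unique-kappa variant additionally returns
    # 'allnontarget_bootstrap_samples'.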

    print "Doing a piece of work for launcher_do_nontarget_bootstrap"

    try:
        # Convert Argparse.Namespace to dict
        all_parameters = vars(args)
    except TypeError:
        # Assume it's already done
        assert type(args) is dict, "args is neither Namespace nor dict, WHY?"
        all_parameters = args

    print all_parameters

    # Create DataIO
    #  (complete label with current variable state)
    dataio = DataIO.DataIO(output_folder=all_parameters['output_directory'], label=all_parameters['label'].format(**all_parameters))
    save_every = 1
    run_counter = 0

    # Load the data
    if all_parameters['subaction'] == 'mixed':
        # Mixed runs
        model_outputs = utils.load_npy( os.path.join(os.getenv("WORKDIR_DROP", None), 'Experiments', 'bootstrap_nontargets', 'global_plots_errors_distribution-plots_errors_distribution-d977e237-cfce-473b-a292-00695e725259.npy'))
    else:
        # Conjunctive runs
        model_outputs = utils.load_npy( os.path.join(os.getenv("WORKDIR_DROP", None), 'Experiments', 'bootstrap_nontargets', 'global_plots_errors_distribution-plots_errors_distribution-cc1a49b0-f5f0-4e82-9f0f-5a16a2bfd4e8.npy'))

    data_responses_all = model_outputs['result_responses_all'][..., 0]
    data_target_all = model_outputs['result_target_all'][..., 0]
    data_nontargets_all = model_outputs['result_nontargets_all'][..., 0]
    T_space = model_outputs['T_space']
    sigmax_space = model_outputs['sigmax_space']

    K = data_nontargets_all.shape[-1]
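    # K is the maximum number of non-target items in the loaded outputs; per
    # (sigmax, T) cell only the first T_i non-target columns are used (see the
    # [:, :T_i] slicing below).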

    # Result arrays
    result_bootstrap_samples_allitems = np.nan*np.ones((sigmax_space.size, T_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples = np.nan*np.ones((sigmax_space.size, T_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples_allitems_uniquekappa_sumnontarget = np.nan*np.ones((sigmax_space.size, T_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples_allitems_uniquekappa_allnontarget = np.nan*np.ones((sigmax_space.size, T_space.size, K*all_parameters['num_repetitions']))

    search_progress = progress.Progress(sigmax_space.size*(T_space.size-1))

    for sigmax_i, sigmax in enumerate(sigmax_space):
        for T_i, T in enumerate(T_space[1:], start=1):

            print "%.2f%%, %s left - %s" % (search_progress.percentage(), search_progress.time_remaining_str(), search_progress.eta_str())

            print "Bootstrap for T=%d, sigmax=%.2f, %d bootstrap samples" % (T, sigmax, all_parameters['num_repetitions'])

            # Update parameter

            ### WORK WORK WORK work? ###

            # Get some bootstrap samples
            bootstrap_allitems_nontargets_allitems_uniquekappa = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(
                    data_responses_all[sigmax_i, T_i],
                    data_target_all[sigmax_i, T_i],
                    data_nontargets_all[sigmax_i, T_i, :, :T_i],
                    nb_bootstrap_samples=all_parameters['num_repetitions'],
                    resample_targets=False)
            # bootstrap_allitems_nontargets_allitems = em_circularmixture_allitems.bootstrap_nontarget_stat(
            #         data_responses_all[sigmax_i, T_i],
            #         data_target_all[sigmax_i, T_i],
            #         data_nontargets_all[sigmax_i, T_i, :, :T_i],
            #         nb_bootstrap_samples=all_parameters['num_repetitions'],
            #         resample_targets=False)
            bootstrap_allitems_nontargets = em_circularmixture.bootstrap_nontarget_stat(
                    data_responses_all[sigmax_i, T_i],
                    data_target_all[sigmax_i, T_i],
                    data_nontargets_all[sigmax_i, T_i, :, :T_i],
                    nb_bootstrap_samples=all_parameters['num_repetitions'],
                    resample_targets=False)

            # Collect and store responses
            # result_bootstrap_samples_allitems[sigmax_i, T_i] = bootstrap_allitems_nontargets_allitems['nontarget_bootstrap_samples']
            result_bootstrap_samples[sigmax_i, T_i] = bootstrap_allitems_nontargets['nontarget_bootstrap_samples']

            result_bootstrap_samples_allitems_uniquekappa_sumnontarget[sigmax_i, T_i] = bootstrap_allitems_nontargets_allitems_uniquekappa['nontarget_bootstrap_samples']
            result_bootstrap_samples_allitems_uniquekappa_allnontarget[sigmax_i, T_i, :all_parameters['num_repetitions']*T_i] = bootstrap_allitems_nontargets_allitems_uniquekappa['allnontarget_bootstrap_samples']

            print result_bootstrap_samples_allitems[sigmax_i, T_i]
            print result_bootstrap_samples[sigmax_i, T_i]
            print result_bootstrap_samples_allitems_uniquekappa_sumnontarget[sigmax_i, T_i]
            print result_bootstrap_samples_allitems_uniquekappa_allnontarget[sigmax_i, T_i]

            ### /Work ###

            search_progress.increment()
            if run_counter % save_every == 0 or search_progress.done():
                dataio.save_variables_default(locals())
            run_counter += 1

    # Finished
    dataio.save_variables_default(locals())

    print "All finished"
    return locals()
def launcher_do_nontarget_bootstrap_misbindingruns(args):
    '''
        Compute a bootstrap estimate, using outputs from a Misbinding generator run.
    '''
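
    # This launcher mirrors launcher_do_nontarget_bootstrap, but the model
    # outputs are indexed by the conjunctivity ratio (ratio_space) rather than
    # (sigmax, T), and the target/non-target angles are shared across ratios
    # (only result_all_thetas varies with ratio_conj below).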

    print "Doing a piece of work for launcher_do_nontarget_bootstrap"

    try:
        # Convert Argparse.Namespace to dict
        all_parameters = vars(args)
    except TypeError:
        # Assume it's already done
        assert type(args) is dict, "args is neither Namespace nor dict, WHY?"
        all_parameters = args

    print all_parameters

    # Create DataIO
    #  (complete label with current variable state)
    dataio = DataIO.DataIO(output_folder=all_parameters['output_directory'], label=all_parameters['label'].format(**all_parameters))
    save_every = 1
    run_counter = 0

    # Load the data
    if all_parameters['subaction'] == 'mixed' or all_parameters['subaction'] == '':
        # Mixed runs
        model_outputs = utils.load_npy( os.path.join(os.getenv("WORKDIR_DROP", None), 'Experiments', 'bootstrap_nontargets', 'SAVE_global_plots_misbinding_logposterior-plots_misbinding_logposterior-36eb41e9-6370-453e-995e-3876d5105388.npy'))

    data_responses_all = model_outputs['result_all_thetas']
    data_target = model_outputs['target_angle']
    data_nontargets = model_outputs['nontarget_angles']
    ratio_space = model_outputs['ratio_space']

    # Result arrays
    result_bootstrap_samples_allitems = np.nan*np.ones((ratio_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples = np.nan*np.ones((ratio_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples_allitems_uniquekappa_sumnontarget = np.nan*np.ones((ratio_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples_allitems_uniquekappa_allnontarget = np.nan*np.ones((ratio_space.size, all_parameters['num_repetitions']))

    search_progress = progress.Progress(ratio_space.size)

    for ratio_conj_i, ratio_conj in enumerate(ratio_space):
        print "%.2f%%, %s left - %s" % (search_progress.percentage(), search_progress.time_remaining_str(), search_progress.eta_str())

        print "Bootstrap for ratio=%.2f, %d bootstrap samples" % (ratio_conj, all_parameters['num_repetitions'])

        ### WORK WORK WORK work? ###

        # Get some bootstrap samples
        bootstrap_allitems_nontargets_allitems_uniquekappa = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(
                data_responses_all[ratio_conj_i],
                data_target,
                data_nontargets,
                nb_bootstrap_samples=all_parameters['num_repetitions'],
                resample_targets=False)

        bootstrap_allitems_nontargets = em_circularmixture.bootstrap_nontarget_stat(
                data_responses_all[ratio_conj_i],
                data_target,
                data_nontargets,
                nb_bootstrap_samples=all_parameters['num_repetitions'],
                resample_targets=False)

        # Collect and store responses
        result_bootstrap_samples[ratio_conj_i] = bootstrap_allitems_nontargets['nontarget_bootstrap_samples']

        result_bootstrap_samples_allitems_uniquekappa_sumnontarget[ratio_conj_i] = bootstrap_allitems_nontargets_allitems_uniquekappa['nontarget_bootstrap_samples']
        result_bootstrap_samples_allitems_uniquekappa_allnontarget[ratio_conj_i] = bootstrap_allitems_nontargets_allitems_uniquekappa['allnontarget_bootstrap_samples']

        print result_bootstrap_samples_allitems[ratio_conj_i]
        print result_bootstrap_samples[ratio_conj_i]
        print result_bootstrap_samples_allitems_uniquekappa_sumnontarget[ratio_conj_i]
        print result_bootstrap_samples_allitems_uniquekappa_allnontarget[ratio_conj_i]

        ### /Work ###

        search_progress.increment()
        if run_counter % save_every == 0 or search_progress.done():
            dataio.save_variables_default(locals())
        run_counter += 1

    # Finished
    dataio.save_variables_default(locals())

    print "All finished"
    return locals()
def launcher_do_bootstrap_experimental_sequential(args):
    '''
        Compute a bootstrap estimate, using outputs from the experimental data
    '''

    print "Doing a piece of work for launcher_do_bootstrap_experimental"

    try:
        # Convert Argparse.Namespace to dict
        all_parameters = vars(args)
    except TypeError:
        # Assume it's already done
        assert type(args) is dict, "args is neither Namespace nor dict, WHY?"
        all_parameters = args

    print all_parameters

    # Create DataIO
    #  (complete label with current variable state)
    dataio = DataIO.DataIO(output_folder=all_parameters['output_directory'], label=all_parameters['label'].format(**all_parameters))
    save_every = 1
    run_counter = 0

    # Load the data
    dataset = load_experimental_data.load_data(
        experiment_id='gorgo11_sequential',
        fit_mixture_model=True)
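
    # The sequential (gorgo11) dataset provides a per-trial recall index
    # (dataset['probe']) and a dataset['masked'] flag; bootstrap samples are
    # collected for every (n_items, trecall) cell, excluding masked trials,
    # both collapsed across subjects and per subject.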

    # Result arrays
    result_nontarget_bootstrap_nitems_trecall = np.nan*np.empty((
        dataset['n_items_size'], dataset['n_items_size'],
        all_parameters['num_repetitions']))
    result_nontarget_bootstrap_subject_nitems_trecall = np.nan*np.empty((
        dataset['subject_size'], dataset['n_items_size'],
        dataset['n_items_size'], all_parameters['num_repetitions']))

    search_progress = progress.Progress(dataset['subject_size']*(dataset['n_items_size']-1)*dataset['n_items_size'])

    for n_items_i, n_items in enumerate(np.unique(dataset['n_items'])):
        if n_items > 1:
            for trecall_i, trecall in enumerate(np.unique(dataset['n_items'])):
                print "Nitems %d, trecall %d, all subjects" % (n_items, trecall)
                # Data collapsed across subjects
                ids_filtered = (
                    (dataset['n_items'] == n_items) &
                    (dataset['probe'] == trecall) &
                    (~dataset['masked'])).flatten()

                bootstrap = em_circularmixture.bootstrap_nontarget_stat(
                    dataset['response'][ids_filtered, 0],
                    dataset['item_angle'][ids_filtered, 0],
                    dataset['item_angle'][ids_filtered, 1:n_items],
                    nb_bootstrap_samples=all_parameters['num_repetitions'],
                    resample_targets=False)

                result_nontarget_bootstrap_nitems_trecall[n_items_i, trecall_i] = bootstrap['nontarget_bootstrap_samples']

                print result_nontarget_bootstrap_nitems_trecall

                for subject_i, subject in enumerate(np.unique(dataset['subject'])):
                    print "Nitems %d, trecall %d, subject %d" % (
                        n_items, trecall, subject)

                    # Bootstrap per subject and nitems
                    ids_filtered = (
                        (dataset['n_items'] == n_items) &
                        (dataset['probe'] == trecall) &
                        (dataset['subject'] == subject) &
                        (~dataset['masked'])).flatten()

                    # Compute bootstrap if required
                    bootstrap = em_circularmixture.bootstrap_nontarget_stat(
                        dataset['response'][ids_filtered, 0],
                        dataset['item_angle'][ids_filtered, 0],
                        dataset['item_angle'][ids_filtered, 1:n_items],
                        nb_bootstrap_samples=all_parameters['num_repetitions'],
                        resample_targets=False)
                    result_nontarget_bootstrap_subject_nitems_trecall[
                        subject_i, n_items_i, trecall_i] = bootstrap['nontarget_bootstrap_samples']

                    print result_nontarget_bootstrap_subject_nitems_trecall[:, n_items_i, trecall_i]

                    search_progress.increment()
                    if run_counter % save_every == 0 or search_progress.done():
                        dataio.save_variables_default(locals())
                    run_counter += 1


    # Finished
    dataio.save_variables_default(locals())

    print "All finished"
    return locals()