def plots_boostrap(data_pbs, generator_module=None):
    ''' Reload bootstrap samples, plot their histograms, fit empirical CDFs and save them for quicker later use. '''

    #### SETUP
    #
    savefigs = True
    savedata = True

    load_fit_bootstrap = True
    plots_hist_cdf = False
    estimate_bootstrap = True

    should_fit_bootstrap = True
    # caching_bootstrap_filename = None
    caching_bootstrap_filename = os.path.join(generator_module.pbs_submission_infos['simul_out_dir'], 'outputs', 'cache_bootstrap_bays09.pickle')

    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", generator_module.dict_parameters_range.keys()

    result_bootstrap_nitems_samples = np.squeeze(data_pbs.dict_arrays['result_bootstrap_nitems_samples']['results'])
    result_bootstrap_subject_nitems_samples = np.squeeze(data_pbs.dict_arrays['result_bootstrap_subject_nitems_samples']['results'])

    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    dataset = load_experimental_data.load_data_bays09(fit_mixture_model=True)

    if load_fit_bootstrap:
        if caching_bootstrap_filename is not None:
            if os.path.exists(caching_bootstrap_filename):
                # Got file, open it and try to use its contents
                try:
                    with open(caching_bootstrap_filename, 'rb') as file_in:  # binary mode, pickle protocol 2 is binary
                        # Load and assign values
                        cached_data = pickle.load(file_in)
                        bootstrap_nitems_samples = cached_data['bootstrap_nitems_samples']
                        bootstrap_subject_nitems_samples = cached_data['bootstrap_subject_nitems_samples']
                        should_fit_bootstrap = False
                except IOError:
                    print "Error while loading ", caching_bootstrap_filename, "falling back to computing the bootstrap fits"

        if should_fit_bootstrap:
            bootstrap_nitems_samples = dict()
            bootstrap_subject_nitems_samples = dict()

            # Fit ECDFs
            for n_items_i, n_items in enumerate(np.unique(dataset['n_items'])):
                if n_items > 1:
                    print "Nitems %d, all subjects" % (n_items)

                    current_ecdf_allitems = stmodsdist.empirical_distribution.ECDF(utils.dropnan(result_bootstrap_nitems_samples[n_items_i]))

                    # Store in a dict(n_items_i) -> {ECDF object, n_items}
                    bootstrap_nitems_samples[n_items_i] = dict(ecdf=current_ecdf_allitems, n_items=n_items)

                    for subject_i, subject in enumerate(np.unique(dataset['subject'])):
                        print "Nitems %d, subject %d" % (n_items, subject)

                        current_ecdf_subj_items = stmodsdist.empirical_distribution.ECDF(utils.dropnan(result_bootstrap_subject_nitems_samples[subject_i, n_items_i]))

                        if n_items_i not in bootstrap_subject_nitems_samples:
                            bootstrap_subject_nitems_samples[n_items_i] = dict()
                        bootstrap_subject_nitems_samples[n_items_i][subject_i] = dict(ecdf=current_ecdf_subj_items, n_items=n_items, subject=subject)

            # Save everything to a file, for faster later plotting
            if caching_bootstrap_filename is not None:
                try:
                    with open(caching_bootstrap_filename, 'wb') as filecache_out:
                        data_bootstrap = dict(bootstrap_nitems_samples=bootstrap_nitems_samples, bootstrap_subject_nitems_samples=bootstrap_subject_nitems_samples)
                        pickle.dump(data_bootstrap, filecache_out, protocol=2)
                except IOError:
                    print "Error writing out to caching file ", caching_bootstrap_filename

    if plots_hist_cdf:
        ## Plots now
        for n_items_i, n_items in enumerate(np.unique(dataset['n_items'])):
            if n_items > 1:
                for subject_i, subject in enumerate(np.unique(dataset['subject'])):
                    # Histogram of samples, per subject/nitems
                    _, axes = plt.subplots(ncols=2, figsize=(12, 6))
                    axes[0].hist(utils.dropnan(result_bootstrap_subject_nitems_samples[subject_i, n_items_i]), bins=100, normed='density')
                    axes[0].set_xlim([0.0, 1.0])

                    # ECDF now
                    axes[1].plot(bootstrap_subject_nitems_samples[n_items_i][subject_i]['ecdf'].x, bootstrap_subject_nitems_samples[n_items_i][subject_i]['ecdf'].y, linewidth=2)
                    axes[1].set_xlim([0.0, 1.0])

                    if savefigs:
                        dataio.save_current_figure('histecdf_bootstrap_nitems%d_subject%d_{label}_{unique_id}.pdf' % (n_items, subject))

                # Same for data collapsed across subjects
                # Histogram of samples, per nitems
                _, axes = plt.subplots(ncols=2, figsize=(12, 6))
                axes[0].hist(utils.dropnan(result_bootstrap_nitems_samples[n_items_i]), bins=100, normed='density')
                axes[0].set_xlim([0.0, 1.0])

                # ECDF now
                axes[1].plot(bootstrap_nitems_samples[n_items_i]['ecdf'].x, bootstrap_nitems_samples[n_items_i]['ecdf'].y, linewidth=2)
                axes[1].set_xlim([0.0, 1.0])

                if savefigs:
                    dataio.save_current_figure('histecdf_bootstrap_nitems%d_{label}_{unique_id}.pdf' % (n_items))

    if estimate_bootstrap:
        # Compute bootstrap p-values
        result_pvalue_bootstrap_nitems = np.empty(dataset['n_items_size'])*np.nan
        result_pvalue_bootstrap_subject_nitems_samples = np.empty((dataset['n_items_size'], dataset['subject_size']))*np.nan

        for n_items_i, n_items in enumerate(np.unique(dataset['n_items'])):
            if n_items > 1:
                print "Nitems %d, all subjects" % (n_items)

                # Data collapsed across subjects
                ids_filtered = (dataset['n_items'] == n_items).flatten()

                bootstrap = em_circularmixture.bootstrap_nontarget_stat(
                    dataset['response'][ids_filtered, 0],
                    dataset['item_angle'][ids_filtered, 0],
                    dataset['item_angle'][ids_filtered, 1:n_items],
                    nontarget_bootstrap_ecdf=bootstrap_nitems_samples[n_items_i]['ecdf'])

                result_pvalue_bootstrap_nitems[n_items_i] = bootstrap['p_value']
                print "p_val:", result_pvalue_bootstrap_nitems

                for subject_i, subject in enumerate(np.unique(dataset['subject'])):
                    print "Nitems %d, subject %d" % (n_items, subject)

                    # Bootstrap per subject and nitems
                    ids_filtered = (dataset['subject'] == subject).flatten() & (dataset['n_items'] == n_items).flatten()

                    # Get pvalue
                    bootstrap = em_circularmixture.bootstrap_nontarget_stat(
                        dataset['response'][ids_filtered, 0],
                        dataset['item_angle'][ids_filtered, 0],
                        dataset['item_angle'][ids_filtered, 1:n_items],
                        nontarget_bootstrap_ecdf=bootstrap_subject_nitems_samples[n_items_i][subject_i]['ecdf'])

                    result_pvalue_bootstrap_subject_nitems_samples[n_items_i, subject_i] = bootstrap['p_value']
                    print "p_val:", result_pvalue_bootstrap_subject_nitems_samples[n_items_i, subject_i]

        signif_level = 0.05
        result_signif_nitems = result_pvalue_bootstrap_nitems < signif_level
        result_num_signif_subject_nitems = np.sum(result_pvalue_bootstrap_subject_nitems_samples < signif_level, axis=1)

        print "Summary:"
        print "Collapsed subjects:", result_signif_nitems
        print "Per subjects (%d total): %s" % (dataset['subject_size'], result_num_signif_subject_nitems)

    all_args = data_pbs.loaded_data['args_list']
    variables_to_save = ['nb_repetitions', 'signif_level']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='bootstrap_nontargets')

    plt.show()

    return locals()
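# Illustrative sketch, not part of the original pipeline: how a cached ECDF turns
# an observed nontarget mixture weight into a one-sided bootstrap p-value. This is
# what em_circularmixture.bootstrap_nontarget_stat is assumed to do internally with
# the ECDFs fitted above; the distribution and observed value below are stand-ins.
def _sketch_ecdf_pvalue():
    import numpy as np
    from statsmodels.distributions.empirical_distribution import ECDF

    rng = np.random.RandomState(0)
    bootstrap_samples = rng.beta(1.0, 10.0, size=1000)  # stand-in for bootstrap statistics under the null
    observed_statistic = 0.35                           # stand-in for the fitted nontarget weight

    ecdf = ECDF(bootstrap_samples)
    p_value = 1.0 - ecdf(observed_statistic)            # null mass above the observation
    return p_value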
def compute_vtest(self):
    self.dataset['vtest_nitems'] = np.empty(self.dataset['n_items_size'])*np.nan
    for n_items_i, n_items in enumerate(np.unique(self.dataset['n_items'])):
        if n_items > 1:
            self.dataset['vtest_nitems'][n_items_i] = utils.V_test(utils.dropnan(self.dataset['errors_nontarget_nitems'][n_items_i]).flatten())['pvalue']
def plots_boostrap(data_pbs, generator_module=None):
    ''' Reload bootstrap samples, plot their histograms, fit empirical CDFs and save them for quicker later use. '''

    #### SETUP
    #
    savefigs = True
    savedata = True

    load_fit_bootstrap = True
    plots_hist_cdf = True
    estimate_bootstrap = True

    should_fit_bootstrap = True
    # caching_bootstrap_filename = None
    caching_bootstrap_filename = os.path.join(generator_module.pbs_submission_infos['simul_out_dir'], 'outputs', 'cache_bootstrap.pickle')

    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", generator_module.dict_parameters_range.keys()

    result_bootstrap_samples_allitems_uniquekappa_sumnontarget = np.squeeze(data_pbs.dict_arrays['result_bootstrap_samples_allitems_uniquekappa_sumnontarget']['results'])
    result_bootstrap_samples = np.squeeze(data_pbs.dict_arrays['result_bootstrap_samples']['results'])
    result_bootstrap_samples_allitems_uniquekappa_allnontarget = np.squeeze(data_pbs.dict_arrays['result_bootstrap_samples_allitems_uniquekappa_allnontarget']['results'])

    sigmax_space = data_pbs.loaded_data['datasets_list'][0]['sigmax_space']
    T_space = data_pbs.loaded_data['datasets_list'][0]['T_space']

    print result_bootstrap_samples_allitems_uniquekappa_sumnontarget.shape
    print result_bootstrap_samples.shape
    print result_bootstrap_samples_allitems_uniquekappa_allnontarget.shape

    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    if load_fit_bootstrap:
        if caching_bootstrap_filename is not None:
            if os.path.exists(caching_bootstrap_filename):
                # Got file, open it and try to use its contents
                try:
                    with open(caching_bootstrap_filename, 'rb') as file_in:  # binary mode, pickle protocol 2 is binary
                        # Load and assign values
                        cached_data = pickle.load(file_in)
                        bootstrap_ecdf_bays_sigmax_T = cached_data['bootstrap_ecdf_bays_sigmax_T']
                        bootstrap_ecdf_allitems_sum_sigmax_T = cached_data['bootstrap_ecdf_allitems_sum_sigmax_T']
                        bootstrap_ecdf_allitems_all_sigmax_T = cached_data['bootstrap_ecdf_allitems_all_sigmax_T']
                        should_fit_bootstrap = False
                except IOError:
                    print "Error while loading ", caching_bootstrap_filename, "falling back to computing the bootstrap fits"

        if should_fit_bootstrap:
            bootstrap_ecdf_bays_sigmax_T = dict()
            bootstrap_ecdf_allitems_sum_sigmax_T = dict()
            bootstrap_ecdf_allitems_all_sigmax_T = dict()

            # Fit bootstrap
            for sigmax_i, sigmax in enumerate(sigmax_space):
                for T_i, T in enumerate(T_space):
                    if T > 1:
                        # One bootstrap CDF per condition
                        bootstrap_ecdf_bays = stmodsdist.empirical_distribution.ECDF(utils.dropnan(result_bootstrap_samples[sigmax_i, T_i]))
                        bootstrap_ecdf_allitems_sum = stmodsdist.empirical_distribution.ECDF(utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_sumnontarget[sigmax_i, T_i]))
                        bootstrap_ecdf_allitems_all = stmodsdist.empirical_distribution.ECDF(utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_allnontarget[sigmax_i, T_i]))

                        # Store in a dict(sigmax) -> dict(T) -> ECDF object
                        bootstrap_ecdf_bays_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict(ecdf=bootstrap_ecdf_bays, T=T, sigmax=sigmax)
                        bootstrap_ecdf_allitems_sum_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict(ecdf=bootstrap_ecdf_allitems_sum, T=T, sigmax=sigmax)
                        bootstrap_ecdf_allitems_all_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict(ecdf=bootstrap_ecdf_allitems_all, T=T, sigmax=sigmax)

            # Save everything to a file, for faster later plotting
            if caching_bootstrap_filename is not None:
                try:
                    with open(caching_bootstrap_filename, 'wb') as filecache_out:
                        data_bootstrap = dict(bootstrap_ecdf_allitems_sum_sigmax_T=bootstrap_ecdf_allitems_sum_sigmax_T, bootstrap_ecdf_allitems_all_sigmax_T=bootstrap_ecdf_allitems_all_sigmax_T, bootstrap_ecdf_bays_sigmax_T=bootstrap_ecdf_bays_sigmax_T)
                        pickle.dump(data_bootstrap, filecache_out, protocol=2)
                except IOError:
                    print "Error writing out to caching file ", caching_bootstrap_filename

    if plots_hist_cdf:
        ## Plots now
        for sigmax_i, sigmax in enumerate(sigmax_space):
            for T_i, T in enumerate(T_space):
                if T > 1:
                    # Histogram of samples
                    _, axes = plt.subplots(ncols=3, figsize=(18, 6))
                    axes[0].hist(utils.dropnan(result_bootstrap_samples[sigmax_i, T_i]), bins=100, normed='density')
                    axes[0].set_xlim([0.0, 1.0])
                    axes[1].hist(utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_sumnontarget[sigmax_i, T_i]), bins=100, normed='density')
                    axes[1].set_xlim([0.0, 1.0])
                    axes[2].hist(utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_allnontarget[sigmax_i, T_i]), bins=100, normed='density')
                    axes[2].set_xlim([0.0, 1.0])

                    if savefigs:
                        dataio.save_current_figure('hist_bootstrap_sigmax%.2f_T%d_{label}_{unique_id}.pdf' % (sigmax, T))

                    # ECDF now
                    _, axes = plt.subplots(ncols=3, sharey=True, figsize=(18, 6))
                    axes[0].plot(bootstrap_ecdf_bays_sigmax_T[sigmax_i][T_i]['ecdf'].x, bootstrap_ecdf_bays_sigmax_T[sigmax_i][T_i]['ecdf'].y, linewidth=2)
                    axes[0].set_xlim([0.0, 1.0])
                    axes[1].plot(bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T_i]['ecdf'].x, bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T_i]['ecdf'].y, linewidth=2)
                    axes[1].set_xlim([0.0, 1.0])
                    axes[2].plot(bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T_i]['ecdf'].x, bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T_i]['ecdf'].y, linewidth=2)
                    axes[2].set_xlim([0.0, 1.0])

                    if savefigs:
                        dataio.save_current_figure('ecdf_bootstrap_sigmax%.2f_T%d_{label}_{unique_id}.pdf' % (sigmax, T))

    if estimate_bootstrap:
        ## Should be in reloader_error_distribution_mixed_121113 instead
        model_outputs = utils.load_npy(os.path.join(os.getenv("WORKDIR_DROP", None), 'Experiments', 'bootstrap_nontargets', 'global_plots_errors_distribution-plots_errors_distribution-d977e237-cfce-473b-a292-00695e725259.npy'))
        data_responses_all = model_outputs['result_responses_all'][..., 0]
        data_target_all = model_outputs['result_target_all'][..., 0]
        data_nontargets_all = model_outputs['result_nontargets_all'][..., 0]

        # Compute bootstrap p-values
        result_pvalue_bootstrap_sum = np.empty((sigmax_space.size, T_space.size - 1))*np.nan
        result_pvalue_bootstrap_all = np.empty((sigmax_space.size, T_space.size - 1, T_space.size - 1))*np.nan

        for sigmax_i, sigmax in enumerate(sigmax_space):
            for T in T_space[1:]:
                bootstrap_allitems_nontargets_allitems_uniquekappa = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(
                    data_responses_all[sigmax_i, (T-1)],
                    data_target_all[sigmax_i, (T-1)],
                    data_nontargets_all[sigmax_i, (T-1), :, :(T-1)],
                    sumnontargets_bootstrap_ecdf=bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T-1]['ecdf'],
                    allnontargets_bootstrap_ecdf=bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T-1]['ecdf'])

                result_pvalue_bootstrap_sum[sigmax_i, T-2] = bootstrap_allitems_nontargets_allitems_uniquekappa['p_value']
                result_pvalue_bootstrap_all[sigmax_i, T-2, :(T-1)] = bootstrap_allitems_nontargets_allitems_uniquekappa['allnontarget_p_value']

                print sigmax, T, result_pvalue_bootstrap_sum[sigmax_i, T-2], result_pvalue_bootstrap_all[sigmax_i, T-2, :(T-1)], np.sum(result_pvalue_bootstrap_all[sigmax_i, T-2, :(T-1)] < 0.05)

    all_args = data_pbs.loaded_data['args_list']
    variables_to_save = ['nb_repetitions']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='bootstrap_nontargets')

    plt.show()

    return locals()
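# Illustrative sketch, not part of the original module: the load-or-fit caching
# pattern used by the functions above, reduced to its essentials. The cache path
# and the samples_per_condition argument are hypothetical; ECDF objects pickle
# cleanly with protocol 2, which is why binary file modes are required.
def _sketch_ecdf_cache(samples_per_condition, cache_filename='/tmp/cache_bootstrap_sketch.pickle'):
    import os
    import pickle
    from statsmodels.distributions.empirical_distribution import ECDF

    if os.path.exists(cache_filename):
        with open(cache_filename, 'rb') as file_in:  # binary mode, protocol 2 is binary
            return pickle.load(file_in)

    ecdfs = dict((cond, ECDF(samples)) for cond, samples in samples_per_condition.items())
    with open(cache_filename, 'wb') as file_out:
        pickle.dump(ecdfs, file_out, protocol=2)
    return ecdfs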
def compute_average_histograms(self):
    ''' Per subject and n_items, compute the average histogram. '''
    angle_space = np.linspace(-np.pi, np.pi, 51)

    self.dataset['hist_cnts_target_subject_nitems'] = np.empty((self.dataset['subject_size'], self.dataset['n_items_size'], angle_space.size - 1))*np.nan
    self.dataset['hist_cnts_nontarget_subject_nitems'] = np.empty((self.dataset['subject_size'], self.dataset['n_items_size'], angle_space.size - 1))*np.nan
    self.dataset['pvalue_nontarget_subject_nitems'] = np.empty((self.dataset['subject_size'], self.dataset['n_items_size']))*np.nan

    for subject_i, subject in enumerate(np.unique(self.dataset['subject'])):
        for n_items_i, n_items in enumerate(np.unique(self.dataset['n_items'])):
            self.dataset['hist_cnts_target_subject_nitems'][subject_i, n_items_i], x, bins = utils.histogram_binspace(utils.dropnan(self.dataset['errors_subject_nitems'][subject_i, n_items_i]), bins=angle_space, norm='density')
            self.dataset['hist_cnts_nontarget_subject_nitems'][subject_i, n_items_i], x, bins = utils.histogram_binspace(utils.dropnan(self.dataset['errors_nontarget_subject_nitems'][subject_i, n_items_i]), bins=angle_space, norm='density')
            if n_items > 1:
                self.dataset['pvalue_nontarget_subject_nitems'][subject_i, n_items_i] = utils.V_test(utils.dropnan(self.dataset['errors_nontarget_subject_nitems'][subject_i, n_items_i]).flatten())['pvalue']

    self.dataset['hist_cnts_target_nitems_stats'] = dict(
        mean=np.mean(self.dataset['hist_cnts_target_subject_nitems'], axis=0),
        std=np.std(self.dataset['hist_cnts_target_subject_nitems'], axis=0),
        sem=np.std(self.dataset['hist_cnts_target_subject_nitems'], axis=0)/np.sqrt(self.dataset['subject_size']))
    self.dataset['hist_cnts_nontarget_nitems_stats'] = dict(
        mean=np.mean(self.dataset['hist_cnts_nontarget_subject_nitems'], axis=0),
        std=np.std(self.dataset['hist_cnts_nontarget_subject_nitems'], axis=0),
        sem=np.std(self.dataset['hist_cnts_nontarget_subject_nitems'], axis=0)/np.sqrt(self.dataset['subject_size']))
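# Illustrative sketch of the summary statistics computed above: average the
# per-subject histograms over the subject axis and attach the std and the SEM
# (std / sqrt(n_subjects)). The input shape is hypothetical toy data.
def _sketch_histogram_stats(hist_cnts_subject_nitems):
    # hist_cnts_subject_nitems: (n_subjects, n_items_size, n_bins)
    import numpy as np

    n_subjects = hist_cnts_subject_nitems.shape[0]
    return dict(mean=np.mean(hist_cnts_subject_nitems, axis=0),
                std=np.std(hist_cnts_subject_nitems, axis=0),
                sem=np.std(hist_cnts_subject_nitems, axis=0) / np.sqrt(n_subjects))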
def preprocess(self, parameters):
    '''
    The Bays2009 dataset is completely different...
    Some preprocessing is already done, so just do the plots we care about.
    '''

    # Make some aliases
    self.dataset['n_items'] = self.dataset['N'].astype(int)
    self.dataset['n_items_size'] = np.unique(self.dataset['n_items']).size
    self.dataset['subject'] = self.dataset['subject'].astype(int)
    self.dataset['subject_size'] = np.unique(self.dataset['subject']).size
    self.dataset['error'] = self.dataset['E']
    self.dataset['response'] = self.dataset['Y']
    self.dataset['item_angle'] = self.dataset['X']
    self.dataset['item_colour'] = self.dataset['A'] - np.pi
    self.dataset['probe'] = np.zeros(self.dataset['response'].shape, dtype=int)

    self.dataset['errors_nitems'] = np.empty(self.dataset['n_items_size'], dtype=np.object)
    self.dataset['errors_nontarget_nitems'] = np.empty(self.dataset['n_items_size'], dtype=np.object)
    self.dataset['errors_subject_nitems'] = np.empty((self.dataset['subject_size'], self.dataset['n_items_size']), dtype=np.object)
    self.dataset['errors_nontarget_subject_nitems'] = np.empty((self.dataset['subject_size'], self.dataset['n_items_size']), dtype=np.object)
    self.dataset['vtest_nitems'] = np.empty(self.dataset['n_items_size'])*np.nan

    self.dataset['precision_subject_nitems_bays'] = np.nan*np.empty((self.dataset['subject_size'], self.dataset['n_items_size']))
    self.dataset['precision_subject_nitems_theo'] = np.nan*np.empty((self.dataset['subject_size'], self.dataset['n_items_size']))
    self.dataset['precision_subject_nitems_theo_nochance'] = np.nan*np.empty((self.dataset['subject_size'], self.dataset['n_items_size']))
    self.dataset['precision_subject_nitems_bays_notreatment'] = np.nan*np.empty((self.dataset['subject_size'], self.dataset['n_items_size']))

    # Fit mixture model
    if parameters.get('fit_mixture_model', False):
        self.fit_mixture_model_cached(caching_save_filename=parameters.get('mixture_model_cache', None))

    # Compute errors and V-tests
    for n_items_i, n_items in enumerate(np.unique(self.dataset['n_items'])):
        for subject_i, subject in enumerate(np.unique(self.dataset['subject'])):
            # Data per subject
            ids_filtered = (self.dataset['subject'] == subject).flatten() & (self.dataset['n_items'] == n_items).flatten()

            self.dataset['errors_subject_nitems'][subject_i, n_items_i] = self.dataset['error'][ids_filtered, 0]
            self.dataset['errors_nontarget_subject_nitems'][subject_i, n_items_i] = self.dataset['error'][ids_filtered, 1:n_items]

            # Compute the precisions
            self.dataset['precision_subject_nitems_bays'][subject_i, n_items_i] = self.compute_precision(self.dataset['errors_subject_nitems'][subject_i, n_items_i], remove_chance_level=True, correct_orientation=False, use_wrong_precision=True)
            self.dataset['precision_subject_nitems_theo'][subject_i, n_items_i] = self.compute_precision(self.dataset['errors_subject_nitems'][subject_i, n_items_i], remove_chance_level=False, correct_orientation=False, use_wrong_precision=False)
            self.dataset['precision_subject_nitems_theo_nochance'][subject_i, n_items_i] = self.compute_precision(self.dataset['errors_subject_nitems'][subject_i, n_items_i], remove_chance_level=True, correct_orientation=False, use_wrong_precision=False)
            self.dataset['precision_subject_nitems_bays_notreatment'][subject_i, n_items_i] = self.compute_precision(self.dataset['errors_subject_nitems'][subject_i, n_items_i], remove_chance_level=False, correct_orientation=False, use_wrong_precision=True)

        # Data collapsed across subjects
        ids_filtered = (self.dataset['n_items'] == n_items).flatten()

        self.dataset['errors_nitems'][n_items_i] = self.dataset['error'][ids_filtered, 0]
        self.dataset['errors_nontarget_nitems'][n_items_i] = self.dataset['error'][ids_filtered, 1:n_items]

        if n_items > 1:
            self.dataset['vtest_nitems'][n_items_i] = utils.V_test(utils.dropnan(self.dataset['errors_nontarget_nitems'][n_items_i]).flatten())['pvalue']

    # Save items in a nice format for the model fit
    self.generate_data_to_fit()

    # Save data in a better format to fit the new collapsed mixture model
    self.generate_data_subject_split()

    # Fit the new collapsed mixture model
    if parameters.get('fit_mixture_model', False):
        self.fit_collapsed_mixture_model_cached(caching_save_filename=parameters.get('collapsed_mixture_model_cache', None))

    # Perform bootstrap analysis if required
    if parameters.get('should_compute_bootstrap', False):
        self.compute_bootstrap_cached(
            caching_save_filename=parameters.get('bootstrap_cache', None),
            nb_bootstrap_samples=parameters.get('nb_bootstrap_samples', 1000))

    # Per subject and n_items, get the average histogram
    self.compute_average_histograms()
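# Illustrative sketch of the boolean-mask trial filtering used in preprocess():
# select the trials of one subject at one set size, then split the error columns
# into target (column 0) and nontargets (columns 1:n_items). Toy arrays only; in
# the real dataset dict these are column vectors with one row per trial.
def _sketch_trial_filtering():
    import numpy as np

    subject = np.array([[1], [1], [2], [2]])
    n_items = np.array([[2], [4], [2], [4]])
    error = np.random.randn(4, 4)  # one column per item, column 0 is the target

    n_items_current = 2
    ids_filtered = (subject == 1).flatten() & (n_items == n_items_current).flatten()
    errors_target = error[ids_filtered, 0]
    errors_nontarget = error[ids_filtered, 1:n_items_current]
    return errors_target, errors_nontarget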
def plots_memory_curves(data_pbs, generator_module=None):
    """ Reload and plot the memory curves of a Mixed code.
        Can use the Marginal Fisher Information and the fitted Mixture Model as well.
    """

    #### SETUP
    #
    savefigs = True
    savedata = True

    do_error_distrib_fits = True

    plot_pcolor_fit_precision_to_fisherinfo = True
    plot_selected_memory_curves = False
    plot_best_memory_curves = False
    plot_best_error_distrib = True

    colormap = None  # or 'cubehelix'
    plt.rcParams["font.size"] = 16
    #
    #### /SETUP

    print "Order parameters: ", generator_module.dict_parameters_range.keys()

    result_all_precisions_mean = utils.nanmean(data_pbs.dict_arrays["result_all_precisions"]["results"], axis=-1)
    result_all_precisions_std = utils.nanstd(data_pbs.dict_arrays["result_all_precisions"]["results"], axis=-1)
    result_em_fits_mean = utils.nanmean(data_pbs.dict_arrays["result_em_fits"]["results"], axis=-1)
    result_em_fits_std = utils.nanstd(data_pbs.dict_arrays["result_em_fits"]["results"], axis=-1)
    result_marginal_inv_fi_mean = utils.nanmean(data_pbs.dict_arrays["result_marginal_inv_fi"]["results"], axis=-1)
    result_marginal_inv_fi_std = utils.nanstd(data_pbs.dict_arrays["result_marginal_inv_fi"]["results"], axis=-1)
    result_marginal_fi_mean = utils.nanmean(1.0 / data_pbs.dict_arrays["result_marginal_inv_fi"]["results"], axis=-1)
    result_marginal_fi_std = utils.nanstd(1.0 / data_pbs.dict_arrays["result_marginal_inv_fi"]["results"], axis=-1)
    result_responses_all = data_pbs.dict_arrays["result_responses"]["results"]
    result_target_all = data_pbs.dict_arrays["result_target"]["results"]
    result_nontargets_all = data_pbs.dict_arrays["result_nontargets"]["results"]

    M_space = data_pbs.loaded_data["parameters_uniques"]["M"].astype(int)
    sigmax_space = data_pbs.loaded_data["parameters_uniques"]["sigmax"]
    T_space = data_pbs.loaded_data["datasets_list"][0]["T_space"]
    nb_repetitions = result_responses_all.shape[-1]

    print M_space
    print sigmax_space
    print T_space
    print result_all_precisions_mean.shape, result_em_fits_mean.shape, result_marginal_inv_fi_mean.shape

    dataio = DataIO.DataIO(
        output_folder=generator_module.pbs_submission_infos["simul_out_dir"] + "/outputs/",
        label="global_" + dataset_infos["save_output_filename"],
    )

    ## Load experimental data
    experim_datadir = os.environ.get("WORKDIR_DROP", os.path.split(load_experimental_data.__file__)[0])
    data_simult = load_experimental_data.load_data_simult(
        data_dir=os.path.normpath(os.path.join(experim_datadir, "../../experimental_data/")), fit_mixture_model=True
    )
    gorgo11_experimental_precision = data_simult["precision_nitems_theo"]
    gorgo11_experimental_kappa = np.array([data["kappa"] for _, data in data_simult["em_fits_nitems"]["mean"].items()])
    gorgo11_experimental_emfits_mean = np.array(
        [
            [data[key] for _, data in data_simult["em_fits_nitems"]["mean"].items()]
            for key in ["kappa", "mixt_target", "mixt_nontargets", "mixt_random"]
        ]
    )
    gorgo11_experimental_emfits_std = np.array(
        [
            [data[key] for _, data in data_simult["em_fits_nitems"]["std"].items()]
            for key in ["kappa", "mixt_target", "mixt_nontargets", "mixt_random"]
        ]
    )
    gorgo11_experimental_emfits_sem = gorgo11_experimental_emfits_std / np.sqrt(np.unique(data_simult["subject"]).size)

    experim_datadir = os.environ.get("WORKDIR_DROP", os.path.split(load_experimental_data.__file__)[0])
    data_bays2009 = load_experimental_data.load_data_bays09(
        data_dir=os.path.normpath(os.path.join(experim_datadir, "../../experimental_data/")), fit_mixture_model=True
    )
    bays09_experimental_mixtures_mean = data_bays2009["em_fits_nitems_arrays"]["mean"]
    bays09_experimental_mixtures_std = data_bays2009["em_fits_nitems_arrays"]["std"]

    # Add interpolated points for 3 and 5 items
    emfit_mean_intpfct = spint.interp1d(np.unique(data_bays2009["n_items"]), bays09_experimental_mixtures_mean)
    bays09_experimental_mixtures_mean_compatible = emfit_mean_intpfct(np.arange(1, 7))
    emfit_std_intpfct = spint.interp1d(np.unique(data_bays2009["n_items"]), bays09_experimental_mixtures_std)
    bays09_experimental_mixtures_std_compatible = emfit_std_intpfct(np.arange(1, 7))
    T_space_bays09 = np.arange(1, 6)

    # Boost non-targets
    # bays09_experimental_mixtures_mean_compatible[1] *= 1.5
    # bays09_experimental_mixtures_mean_compatible[2] /= 1.5
    # bays09_experimental_mixtures_mean_compatible /= np.sum(bays09_experimental_mixtures_mean_compatible, axis=0)

    # Compute some landscapes of fit!
    # dist_diff_precision_margfi = np.sum(np.abs(result_all_precisions_mean*2. - result_marginal_fi_mean[..., 0])**2., axis=-1)
    # dist_ratio_precision_margfi = np.sum(np.abs((result_all_precisions_mean*2.)/result_marginal_fi_mean[..., 0] - 1.0)**2., axis=-1)
    # dist_diff_emkappa_margfi = np.sum(np.abs(result_em_fits_mean[..., 0]*2. - result_marginal_fi_mean[..., 0])**2., axis=-1)
    # dist_ratio_emkappa_margfi = np.sum(np.abs((result_em_fits_mean[..., 0]*2.)/result_marginal_fi_mean[..., 0] - 1.0)**2., axis=-1)

    dist_diff_precision_experim = np.sum(
        np.abs(result_all_precisions_mean[..., : gorgo11_experimental_kappa.size] - gorgo11_experimental_precision) ** 2.0,
        axis=-1,
    )
    dist_diff_emkappa_experim = np.sum(
        np.abs(result_em_fits_mean[..., 0, : gorgo11_experimental_kappa.size] - gorgo11_experimental_kappa) ** 2.0,
        axis=-1,
    )
    dist_diff_em_mixtures_bays09 = np.sum(
        np.sum((result_em_fits_mean[..., 1:4] - bays09_experimental_mixtures_mean_compatible[1:].T) ** 2.0, axis=-1),
        axis=-1,
    )
    dist_diff_modelfits_experfits_bays09 = np.sum(
        np.sum((result_em_fits_mean[..., :4] - bays09_experimental_mixtures_mean_compatible.T) ** 2.0, axis=-1), axis=-1
    )

    if do_error_distrib_fits:
        print "computing error distribution histograms fits"

        # Now try to fit the histograms of errors to targets/nontargets
        bays09_hist_target_mean = data_bays2009["hist_cnts_target_nitems_stats"]["mean"]
        bays09_hist_target_std = data_bays2009["hist_cnts_target_nitems_stats"]["std"]
        bays09_hist_nontarget_mean = data_bays2009["hist_cnts_nontarget_nitems_stats"]["mean"]
        bays09_hist_nontarget_std = data_bays2009["hist_cnts_nontarget_nitems_stats"]["std"]
        T_space_bays09_filt = np.unique(data_bays2009["n_items"])

        angle_space = np.linspace(-np.pi, np.pi, bays09_hist_target_mean.shape[-1] + 1)
        bins_center = angle_space[:-1] + np.diff(angle_space)[0] / 2

        errors_targets = utils.wrap_angles(result_responses_all - result_target_all)
        hist_targets_all = np.empty(
            (M_space.size, sigmax_space.size, T_space_bays09_filt.size, angle_space.size - 1, nb_repetitions)
        )
        errors_nontargets = np.nan * np.empty(result_nontargets_all.shape)
        hist_nontargets_all = np.empty(
            (M_space.size, sigmax_space.size, T_space_bays09_filt.size, angle_space.size - 1, nb_repetitions)
        )
        for M_i, M in enumerate(M_space):
            for sigmax_i, sigmax in enumerate(sigmax_space):
                for T_bays_i, T_bays in enumerate(T_space_bays09_filt):
                    for repet_i in xrange(nb_repetitions):
                        # Could do a nicer indexing but f**k it

                        # Histogram the errors to targets
                        hist_targets_all[M_i, sigmax_i, T_bays_i, :, repet_i], x, bins = utils.histogram_binspace(
                            utils.dropnan(errors_targets[M_i, sigmax_i, T_bays - 1, ..., repet_i]),
                            bins=angle_space,
                            norm="density",
                        )

                        # Compute the errors between the responses and nontargets
                        errors_nontargets[M_i, sigmax_i, T_bays - 1, :, :, repet_i] = utils.wrap_angles(
                            (
                                result_responses_all[M_i, sigmax_i, T_bays - 1, :, repet_i, np.newaxis]
                                - result_nontargets_all[M_i, sigmax_i, T_bays - 1, :, :, repet_i]
                            )
                        )

                        # Histogram them
                        hist_nontargets_all[M_i, sigmax_i, T_bays_i, :, repet_i], x, bins = utils.histogram_binspace(
                            utils.dropnan(errors_nontargets[M_i, sigmax_i, T_bays - 1, ..., repet_i]),
                            bins=angle_space,
                            norm="density",
                        )

        hist_targets_mean = utils.nanmean(hist_targets_all, axis=-1).filled(np.nan)
        hist_targets_std = utils.nanstd(hist_targets_all, axis=-1).filled(np.nan)
        hist_nontargets_mean = utils.nanmean(hist_nontargets_all, axis=-1).filled(np.nan)
        hist_nontargets_std = utils.nanstd(hist_nontargets_all, axis=-1).filled(np.nan)

        # Compute the distances to the experimental histograms
        dist_diff_hist_target_bays09 = np.nansum(
            np.nansum((hist_targets_mean - bays09_hist_target_mean) ** 2.0, axis=-1), axis=-1
        )
        dist_diff_hist_nontargets_bays09 = np.nansum(
            np.nansum((hist_nontargets_mean - bays09_hist_nontarget_mean) ** 2.0, axis=-1), axis=-1
        )
        dist_diff_hist_nontargets_5_6items_bays09 = np.nansum(
            np.nansum((hist_nontargets_mean[:, :, -2:] - bays09_hist_nontarget_mean[-2:]) ** 2.0, axis=-1), axis=-1
        )

    if plot_pcolor_fit_precision_to_fisherinfo:
        # Check the fit between the precision and the experiments
        utils.pcolor_2d_data(dist_diff_precision_experim, log_scale=True, x=M_space, y=sigmax_space, xlabel="M", ylabel="sigmax", xlabel_format="%d")
        if savefigs:
            dataio.save_current_figure("match_precision_exper_log_pcolor_{label}_{unique_id}.pdf")

        utils.pcolor_2d_data(dist_diff_emkappa_experim, x=M_space, y=sigmax_space, xlabel="M", ylabel="sigmax", xlabel_format="%d")
        if savefigs:
            dataio.save_current_figure("match_emkappa_model_exper_pcolor_{label}_{unique_id}.pdf")

        utils.pcolor_2d_data(dist_diff_em_mixtures_bays09, x=M_space, y=sigmax_space, xlabel="M", ylabel="sigmax", log_scale=True, xlabel_format="%d")
        if savefigs:
            dataio.save_current_figure("match_emmixtures_experbays09_log_pcolor_{label}_{unique_id}.pdf")

        utils.pcolor_2d_data(dist_diff_modelfits_experfits_bays09, log_scale=True, x=M_space, y=sigmax_space, xlabel="M", ylabel="sigmax", xlabel_format="%d")
        if savefigs:
            dataio.save_current_figure("match_diff_emfits_experbays09_pcolor_{label}_{unique_id}.pdf")

        if do_error_distrib_fits:
            utils.pcolor_2d_data(dist_diff_hist_target_bays09, x=M_space, y=sigmax_space, xlabel="M", ylabel="sigmax", log_scale=True, xlabel_format="%d")
            if savefigs:
                dataio.save_current_figure("match_hist_targets_experbays09_log_pcolor_{label}_{unique_id}.pdf")

            utils.pcolor_2d_data(dist_diff_hist_nontargets_bays09, x=M_space, y=sigmax_space, xlabel="M", ylabel="sigmax", log_scale=True, xlabel_format="%d")
            if savefigs:
                dataio.save_current_figure("match_hist_nontargets_experbays09_log_pcolor_{label}_{unique_id}.pdf")

            utils.pcolor_2d_data(dist_diff_hist_nontargets_5_6items_bays09, x=M_space, y=sigmax_space, xlabel="M", ylabel="sigmax", log_scale=True, xlabel_format="%d")
            if savefigs:
                dataio.save_current_figure("match_hist_nontargets_6items_experbays09_log_pcolor_{label}_{unique_id}.pdf")

    # Macro plots
    def mem_plot_precision(sigmax_i, M_i, mem_exp_prec):
        ax = utils.plot_mean_std_area(T_space[: mem_exp_prec.size], mem_exp_prec, np.zeros(mem_exp_prec.size), linewidth=3, fmt="o-", markersize=8, label="Experimental data")
        ax = utils.plot_mean_std_area(T_space[: mem_exp_prec.size], result_all_precisions_mean[M_i, sigmax_i, : mem_exp_prec.size], result_all_precisions_std[M_i, sigmax_i, : mem_exp_prec.size], ax_handle=ax, linewidth=3, fmt="o-", markersize=8, label="Precision of samples")

        # ax = utils.plot_mean_std_area(T_space, 0.5*result_marginal_fi_mean[..., 0][M_i, sigmax_i], 0.5*result_marginal_fi_std[..., 0][M_i, sigmax_i], ax_handle=ax, linewidth=3, fmt='o-', markersize=8, label='Marginal Fisher Information')
        # ax = utils.plot_mean_std_area(T_space, result_em_fits_mean[..., 0][M_i, sigmax_i], result_em_fits_std[..., 0][M_i, sigmax_i], ax_handle=ax, xlabel='Number of items', ylabel="Inverse variance $[rad^{-2}]$", linewidth=3, fmt='o-', markersize=8, label='Fitted kappa')

        ax.set_title("M %d, sigmax %.2f" % (M_space[M_i], sigmax_space[sigmax_i]))
        ax.legend()
        ax.set_xlim([0.9, mem_exp_prec.size + 0.1])
        ax.set_xticks(range(1, mem_exp_prec.size + 1))
        ax.set_xticklabels(range(1, mem_exp_prec.size + 1))

        if savefigs:
            dataio.save_current_figure("memorycurves_precision_M%dsigmax%.2f_{label}_{unique_id}.pdf" % (M_space[M_i], sigmax_space[sigmax_i]))

    def mem_plot_kappa(sigmax_i, M_i, exp_kappa_mean, exp_kappa_std=None):
        ax = utils.plot_mean_std_area(T_space[: exp_kappa_mean.size], exp_kappa_mean, exp_kappa_std, linewidth=3, fmt="o-", markersize=8, label="Experimental data")
        ax = utils.plot_mean_std_area(T_space[: exp_kappa_mean.size], result_em_fits_mean[..., : exp_kappa_mean.size, 0][M_i, sigmax_i], result_em_fits_std[..., : exp_kappa_mean.size, 0][M_i, sigmax_i], xlabel="Number of items", ylabel="Memory error $[rad^{-2}]$", linewidth=3, fmt="o-", markersize=8, label="Fitted kappa", ax_handle=ax)

        # ax = utils.plot_mean_std_area(T_space, 0.5*result_marginal_fi_mean[..., 0][M_i, sigmax_i], 0.5*result_marginal_fi_std[..., 0][M_i, sigmax_i], ax_handle=ax, linewidth=3, fmt='o-', markersize=8, label='Marginal Fisher Information')

        ax.set_title("M %d, sigmax %.2f" % (M_space[M_i], sigmax_space[sigmax_i]))
        ax.legend()
        ax.set_xlim([0.9, exp_kappa_mean.size + 0.1])
        ax.set_xticks(range(1, exp_kappa_mean.size + 1))
        ax.set_xticklabels(range(1, exp_kappa_mean.size + 1))

        ax.get_figure().canvas.draw()

        if savefigs:
            dataio.save_current_figure("memorycurves_kappa_M%dsigmax%.2f_{label}_{unique_id}.pdf" % (M_space[M_i], sigmax_space[sigmax_i]))

    def em_plot(sigmax_i, M_i):
        f, ax = plt.subplots()
        ax2 = ax.twinx()

        # Left axis, kappa
        ax = utils.plot_mean_std_area(T_space, result_em_fits_mean[..., 0][M_i, sigmax_i], result_em_fits_std[..., 0][M_i, sigmax_i], xlabel="Number of items", ylabel="Inverse variance $[rad^{-2}]$", ax_handle=ax, linewidth=3, fmt="o-", markersize=8, label="Fitted kappa", color="k")

        # Right axis, mixture probabilities
        utils.plot_mean_std_area(T_space, result_em_fits_mean[..., 1][M_i, sigmax_i], result_em_fits_std[..., 1][M_i, sigmax_i], xlabel="Number of items", ylabel="Mixture probabilities", ax_handle=ax2, linewidth=3, fmt="o-", markersize=8, label="Target")
        utils.plot_mean_std_area(T_space, result_em_fits_mean[..., 2][M_i, sigmax_i], result_em_fits_std[..., 2][M_i, sigmax_i], xlabel="Number of items", ylabel="Mixture probabilities", ax_handle=ax2, linewidth=3, fmt="o-", markersize=8, label="Nontarget")
        utils.plot_mean_std_area(T_space, result_em_fits_mean[..., 3][M_i, sigmax_i], result_em_fits_std[..., 3][M_i, sigmax_i], xlabel="Number of items", ylabel="Mixture probabilities", ax_handle=ax2, linewidth=3, fmt="o-", markersize=8, label="Random")

        lines, labels = ax.get_legend_handles_labels()
        lines2, labels2 = ax2.get_legend_handles_labels()
        ax.legend(lines + lines2, labels + labels2)

        ax.set_title("M %d, sigmax %.2f" % (M_space[M_i], sigmax_space[sigmax_i]))
        ax.set_xlim([0.9, T_space.size])
        ax.set_xticks(range(1, T_space.size + 1))
        ax.set_xticklabels(range(1, T_space.size + 1))

        f.canvas.draw()

        if savefigs:
            dataio.save_current_figure("memorycurves_emfits_M%dsigmax%.2f_{label}_{unique_id}.pdf" % (M_space[M_i], sigmax_space[sigmax_i]))

    def em_plot_paper(sigmax_i, M_i):
        f, ax = plt.subplots()

        # Mixture probabilities
        utils.plot_mean_std_area(T_space_bays09, result_em_fits_mean[..., 1][M_i, sigmax_i][: T_space_bays09.size], result_em_fits_std[..., 1][M_i, sigmax_i][: T_space_bays09.size], xlabel="Number of items", ylabel="Mixture probabilities", ax_handle=ax, linewidth=3, fmt="o-", markersize=5, label="Target")
        utils.plot_mean_std_area(T_space_bays09, result_em_fits_mean[..., 2][M_i, sigmax_i][: T_space_bays09.size], result_em_fits_std[..., 2][M_i, sigmax_i][: T_space_bays09.size], xlabel="Number of items", ylabel="Mixture probabilities", ax_handle=ax, linewidth=3, fmt="o-", markersize=5, label="Nontarget")
        utils.plot_mean_std_area(T_space_bays09, result_em_fits_mean[..., 3][M_i, sigmax_i][: T_space_bays09.size], result_em_fits_std[..., 3][M_i, sigmax_i][: T_space_bays09.size], xlabel="Number of items", ylabel="Mixture probabilities", ax_handle=ax, linewidth=3, fmt="o-", markersize=5, label="Random")

        ax.legend(prop={"size": 15})
        ax.set_title("M %d, sigmax %.2f" % (M_space[M_i], sigmax_space[sigmax_i]))
        ax.set_xlim([1.0, T_space_bays09.size])
        ax.set_ylim([0.0, 1.1])
        ax.set_xticks(range(1, T_space_bays09.size + 1))
        ax.set_xticklabels(range(1, T_space_bays09.size + 1))

        f.canvas.draw()

        if savefigs:
            dataio.save_current_figure("memorycurves_emfits_paper_M%dsigmax%.2f_{label}_{unique_id}.pdf" % (M_space[M_i], sigmax_space[sigmax_i]))

    def hist_errors_targets_nontargets(hists_toplot_mean, hists_toplot_std, title="", M=0, sigmax=0, yaxis_lim="auto"):
        f1, axes1 = plt.subplots(ncols=hists_toplot_mean.shape[-2], figsize=(hists_toplot_mean.shape[-2] * 6, 6), sharey=True)

        for T_bays_i, T_bays in enumerate(T_space_bays09_filt):
            if not np.all(np.isnan(hists_toplot_mean[T_bays_i])):
                axes1[T_bays_i].bar(bins_center, hists_toplot_mean[T_bays_i], width=2.0 * np.pi / (angle_space.size - 1), align="center", yerr=hists_toplot_std[T_bays_i])
                axes1[T_bays_i].set_title("N=%d" % T_bays)
                axes1[T_bays_i].set_xlim([bins_center[0] - np.pi / (angle_space.size - 1), bins_center[-1] + np.pi / (angle_space.size - 1)])

                if yaxis_lim == "target":
                    axes1[T_bays_i].set_ylim([0.0, 2.0])
                elif yaxis_lim == "nontarget":
                    axes1[T_bays_i].set_ylim([0.0, 0.3])
                else:
                    axes1[T_bays_i].set_ylim([0.0, np.nanmax(hists_toplot_mean + hists_toplot_std) * 1.1])

                axes1[T_bays_i].set_xticks((-np.pi, -np.pi / 2, 0, np.pi / 2.0, np.pi))
                axes1[T_bays_i].set_xticklabels((r"$-\pi$", r"$-\frac{\pi}{2}$", r"$0$", r"$\frac{\pi}{2}$", r"$\pi$"), fontsize=16)

        f1.canvas.draw()

        if savefigs:
            dataio.save_current_figure("memorycurves_hist_%s_paper_M%dsigmax%.2f_{label}_{unique_id}.pdf" % (title, M, sigmax))

    #################################

    if plot_selected_memory_curves:
        selected_values = [[0.84, 0.23], [0.84, 0.19]]

        for current_values in selected_values:
            # Find the indices
            M_i = np.argmin(np.abs(current_values[0] - M_space))
            sigmax_i = np.argmin(np.abs(current_values[1] - sigmax_space))

            # Pass the experimental curves explicitly, they are required arguments
            mem_plot_precision(sigmax_i, M_i, gorgo11_experimental_precision)
            mem_plot_kappa(sigmax_i, M_i, gorgo11_experimental_emfits_mean[0], gorgo11_experimental_emfits_std[0])

    if plot_best_memory_curves:
        # Best precision fit
        best_axis2_i_all = np.argmin(dist_diff_precision_experim, axis=1)

        for axis1_i, best_axis2_i in enumerate(best_axis2_i_all):
            mem_plot_precision(best_axis2_i, axis1_i, gorgo11_experimental_precision)

        # Best kappa fit
        best_axis2_i_all = np.argmin(dist_diff_emkappa_experim, axis=1)

        for axis1_i, best_axis2_i in enumerate(best_axis2_i_all):
            mem_plot_kappa(best_axis2_i, axis1_i, gorgo11_experimental_emfits_mean[0], gorgo11_experimental_emfits_std[0])
            # em_plot(best_axis2_i, axis1_i)

        # Best em parameters fit to Bays09
        best_axis2_i_all = np.argmin(dist_diff_modelfits_experfits_bays09, axis=1)
        # best_axis2_i_all = np.argmin(dist_diff_em_mixtures_bays09, axis=1)

        for axis1_i, best_axis2_i in enumerate(best_axis2_i_all):
            mem_plot_kappa(best_axis2_i, axis1_i, bays09_experimental_mixtures_mean_compatible[0, : T_space_bays09.size], bays09_experimental_mixtures_std_compatible[0, : T_space_bays09.size])
            # em_plot(best_axis2_i, axis1_i)
            em_plot_paper(best_axis2_i, axis1_i)

    if plot_best_error_distrib and do_error_distrib_fits:
        # Best target histograms
        best_axis2_i_all = np.argmin(dist_diff_hist_target_bays09, axis=1)

        for axis1_i, best_axis2_i in enumerate(best_axis2_i_all):
            hist_errors_targets_nontargets(hist_targets_mean[axis1_i, best_axis2_i], hist_targets_std[axis1_i, best_axis2_i], "target", M=M_space[axis1_i], sigmax=sigmax_space[best_axis2_i], yaxis_lim="target")

        # Best nontarget histograms
        best_axis2_i_all = np.argmin(dist_diff_hist_nontargets_bays09, axis=1)

        for axis1_i, best_axis2_i in enumerate(best_axis2_i_all):
            hist_errors_targets_nontargets(hist_nontargets_mean[axis1_i, best_axis2_i], hist_nontargets_std[axis1_i, best_axis2_i], "nontarget", M=M_space[axis1_i], sigmax=sigmax_space[best_axis2_i], yaxis_lim="nontarget")

    all_args = data_pbs.loaded_data["args_list"]
    variables_to_save = [
        "gorgo11_experimental_precision",
        "gorgo11_experimental_kappa",
        "bays09_experimental_mixtures_mean_compatible",
        "T_space",
    ]

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder="memory_curves")

    plt.show()

    return locals()
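# Illustrative sketch of the fit landscape used above: squared distance between the
# model EM-fit means and the experimental mixture parameters, summed over parameters
# and set sizes, then the best sigmax index per M via np.argmin along axis 1.
# All shapes and values below are toy stand-ins.
def _sketch_fit_landscape():
    import numpy as np

    M_size, sigmax_size, T_size = 3, 4, 5
    result_em_fits_mean = np.random.rand(M_size, sigmax_size, T_size, 4)  # kappa + 3 mixture weights
    experimental_mixtures = np.random.rand(4, T_size)                     # same 4 parameters, per set size

    # One score per (M, sigmax) point on the grid
    dist = np.sum(np.sum((result_em_fits_mean - experimental_mixtures.T) ** 2.0, axis=-1), axis=-1)

    # Best sigmax index for each M, as used to pick which memory curves to plot
    return np.argmin(dist, axis=1)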
def plots_boostrap(data_pbs, generator_module=None):
    """ Reload bootstrap samples, plot their histograms, fit empirical CDFs and save them for quicker later use. """

    #### SETUP
    #
    savefigs = True
    savedata = True

    load_fit_bootstrap = True
    plots_hist_cdf = False
    estimate_bootstrap = False

    should_fit_bootstrap = True
    # caching_bootstrap_filename = None
    caching_bootstrap_filename = os.path.join(
        generator_module.pbs_submission_infos["simul_out_dir"], "outputs", "cache_bootstrap.pickle"
    )

    plt.rcParams["font.size"] = 16
    #
    #### /SETUP

    print "Order parameters: ", generator_module.dict_parameters_range.keys()

    result_bootstrap_samples_allitems_uniquekappa_sumnontarget = np.squeeze(
        data_pbs.dict_arrays["result_bootstrap_samples_allitems_uniquekappa_sumnontarget"]["results"]
    )
    result_bootstrap_samples = np.squeeze(data_pbs.dict_arrays["result_bootstrap_samples"]["results"])
    result_bootstrap_samples_allitems_uniquekappa_allnontarget = np.squeeze(
        data_pbs.dict_arrays["result_bootstrap_samples_allitems_uniquekappa_allnontarget"]["results"]
    )

    sigmax_space = data_pbs.loaded_data["datasets_list"][0]["sigmax_space"]
    T_space = data_pbs.loaded_data["datasets_list"][0]["T_space"]

    print result_bootstrap_samples_allitems_uniquekappa_sumnontarget.shape
    print result_bootstrap_samples.shape
    print result_bootstrap_samples_allitems_uniquekappa_allnontarget.shape

    dataio = DataIO(
        output_folder=generator_module.pbs_submission_infos["simul_out_dir"] + "/outputs/",
        label="global_" + dataset_infos["save_output_filename"],
    )

    if load_fit_bootstrap:
        if caching_bootstrap_filename is not None:
            if os.path.exists(caching_bootstrap_filename):
                # Got file, open it and try to use its contents
                try:
                    with open(caching_bootstrap_filename, "rb") as file_in:  # binary mode, pickle protocol 2 is binary
                        # Load and assign values
                        cached_data = pickle.load(file_in)
                        bootstrap_ecdf_bays_sigmax_T = cached_data["bootstrap_ecdf_bays_sigmax_T"]
                        bootstrap_ecdf_allitems_sum_sigmax_T = cached_data["bootstrap_ecdf_allitems_sum_sigmax_T"]
                        bootstrap_ecdf_allitems_all_sigmax_T = cached_data["bootstrap_ecdf_allitems_all_sigmax_T"]
                        should_fit_bootstrap = False
                except IOError:
                    print "Error while loading ", caching_bootstrap_filename, "falling back to computing the bootstrap fits"

        if should_fit_bootstrap:
            bootstrap_ecdf_bays_sigmax_T = dict()
            bootstrap_ecdf_allitems_sum_sigmax_T = dict()
            bootstrap_ecdf_allitems_all_sigmax_T = dict()

            # Fit bootstrap
            for sigmax_i, sigmax in enumerate(sigmax_space):
                for T_i, T in enumerate(T_space):
                    if T > 1:
                        # One bootstrap CDF per condition
                        bootstrap_ecdf_bays = stmodsdist.empirical_distribution.ECDF(
                            utils.dropnan(result_bootstrap_samples[sigmax_i, T_i])
                        )
                        bootstrap_ecdf_allitems_sum = stmodsdist.empirical_distribution.ECDF(
                            utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_sumnontarget[sigmax_i, T_i])
                        )
                        bootstrap_ecdf_allitems_all = stmodsdist.empirical_distribution.ECDF(
                            utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_allnontarget[sigmax_i, T_i])
                        )

                        # Store in a dict(sigmax) -> dict(T) -> ECDF object
                        bootstrap_ecdf_bays_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict(
                            ecdf=bootstrap_ecdf_bays, T=T, sigmax=sigmax
                        )
                        bootstrap_ecdf_allitems_sum_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict(
                            ecdf=bootstrap_ecdf_allitems_sum, T=T, sigmax=sigmax
                        )
                        bootstrap_ecdf_allitems_all_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict(
                            ecdf=bootstrap_ecdf_allitems_all, T=T, sigmax=sigmax
                        )

            # Save everything to a file, for faster later plotting
            if caching_bootstrap_filename is not None:
                try:
                    with open(caching_bootstrap_filename, "wb") as filecache_out:
                        data_bootstrap = dict(
                            bootstrap_ecdf_allitems_sum_sigmax_T=bootstrap_ecdf_allitems_sum_sigmax_T,
                            bootstrap_ecdf_allitems_all_sigmax_T=bootstrap_ecdf_allitems_all_sigmax_T,
                            bootstrap_ecdf_bays_sigmax_T=bootstrap_ecdf_bays_sigmax_T,
                        )
                        pickle.dump(data_bootstrap, filecache_out, protocol=2)
                except IOError:
                    print "Error writing out to caching file ", caching_bootstrap_filename

    if plots_hist_cdf:
        ## Plots now
        for sigmax_i, sigmax in enumerate(sigmax_space):
            for T_i, T in enumerate(T_space):
                if T > 1:
                    # Histogram of samples
                    _, axes = plt.subplots(ncols=3, figsize=(18, 6))
                    axes[0].hist(utils.dropnan(result_bootstrap_samples[sigmax_i, T_i]), bins=100, normed="density")
                    axes[0].set_xlim([0.0, 1.0])
                    axes[1].hist(utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_sumnontarget[sigmax_i, T_i]), bins=100, normed="density")
                    axes[1].set_xlim([0.0, 1.0])
                    axes[2].hist(utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_allnontarget[sigmax_i, T_i]), bins=100, normed="density")
                    axes[2].set_xlim([0.0, 1.0])

                    if savefigs:
                        dataio.save_current_figure("hist_bootstrap_sigmax%.2f_T%d_{label}_{unique_id}.pdf" % (sigmax, T))

                    # ECDF now
                    _, axes = plt.subplots(ncols=3, sharey=True, figsize=(18, 6))
                    axes[0].plot(bootstrap_ecdf_bays_sigmax_T[sigmax_i][T_i]["ecdf"].x, bootstrap_ecdf_bays_sigmax_T[sigmax_i][T_i]["ecdf"].y, linewidth=2)
                    axes[0].set_xlim([0.0, 1.0])
                    axes[1].plot(bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T_i]["ecdf"].x, bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T_i]["ecdf"].y, linewidth=2)
                    axes[1].set_xlim([0.0, 1.0])
                    axes[2].plot(bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T_i]["ecdf"].x, bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T_i]["ecdf"].y, linewidth=2)
                    axes[2].set_xlim([0.0, 1.0])

                    if savefigs:
                        dataio.save_current_figure("ecdf_bootstrap_sigmax%.2f_T%d_{label}_{unique_id}.pdf" % (sigmax, T))

    if estimate_bootstrap:
        model_outputs = utils.load_npy(
            os.path.join(
                os.environ["WORKDIR_DROP"],
                "Experiments/error_distribution/error_distribution_conj_M100T6repetitions5_121113_outputs/global_plots_errors_distribution-plots_errors_distribution-cc1a49b0-f5f0-4e82-9f0f-5a16a2bfd4e8.npy",
            )
        )
        data_responses_all = model_outputs["result_responses_all"][..., 0]
        data_target_all = model_outputs["result_target_all"][..., 0]
        data_nontargets_all = model_outputs["result_nontargets_all"][..., 0]

        # Compute bootstrap p-values
        for sigmax_i, sigmax in enumerate(sigmax_space):
            for T in T_space[1:]:
                bootstrap_allitems_nontargets_allitems_uniquekappa = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(
                    data_responses_all[sigmax_i, (T - 1)],
                    data_target_all[sigmax_i, (T - 1)],
                    data_nontargets_all[sigmax_i, (T - 1), :, : (T - 1)],
                    sumnontargets_bootstrap_ecdf=bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T - 1]["ecdf"],
                    allnontargets_bootstrap_ecdf=bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T - 1]["ecdf"],
                )
                # TODO finish here!

    all_args = data_pbs.loaded_data["args_list"]
    variables_to_save = ["nb_repetitions"]

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder="bootstrap_nontargets")

    plt.show()

    return locals()
def plots_errors_distribution(data_pbs, generator_module=None):
    '''
    Reload responses.
    Plot the error distributions.
    '''

    #### SETUP
    #
    savefigs = True
    savedata = True

    plot_persigmax = True
    do_best_nontarget = False

    load_test_bootstrap = True
    caching_bootstrap_filename = os.path.join(generator_module.pbs_submission_infos['simul_out_dir'], 'outputs', 'cache_bootstrap_errordistrib_mixed_sigmaxT.pickle')

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16
    angle_space = np.linspace(-np.pi, np.pi, 51)
    #
    #### /SETUP

    print "Order parameters: ", generator_module.dict_parameters_range.keys()

    result_responses_all = data_pbs.dict_arrays['result_responses']['results']
    result_target_all = data_pbs.dict_arrays['result_target']['results']
    result_nontargets_all = data_pbs.dict_arrays['result_nontargets']['results']
    result_em_fits_all = data_pbs.dict_arrays['result_em_fits']['results']

    T_space = data_pbs.loaded_data['parameters_uniques']['T']
    sigmax_space = data_pbs.loaded_data['parameters_uniques']['sigmax']
    nb_repetitions = result_responses_all.shape[-1]
    N = result_responses_all.shape[-2]

    result_pval_vtest_nontargets = np.empty((sigmax_space.size, T_space.size))*np.nan
    result_pvalue_bootstrap_sum = np.empty((sigmax_space.size, T_space.size - 1))*np.nan
    result_pvalue_bootstrap_all = np.empty((sigmax_space.size, T_space.size - 1, T_space.size - 1))*np.nan

    print sigmax_space
    print T_space
    print result_responses_all.shape, result_target_all.shape, result_nontargets_all.shape, result_em_fits_all.shape

    dataio = DataIO.DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    if load_test_bootstrap:
        if caching_bootstrap_filename is not None:
            if os.path.exists(caching_bootstrap_filename):
                # Got file, open it and try to use its contents
                try:
                    with open(caching_bootstrap_filename, 'rb') as file_in:  # binary mode, pickle protocol 2 is binary
                        # Load and assign values
                        cached_data = pickle.load(file_in)
                        bootstrap_ecdf_bays_sigmax_T = cached_data['bootstrap_ecdf_bays_sigmax_T']
                        bootstrap_ecdf_allitems_sum_sigmax_T = cached_data['bootstrap_ecdf_allitems_sum_sigmax_T']
                        bootstrap_ecdf_allitems_all_sigmax_T = cached_data['bootstrap_ecdf_allitems_all_sigmax_T']
                except IOError:
                    print "Error while loading ", caching_bootstrap_filename, "falling back to computing the bootstrap fits"
                    load_test_bootstrap = False

    if load_test_bootstrap:
        # Now compute the p-value for each sigmax/T
        # only use 1000 samples
        data_responses_all = result_responses_all[..., 0]
        data_target_all = result_target_all[..., 0]
        data_nontargets_all = result_nontargets_all[..., 0]

        # Compute bootstrap p-values
        for sigmax_i, sigmax in enumerate(sigmax_space):
            for T in T_space[1:]:
                bootstrap_allitems_nontargets_allitems_uniquekappa = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(
                    data_responses_all[sigmax_i, (T-1)],
                    data_target_all[sigmax_i, (T-1)],
                    data_nontargets_all[sigmax_i, (T-1), :, :(T-1)],
                    sumnontargets_bootstrap_ecdf=bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T-1]['ecdf'],
                    allnontargets_bootstrap_ecdf=bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T-1]['ecdf'])

                result_pvalue_bootstrap_sum[sigmax_i, T-2] = bootstrap_allitems_nontargets_allitems_uniquekappa['p_value']
                result_pvalue_bootstrap_all[sigmax_i, T-2, :(T-1)] = bootstrap_allitems_nontargets_allitems_uniquekappa['allnontarget_p_value']

                print sigmax, T, result_pvalue_bootstrap_sum[sigmax_i, T-2], result_pvalue_bootstrap_all[sigmax_i, T-2, :(T-1)], np.sum(result_pvalue_bootstrap_all[sigmax_i, T-2, :(T-1)] < 0.05)

    if plot_persigmax:
        T_space_filtered = np.array([1, 2, 4, 6])

        for sigmax_i, sigmax in enumerate(sigmax_space):
            print "sigmax: ", sigmax

            # Compute the errors between the responses and the targets
            errors_targets = utils.wrap_angles(result_responses_all[sigmax_i] - result_target_all[sigmax_i])
            errors_nontargets = np.nan*np.empty(result_nontargets_all[sigmax_i].shape)
            if do_best_nontarget:
                errors_best_nontarget = np.empty(errors_targets.shape)

            for T_i in xrange(1, T_space.size):
                for repet_i in xrange(nb_repetitions):
                    # Could do a nicer indexing but f**k it

                    # Compute the errors between the responses and nontargets
                    errors_nontargets[T_i, :, :, repet_i] = utils.wrap_angles((result_responses_all[sigmax_i, T_i, :, repet_i, np.newaxis] - result_nontargets_all[sigmax_i, T_i, :, :, repet_i]))

                    # Errors between the response and the best nontarget
                    if do_best_nontarget:
                        errors_best_nontarget[T_i, :, repet_i] = errors_nontargets[T_i, np.arange(errors_nontargets.shape[1]), np.nanargmin(np.abs(errors_nontargets[T_i, ..., repet_i]), axis=1), repet_i]

            f1, axes1 = plt.subplots(ncols=T_space_filtered.size, figsize=(T_space_filtered.size*6, 6), sharey=True)
            f2, axes2 = plt.subplots(ncols=T_space_filtered.size - 1, figsize=((T_space_filtered.size - 1)*6, 6), sharey=True)

            for T_i, T in enumerate(T_space_filtered):
                print "T: ", T

                # Now, per T items, show the distribution of errors and of errors to nontargets.
                # The error arrays are indexed on the full T axis (T_space starts at 1), so use
                # T - 1 rather than the index into the filtered T_space.

                # Error to target
                # hist_errors_targets = np.zeros((angle_space.size, nb_repetitions))
                # for repet_i in xrange(nb_repetitions):
                #     hist_errors_targets[:, repet_i], _, _ = utils_math.histogram_binspace(errors_targets[T - 1, :, repet_i], bins=angle_space)
                # f, ax = plt.subplots()
                # ax.bar(angle_space, np.mean(hist_errors_targets, axis=-1), width=2.*np.pi/(angle_space.size-1), align='center')
                # ax.set_xlim([angle_space[0] - np.pi/(angle_space.size-1), angle_space[-1] + np.pi/(angle_space.size-1)])
                # utils.plot_mean_std_area(angle_space, np.mean(hist_errors_targets, axis=-1), np.std(hist_errors_targets, axis=-1))
                # utils.hist_samples_density_estimation(errors_targets[T - 1].reshape(nb_repetitions*N), bins=angle_space, title='Errors between response and target, N=%d' % (T))

                utils.hist_angular_data(utils.dropnan(errors_targets[T - 1]), bins=angle_space, norm='density', ax_handle=axes1[T_i], pretty_xticks=True)
                axes1[T_i].set_ylim([0., 2.0])

                if T > 1:
                    # Error to nontarget
                    utils.hist_angular_data(utils.dropnan(errors_nontargets[T - 1, :, :(T - 1)]), bins=angle_space, title='N=%d' % (T), norm='density', ax_handle=axes2[T_i - 1], pretty_xticks=True)
                    axes2[T_i - 1].set_title('')

                    result_pval_vtest_nontargets[sigmax_i, T - 1] = utils.V_test(utils.dropnan(errors_nontargets[T - 1, :, :(T - 1)]))['pvalue']
                    print result_pval_vtest_nontargets[sigmax_i, T - 1]

                    # axes2[T_i-1].text(0.03, 0.96, "Vtest pval: %.2f" % (result_pval_vtest_nontargets[sigmax_i, T - 1]), transform=axes2[T_i - 1].transAxes, horizontalalignment='left', fontsize=12)
                    axes2[T_i - 1].text(0.03, 0.94, "$p=%.1f$" % (result_pvalue_bootstrap_sum[sigmax_i, T - 2]), transform=axes2[T_i - 1].transAxes, horizontalalignment='left', fontsize=18)
                    axes2[T_i - 1].set_ylim([0., 0.30])

                # Error to best nontarget
                if do_best_nontarget:
                    utils.hist_samples_density_estimation(errors_best_nontarget[T - 1].reshape(nb_repetitions*N), bins=angle_space, title='N=%d' % (T))

                    if savefigs:
                        dataio.save_current_figure('error_bestnontarget_hist_sigmax%.2f_T%d_{label}_{unique_id}.pdf' % (sigmax, T))

            if savefigs:
                plt.figure(f1.number)
                plt.tight_layout()
                dataio.save_current_figure('error_target_hist_sigmax%.2f_Tall_{label}_{unique_id}.pdf' % (sigmax))

                plt.figure(f2.number)
                plt.tight_layout()
                dataio.save_current_figure('error_nontargets_hist_sigmax%.2f_Tall_{label}_{unique_id}.pdf' % (sigmax))

    all_args = data_pbs.loaded_data['args_list']

    if savedata:
        dataio.save_variables_default(locals())
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='error_distribution')

    plt.show()

    return locals()
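# Illustrative sketch of the circular error computation used above: broadcast each
# response against all nontarget angles on its trial, then wrap the differences to
# [-pi, pi). utils.wrap_angles is assumed to behave like the modular wrap below;
# all shapes and values are toy stand-ins.
def _sketch_nontarget_errors():
    import numpy as np

    def wrap_angles(angles):
        # Wrap to [-pi, pi)
        return np.mod(angles + np.pi, 2.0 * np.pi) - np.pi

    rng = np.random.RandomState(0)
    responses = rng.uniform(-np.pi, np.pi, size=10)        # (trials,)
    nontargets = rng.uniform(-np.pi, np.pi, size=(10, 3))  # (trials, nontargets)

    # np.newaxis broadcasts each response over every nontarget angle on its trial
    return wrap_angles(responses[:, np.newaxis] - nontargets)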
def plots_errors_distribution(data_pbs, generator_module=None):
    '''
    Reload responses.
    Plot the error distributions.
    '''

    #### SETUP
    #
    savefigs = True
    savedata = True

    plot_persigmax = True
    do_best_nontarget = False

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16
    angle_space = np.linspace(-np.pi, np.pi, 51)
    #
    #### /SETUP

    print "Order parameters: ", generator_module.dict_parameters_range.keys()

    result_responses_all = data_pbs.dict_arrays['result_responses']['results']
    result_target_all = data_pbs.dict_arrays['result_target']['results']
    result_nontargets_all = data_pbs.dict_arrays['result_nontargets']['results']
    result_em_fits_all = data_pbs.dict_arrays['result_em_fits']['results']

    T_space = data_pbs.loaded_data['parameters_uniques']['T']
    sigmax_space = data_pbs.loaded_data['parameters_uniques']['sigmax']
    nb_repetitions = result_responses_all.shape[-1]
    N = result_responses_all.shape[-2]

    result_pval_vtest_nontargets = np.empty((sigmax_space.size, T_space.size))*np.nan

    print sigmax_space
    print T_space
    print result_responses_all.shape, result_target_all.shape, result_nontargets_all.shape, result_em_fits_all.shape

    dataio = DataIO.DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    if plot_persigmax:
        for sigmax_i, sigmax in enumerate(sigmax_space):
            print "sigmax: ", sigmax

            # Compute the errors between the responses and the targets
            errors_targets = utils.wrap_angles(result_responses_all[sigmax_i] - result_target_all[sigmax_i])
            errors_nontargets = np.empty(result_nontargets_all[sigmax_i].shape)
            errors_best_nontarget = np.empty(errors_targets.shape)

            for T_i in xrange(1, T_space.size):
                for repet_i in xrange(nb_repetitions):
                    # Could do a nicer indexing but f**k it

                    # Compute the errors between the responses and nontargets
                    errors_nontargets[T_i, :, :, repet_i] = utils.wrap_angles((result_responses_all[sigmax_i, T_i, :, repet_i, np.newaxis] - result_nontargets_all[sigmax_i, T_i, :, :, repet_i]))

                    # Errors between the response and the best nontarget
                    if do_best_nontarget:
                        errors_best_nontarget[T_i, :, repet_i] = errors_nontargets[T_i, np.arange(errors_nontargets.shape[1]), np.nanargmin(np.abs(errors_nontargets[T_i, ..., repet_i]), axis=1), repet_i]

            for T_i, T in enumerate(T_space):
                print "T: ", T

                # Now, per T items, show the distribution of errors and of errors to nontargets

                # Error to target
                utils.hist_samples_density_estimation(utils.dropnan(errors_targets[T_i]), bins=angle_space, title='Errors between response and target, N=%d' % (T))

                if savefigs:
                    dataio.save_current_figure('error_target_hist_sigmax%.2f_T%d_{label}_{unique_id}.pdf' % (sigmax, T))

                if T > 1:
                    # Error to nontarget
                    ax_handle = utils.hist_samples_density_estimation(utils.dropnan(errors_nontargets[T_i, :, :T_i]), bins=angle_space, title='Errors between response and non targets, N=%d' % (T))

                    # Index by sigmax_i, not by the sigmax value itself
                    result_pval_vtest_nontargets[sigmax_i, T_i] = utils.V_test(utils.dropnan(errors_nontargets[T_i, :, :T_i]))['pvalue']
                    print result_pval_vtest_nontargets[sigmax_i, T_i]

                    ax_handle.text(0.02, 0.97, "Vtest pval: %.2f" % (result_pval_vtest_nontargets[sigmax_i, T_i]), transform=ax_handle.transAxes, horizontalalignment='left', fontsize=12)

                    if savefigs:
                        dataio.save_current_figure('error_nontargets_hist_sigmax%.2f_T%d_{label}_{unique_id}.pdf' % (sigmax, T))

                # Error to best nontarget
                if do_best_nontarget:
                    utils.hist_samples_density_estimation(utils.dropnan(errors_best_nontarget[T_i]), bins=angle_space, title='Errors between response and best non target, N=%d' % (T))

                    if savefigs:
                        dataio.save_current_figure('error_bestnontarget_hist_sigmax%.2f_T%d_{label}_{unique_id}.pdf' % (sigmax, T))

    all_args = data_pbs.loaded_data['args_list']

    if savedata:
        dataio.save_variables_default(locals())
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='error_distribution')

    plt.show()

    return locals()
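# Illustrative sketch of the "best nontarget" selection used above: for each trial,
# keep the nontarget error with the smallest magnitude, via np.nanargmin plus fancy
# indexing along the trial axis. Toy shapes and values only.
def _sketch_best_nontarget():
    import numpy as np

    rng = np.random.RandomState(0)
    errors_nontargets = rng.uniform(-np.pi, np.pi, size=(10, 3))  # (trials, nontargets)

    best_idx = np.nanargmin(np.abs(errors_nontargets), axis=1)    # (trials,)
    return errors_nontargets[np.arange(errors_nontargets.shape[0]), best_idx]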
def plots_boostrap(data_pbs, generator_module=None):
    ''' Reload bootstrap samples, plot their histograms, fit empirical CDFs and save them for quicker reuse. '''

    #### SETUP
    #
    savefigs = True
    savedata = True

    plots_hist_cdf = True
    should_fit_bootstrap = True

    # caching_bootstrap_filename = None
    caching_bootstrap_filename = os.path.join(generator_module.pbs_submission_infos['simul_out_dir'], 'outputs', 'cache_bootstrap_conjresp.pickle')

    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", generator_module.dict_parameters_range.keys()

    result_bootstrap_samples_allitems = np.squeeze(data_pbs.dict_arrays['result_bootstrap_samples_allitems']['results'])
    result_bootstrap_samples = np.squeeze(data_pbs.dict_arrays['result_bootstrap_samples']['results'])

    sigmax_space = data_pbs.loaded_data['datasets_list'][0]['sigmax_space']
    T_space = data_pbs.loaded_data['datasets_list'][0]['T_space']
    # Assumed: the last axis indexes the bootstrap repetitions; defined here so it can be saved below.
    nb_repetitions = result_bootstrap_samples.shape[-1]

    print result_bootstrap_samples_allitems.shape
    print result_bootstrap_samples.shape

    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    if plots_hist_cdf:
        if caching_bootstrap_filename is not None:
            if os.path.exists(caching_bootstrap_filename):
                # Got the file, open it and try to use its contents
                try:
                    with open(caching_bootstrap_filename, 'r') as file_in:
                        # Load and assign values
                        cached_data = pickle.load(file_in)
                        bootstrap_ecdf_allitems_sigmax_T = cached_data['bootstrap_ecdf_allitems_sigmax_T']
                        bootstrap_ecdf_bays_sigmax_T = cached_data['bootstrap_ecdf_bays_sigmax_T']
                        should_fit_bootstrap = False

                except IOError:
                    print "Error while loading ", caching_bootstrap_filename, "falling back to fitting the bootstrap ECDFs"

        if should_fit_bootstrap:
            bootstrap_ecdf_allitems_sigmax_T = dict()
            bootstrap_ecdf_bays_sigmax_T = dict()

            # Fit bootstrap
            for sigmax_i, sigmax in enumerate(sigmax_space):
                for T_i, T in enumerate(T_space):
                    if T > 1:
                        # One bootstrap CDF per condition
                        bootstrap_ecdf_allitems = stmodsdist.empirical_distribution.ECDF(utils.dropnan(result_bootstrap_samples_allitems[sigmax_i, T_i]))
                        bootstrap_ecdf_bays = stmodsdist.empirical_distribution.ECDF(utils.dropnan(result_bootstrap_samples[sigmax_i, T_i]))

                        # Store in a dict(sigmax) -> dict(T) -> ECDF object
                        bootstrap_ecdf_allitems_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict(ecdf=bootstrap_ecdf_allitems, T=T, sigmax=sigmax)
                        bootstrap_ecdf_bays_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict(ecdf=bootstrap_ecdf_bays, T=T, sigmax=sigmax)

            # Save everything to a file, for faster later plotting
            if caching_bootstrap_filename is not None:
                try:
                    with open(caching_bootstrap_filename, 'w') as filecache_out:
                        data_bootstrap = dict(bootstrap_ecdf_allitems_sigmax_T=bootstrap_ecdf_allitems_sigmax_T, bootstrap_ecdf_bays_sigmax_T=bootstrap_ecdf_bays_sigmax_T)
                        pickle.dump(data_bootstrap, filecache_out, protocol=2)
                except IOError:
                    print "Error writing out to caching file ", caching_bootstrap_filename

        ## Plots now
        for sigmax_i, sigmax in enumerate(sigmax_space):
            for T_i, T in enumerate(T_space):
                if T > 1:
                    # Histogram of samples
                    _, axes = plt.subplots(ncols=2, figsize=(12, 6))
                    axes[0].hist(utils.dropnan(result_bootstrap_samples[sigmax_i, T_i]), bins=100, normed='density')
                    axes[0].set_xlim([0.0, 1.0])
                    axes[1].hist(utils.dropnan(result_bootstrap_samples_allitems[sigmax_i, T_i]), bins=100, normed='density')
                    axes[1].set_xlim([0.0, 1.0])

                    if savefigs:
                        dataio.save_current_figure('hist_bootstrap_sigmax%.2f_T%d_{label}_{unique_id}.pdf' % (sigmax, T))

                    # ECDF now
                    _, axes = plt.subplots(ncols=2, sharey=True, figsize=(12, 6))
                    axes[0].plot(bootstrap_ecdf_bays_sigmax_T[sigmax_i][T_i]['ecdf'].x, bootstrap_ecdf_bays_sigmax_T[sigmax_i][T_i]['ecdf'].y, linewidth=2)
                    axes[0].set_xlim([0.0, 1.0])
                    axes[1].plot(bootstrap_ecdf_allitems_sigmax_T[sigmax_i][T_i]['ecdf'].x, bootstrap_ecdf_allitems_sigmax_T[sigmax_i][T_i]['ecdf'].y, linewidth=2)
                    axes[1].set_xlim([0.0, 1.0])

                    if savefigs:
                        dataio.save_current_figure('ecdf_bootstrap_sigmax%.2f_T%d_{label}_{unique_id}.pdf' % (sigmax, T))

    all_args = data_pbs.loaded_data['args_list']

    variables_to_save = ['nb_repetitions']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)

        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='bootstrap_nontargets')

    plt.show()

    return locals()
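# --- Hedged sketch: bootstrap p-value from a fitted ECDF ---
# The cached ECDFs above are used downstream to turn an observed mixture
# statistic into a bootstrap p-value. A minimal sketch, assuming statsmodels
# is available (the module alias stmodsdist used above is assumed to point
# at statsmodels.distributions); the helper name is illustrative:
def _bootstrap_pvalue_sketch(bootstrap_samples, observed_stat):
    ''' Upper-tail p-value of observed_stat under the bootstrap null. '''
    from statsmodels.distributions.empirical_distribution import ECDF
    ecdf = ECDF(bootstrap_samples)
    return 1.0 - ecdf(observed_stat)

# Usage sketch: roughly 0.05 for a statistic sitting at the 95th percentile
# of its null samples.
# _bootstrap_pvalue_sketch(np.random.rand(10000), 0.95)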
def compute_bootstrap_samples(dataset, nb_bootstrap_samples, angle_space):
    responses_resampled = np.empty(
        (np.unique(dataset['n_items']).size, nb_bootstrap_samples),
        dtype=np.object)
    error_nontargets_resampled = np.empty(
        (np.unique(dataset['n_items']).size, nb_bootstrap_samples),
        dtype=np.object)
    error_targets_resampled = np.empty(
        (np.unique(dataset['n_items']).size, nb_bootstrap_samples),
        dtype=np.object)
    hist_cnts_nontarget_bootstraps_nitems = np.empty(
        (np.unique(dataset['n_items']).size, nb_bootstrap_samples,
         angle_space.size - 1))*np.nan
    hist_cnts_target_bootstraps_nitems = np.empty(
        (np.unique(dataset['n_items']).size, nb_bootstrap_samples,
         angle_space.size - 1))*np.nan

    bootstrap_data = {
        'responses_resampled': responses_resampled,
        'error_nontargets_resampled': error_nontargets_resampled,
        'error_targets_resampled': error_targets_resampled,
        'hist_cnts_nontarget_bootstraps_nitems': hist_cnts_nontarget_bootstraps_nitems,
        'hist_cnts_target_bootstraps_nitems': hist_cnts_target_bootstraps_nitems,
    }

    for n_items_i, n_items in enumerate(np.unique(dataset['n_items'])):
        # Data collapsed across subjects
        ids_filtered = (dataset['n_items'] == n_items).flatten()

        if n_items > 1:
            # Get random bootstrap nontargets
            bootstrap_nontargets = utils.sample_angle(
                dataset['item_angle'][ids_filtered, 1:n_items].shape + (nb_bootstrap_samples, ))

            # Compute associated EM fits
            # bootstrap_results = []
            for bootstrap_i in progress.ProgressDisplay(np.arange(nb_bootstrap_samples), display=progress.SINGLE_LINE):
                em_fit = em_circularmixture.fit(
                    dataset['response'][ids_filtered, 0],
                    dataset['item_angle'][ids_filtered, 0],
                    bootstrap_nontargets[..., bootstrap_i])
                # bootstrap_results.append(em_fit)

                # Get EM samples
                responses_resampled[n_items_i, bootstrap_i] = (
                    em_circularmixture.sample_from_fit(
                        em_fit,
                        dataset['item_angle'][ids_filtered, 0],
                        bootstrap_nontargets[..., bootstrap_i]))

                # Compute the errors
                error_nontargets_resampled[n_items_i, bootstrap_i] = (
                    utils.wrap_angles(
                        responses_resampled[n_items_i, bootstrap_i][:, np.newaxis]
                        - bootstrap_nontargets[..., bootstrap_i]))
                error_targets_resampled[n_items_i, bootstrap_i] = (
                    utils.wrap_angles(
                        responses_resampled[n_items_i, bootstrap_i]
                        - dataset['item_angle'][ids_filtered, 0]))

                # Bin everything
                (hist_cnts_nontarget_bootstraps_nitems[n_items_i, bootstrap_i], _, _) = (
                    utils.histogram_binspace(
                        utils.dropnan(error_nontargets_resampled[n_items_i, bootstrap_i]),
                        bins=angle_space, norm='density'))
                (hist_cnts_target_bootstraps_nitems[n_items_i, bootstrap_i], _, _) = (
                    utils.histogram_binspace(
                        utils.dropnan(error_targets_resampled[n_items_i, bootstrap_i]),
                        bins=angle_space, norm='density'))

    return bootstrap_data
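# --- Hedged sketch: resampling nontarget angles ---
# compute_bootstrap_samples relies on utils.sample_angle to draw the random
# bootstrap nontargets; drawing uniformly on [-pi, pi) is the assumed
# behaviour here (the stand-in name is illustrative, not the project's API):
def _sample_angle_sketch(shape):
    ''' Uniform random angles on [-pi, pi) for a given output shape. '''
    return np.random.uniform(-np.pi, np.pi, size=shape)

# e.g. bootstrap nontargets for 100 trials, 2 nontargets, 200 resamples:
# _sample_angle_sketch((100, 2, 200))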
def plot_bootstrap_randomsamples():
    ''' Plot histograms of random samples drawn from the bootstrap nontarget estimates. '''

    dataio = DataIO(label='plotpaper_bootstrap_randomized')

    nb_bootstrap_samples = 200
    use_precomputed = True

    angle_space = np.linspace(-np.pi, np.pi, 51)
    bins_center = angle_space[:-1] + np.diff(angle_space)[0]/2

    data_bays2009 = load_experimental_data.load_data_bays09(fit_mixture_model=True)

    ## Very long simulation: load the precomputed data when available.
    if use_precomputed:
        data = pickle.load(open('/Users/loicmatthey/Dropbox/UCL/1-phd/Work/Visual_working_memory/code/git-bayesian-visual-working-memory/Data/cache_randomized_bootstrap_samples_plots_paper_theo_plotbootstrapsamples/bootstrap_histo_katz.npy', 'r'))

        responses_resampled = data['responses_resampled']
        error_nontargets_resampled = data['error_nontargets_resampled']
        error_targets_resampled = data['error_targets_resampled']
        hist_cnts_nontarget_bootstraps_nitems = data['hist_cnts_nontarget_bootstraps_nitems']
        hist_cnts_target_bootstraps_nitems = data['hist_cnts_target_bootstraps_nitems']
    else:
        responses_resampled = np.empty((np.unique(data_bays2009['n_items']).size, nb_bootstrap_samples), dtype=np.object)
        error_nontargets_resampled = np.empty((np.unique(data_bays2009['n_items']).size, nb_bootstrap_samples), dtype=np.object)
        error_targets_resampled = np.empty((np.unique(data_bays2009['n_items']).size, nb_bootstrap_samples), dtype=np.object)
        hist_cnts_nontarget_bootstraps_nitems = np.empty((np.unique(data_bays2009['n_items']).size, nb_bootstrap_samples, angle_space.size - 1))*np.nan
        hist_cnts_target_bootstraps_nitems = np.empty((np.unique(data_bays2009['n_items']).size, nb_bootstrap_samples, angle_space.size - 1))*np.nan

        for n_items_i, n_items in enumerate(np.unique(data_bays2009['n_items'])):
            # Data collapsed across subjects
            ids_filtered = (data_bays2009['n_items'] == n_items).flatten()

            if n_items > 1:
                # Get random bootstrap nontargets
                bootstrap_nontargets = utils.sample_angle(data_bays2009['item_angle'][ids_filtered, 1:n_items].shape + (nb_bootstrap_samples, ))

                # Compute associated EM fits
                bootstrap_results = []
                for bootstrap_i in progress.ProgressDisplay(np.arange(nb_bootstrap_samples), display=progress.SINGLE_LINE):
                    em_fit = em_circularmixture_allitems_uniquekappa.fit(data_bays2009['response'][ids_filtered, 0], data_bays2009['item_angle'][ids_filtered, 0], bootstrap_nontargets[..., bootstrap_i])
                    bootstrap_results.append(em_fit)

                    # Get EM samples
                    responses_resampled[n_items_i, bootstrap_i] = em_circularmixture_allitems_uniquekappa.sample_from_fit(em_fit, data_bays2009['item_angle'][ids_filtered, 0], bootstrap_nontargets[..., bootstrap_i])

                    # Compute the errors
                    error_nontargets_resampled[n_items_i, bootstrap_i] = utils.wrap_angles(responses_resampled[n_items_i, bootstrap_i][:, np.newaxis] - bootstrap_nontargets[..., bootstrap_i])
                    error_targets_resampled[n_items_i, bootstrap_i] = utils.wrap_angles(responses_resampled[n_items_i, bootstrap_i] - data_bays2009['item_angle'][ids_filtered, 0])

                    # Bin everything
                    hist_cnts_nontarget_bootstraps_nitems[n_items_i, bootstrap_i], x, bins = utils.histogram_binspace(utils.dropnan(error_nontargets_resampled[n_items_i, bootstrap_i]), bins=angle_space, norm='density')
                    hist_cnts_target_bootstraps_nitems[n_items_i, bootstrap_i], x, bins = utils.histogram_binspace(utils.dropnan(error_targets_resampled[n_items_i, bootstrap_i]), bins=angle_space, norm='density')

    # Now show the average histograms
    hist_cnts_target_bootstraps_nitems_mean = np.mean(hist_cnts_target_bootstraps_nitems, axis=-2)
    hist_cnts_target_bootstraps_nitems_std = np.std(hist_cnts_target_bootstraps_nitems, axis=-2)
    hist_cnts_target_bootstraps_nitems_sem = hist_cnts_target_bootstraps_nitems_std/np.sqrt(hist_cnts_target_bootstraps_nitems.shape[1])

    hist_cnts_nontarget_bootstraps_nitems_mean = np.mean(hist_cnts_nontarget_bootstraps_nitems, axis=-2)
    hist_cnts_nontarget_bootstraps_nitems_std = np.std(hist_cnts_nontarget_bootstraps_nitems, axis=-2)
    hist_cnts_nontarget_bootstraps_nitems_sem = hist_cnts_nontarget_bootstraps_nitems_std/np.sqrt(hist_cnts_nontarget_bootstraps_nitems.shape[1])

    f1, axes1 = plt.subplots(ncols=np.unique(data_bays2009['n_items']).size-1, figsize=((np.unique(data_bays2009['n_items']).size-1)*6, 6), sharey=True)
    for n_items_i, n_items in enumerate(np.unique(data_bays2009['n_items'])):
        if n_items > 1:
            utils.plot_mean_std_area(bins_center, hist_cnts_nontarget_bootstraps_nitems_mean[n_items_i], hist_cnts_nontarget_bootstraps_nitems_sem[n_items_i], ax_handle=axes1[n_items_i-1], color='k')

            # Add the data histograms on top
            axes1[n_items_i-1].bar(bins_center, data_bays2009['hist_cnts_nontarget_nitems_stats']['mean'][n_items_i], width=2.*np.pi/(angle_space.size-1), align='center', yerr=data_bays2009['hist_cnts_nontarget_nitems_stats']['sem'][n_items_i])
            # axes1[n_items_i-1].set_title('N=%d' % n_items)
            axes1[n_items_i-1].set_xlim([bins_center[0]-np.pi/(angle_space.size-1), bins_center[-1]+np.pi/(angle_space.size-1)])
            # axes1[n_items_i-1].set_ylim([0., 2.0])
            axes1[n_items_i-1].set_xticks((-np.pi, -np.pi/2, 0, np.pi/2., np.pi))
            axes1[n_items_i-1].set_xticklabels((r'$-\pi$', r'$-\frac{\pi}{2}$', r'$0$', r'$\frac{\pi}{2}$', r'$\pi$'), fontsize=16)

            # axes1[n_items_i-1].bar(bins_center, hist_cnts_nontarget_bootstraps_nitems_mean[n_items_i], width=2.*np.pi/(angle_space.size-1), align='center', yerr=hist_cnts_nontarget_bootstraps_nitems_std[n_items_i])

            axes1[n_items_i-1].get_figure().canvas.draw()

    if dataio is not None:
        plt.tight_layout()
        dataio.save_current_figure("hist_error_nontarget_persubj_{label}_{unique_id}.pdf")

    if False:
        f2, axes2 = plt.subplots(ncols=np.unique(data_bays2009['n_items']).size-1, figsize=((np.unique(data_bays2009['n_items']).size-1)*6, 6), sharey=True)
        for n_items_i, n_items in enumerate(np.unique(data_bays2009['n_items'])):
            if n_items > 1:
                utils.plot_mean_std_area(bins_center, hist_cnts_target_bootstraps_nitems_mean[n_items_i], hist_cnts_target_bootstraps_nitems_std[n_items_i], ax_handle=axes2[n_items_i-1])
                # axes2[n_items_i-1].bar(bins_center, hist_cnts_target_bootstraps_nitems_mean[n_items_i], width=2.*np.pi/(angle_space.size-1), align='center', yerr=hist_cnts_target_bootstraps_nitems_std[n_items_i])

    return locals()
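# --- Hedged sketch: mean +/- SEM band ---
# utils.plot_mean_std_area above draws a line at the mean with a shaded
# error band; plain matplotlib reproduces this with fill_between. The
# function name and signature here are illustrative, not the project's API:
def _plot_mean_sem_band_sketch(x, mean, sem, ax=None, color='k'):
    ''' Line at the mean with a shaded +/- SEM band around it. '''
    if ax is None:
        _, ax = plt.subplots()
    ax.plot(x, mean, color=color, linewidth=2)
    ax.fill_between(x, mean - sem, mean + sem, color=color, alpha=0.3)
    return ax

# Usage sketch (with the per-nitems arrays computed above):
# _plot_mean_sem_band_sketch(bins_center,
#                            hist_cnts_nontarget_bootstraps_nitems_mean[1],
#                            hist_cnts_nontarget_bootstraps_nitems_sem[1])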