def plots_errors_distribution(data_pbs, generator_module=None):
    ''' Reload responses
        Plot errors distributions.

        Per sigmax, plots histograms of angular errors between responses and
        targets (one panel per T in T_space_filtered) and between responses
        and nontargets (for T > 1), annotating the latter with bootstrap
        p-values loaded from a pickle cache. Figures/variables are saved via
        DataIO. Returns locals() so the caller can inspect every computed
        array.
    '''
    #### SETUP
    #
    savefigs = True
    savedata = True

    plot_persigmax = True
    do_best_nontarget = False

    # Bootstrap ECDFs are expensive; they are read from this cache file.
    load_test_bootstrap = True
    caching_bootstrap_filename = os.path.join(generator_module.pbs_submission_infos['simul_out_dir'], 'outputs', 'cache_bootstrap_errordistrib_mixed_sigmaxT.pickle')

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16

    # Common binning for all angular histograms, spanning the full circle.
    angle_space = np.linspace(-np.pi, np.pi, 51)
    #
    #### /SETUP

    print "Order parameters: ", generator_module.dict_parameters_range.keys()

    result_responses_all = data_pbs.dict_arrays['result_responses']['results']
    result_target_all = data_pbs.dict_arrays['result_target']['results']
    result_nontargets_all = data_pbs.dict_arrays['result_nontargets']['results']
    result_em_fits_all = data_pbs.dict_arrays['result_em_fits']['results']

    T_space = data_pbs.loaded_data['parameters_uniques']['T']
    sigmax_space = data_pbs.loaded_data['parameters_uniques']['sigmax']
    nb_repetitions = result_responses_all.shape[-1]
    N = result_responses_all.shape[-2]

    # NaN-initialised result arrays. The bootstrap arrays only cover T >= 2,
    # hence the T_space.size-1 axis (filled below at column T-2).
    result_pval_vtest_nontargets = np.empty((sigmax_space.size, T_space.size))*np.nan
    result_pvalue_bootstrap_sum = np.empty((sigmax_space.size, T_space.size-1))*np.nan
    result_pvalue_bootstrap_all = np.empty((sigmax_space.size, T_space.size-1, T_space.size-1))*np.nan

    print sigmax_space
    print T_space
    print result_responses_all.shape, result_target_all.shape, result_nontargets_all.shape, result_em_fits_all.shape

    dataio = DataIO.DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    if load_test_bootstrap:
        if caching_bootstrap_filename is not None:
            if os.path.exists(caching_bootstrap_filename):
                # Got file, open it and try to use its contents
                try:
                    with open(caching_bootstrap_filename, 'r') as file_in:
                        # Load and assign values
                        # NOTE(review): text-mode 'r' with pickle works on
                        # Python 2 / Unix only — confirm if ported.
                        cached_data = pickle.load(file_in)
                        bootstrap_ecdf_bays_sigmax_T = cached_data['bootstrap_ecdf_bays_sigmax_T']
                        bootstrap_ecdf_allitems_sum_sigmax_T = cached_data['bootstrap_ecdf_allitems_sum_sigmax_T']
                        bootstrap_ecdf_allitems_all_sigmax_T = cached_data['bootstrap_ecdf_allitems_all_sigmax_T']
                except IOError:
                    print "Error while loading ", caching_bootstrap_filename, "falling back to computing the EM fits"
                    load_test_bootstrap = False

        if load_test_bootstrap:
            # Now compute the pvalue for each sigmax/T
            # only use 1000 samples
            # (keeps only the first repetition along the last axis)
            data_responses_all = result_responses_all[..., 0]
            data_target_all = result_target_all[..., 0]
            data_nontargets_all = result_nontargets_all[..., 0]

            # Compute bootstrap p-value
            # NOTE(review): data arrays are indexed by T-1 and the result
            # arrays by T-2, which assumes T_space is the contiguous range
            # 1..T_max — confirm against the loaded parameters.
            for sigmax_i, sigmax in enumerate(sigmax_space):
                for T in T_space[1:]:
                    bootstrap_allitems_nontargets_allitems_uniquekappa = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(data_responses_all[sigmax_i, (T-1)], data_target_all[sigmax_i, (T-1)], data_nontargets_all[sigmax_i, (T-1), :, :(T-1)], sumnontargets_bootstrap_ecdf=bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T-1]['ecdf'], allnontargets_bootstrap_ecdf=bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T-1]['ecdf'])
                    result_pvalue_bootstrap_sum[sigmax_i, T-2] = bootstrap_allitems_nontargets_allitems_uniquekappa['p_value']
                    result_pvalue_bootstrap_all[sigmax_i, T-2, :(T-1)] = bootstrap_allitems_nontargets_allitems_uniquekappa['allnontarget_p_value']
                    print sigmax, T, result_pvalue_bootstrap_sum[sigmax_i, T-2], result_pvalue_bootstrap_all[sigmax_i, T-2, :(T-1)], np.sum(result_pvalue_bootstrap_all[sigmax_i, T-2, :(T-1)] < 0.05)

    if plot_persigmax:
        # Only these item counts are plotted.
        T_space_filtered = np.array([1, 2, 4, 6])

        for sigmax_i, sigmax in enumerate(sigmax_space):
            print "sigmax: ", sigmax

            # Compute the error between the response and the target
            errors_targets = utils.wrap_angles(result_responses_all[sigmax_i] - result_target_all[sigmax_i])

            errors_nontargets = np.nan*np.empty(result_nontargets_all[sigmax_i].shape)
            if do_best_nontarget:
                errors_best_nontarget = np.empty(errors_targets.shape)
            for T_i in xrange(1, T_space.size):
                for repet_i in xrange(nb_repetitions):
                    # Could do a nicer indexing but f**k it

                    # Compute the error between the responses and nontargets.
                    errors_nontargets[T_i, :, :, repet_i] = utils.wrap_angles((result_responses_all[sigmax_i, T_i, :, repet_i, np.newaxis] - result_nontargets_all[sigmax_i, T_i, :, :, repet_i]))

                    # Errors between the response the best nontarget.
                    if do_best_nontarget:
                        errors_best_nontarget[T_i, :, repet_i] = errors_nontargets[T_i, np.arange(errors_nontargets.shape[1]), np.nanargmin(np.abs(errors_nontargets[T_i, ..., repet_i]), axis=1), repet_i]

            # One row of target-error panels (f1) and one row of
            # nontarget-error panels (f2, T > 1 only), shared y-axes.
            f1, axes1 = plt.subplots(ncols=T_space_filtered.size, figsize=(T_space_filtered.size*6, 6), sharey=True)
            f2, axes2 = plt.subplots(ncols=T_space_filtered.size-1, figsize=((T_space_filtered.size-1)*6, 6), sharey=True)

            # NOTE(review): errors_* are laid out along the full T_space
            # axis but indexed below by position in T_space_filtered; the
            # two only coincide if T_space == T_space_filtered — confirm.
            for T_i, T in enumerate(T_space_filtered):
                print "T: ", T
                # Now, per T items, show the distribution of errors and of errors to non target

                # Error to target
                # hist_errors_targets = np.zeros((angle_space.size, nb_repetitions))
                # for repet_i in xrange(nb_repetitions):
                #     hist_errors_targets[:, repet_i], _, _ = utils_math.histogram_binspace(errors_targets[T_i, :, repet_i], bins=angle_space)

                # f, ax = plt.subplots()
                # ax.bar(angle_space, np.mean(hist_errors_targets, axis=-1), width=2.*np.pi/(angle_space.size-1), align='center')
                # ax.set_xlim([angle_space[0] - np.pi/(angle_space.size-1), angle_space[-1] + np.pi/(angle_space.size-1)])

                # utils.plot_mean_std_area(angle_space, np.mean(hist_errors_targets, axis=-1), np.std(hist_errors_targets, axis=-1))

                # utils.hist_samples_density_estimation(errors_targets[T_i].reshape(nb_repetitions*N), bins=angle_space, title='Errors between response and target, N=%d' % (T))
                utils.hist_angular_data(utils.dropnan(errors_targets[T_i]), bins=angle_space, norm='density', ax_handle=axes1[T_i], pretty_xticks=True)
                axes1[T_i].set_ylim([0., 2.0])

                if T > 1:
                    # Error to nontarget
                    # ax_handle = utils.hist_samples_density_estimation(errors_nontargets[T_i, :, :T_i].reshape(nb_repetitions*N*T_i), bins=angle_space, title='Errors between response and non targets, N=%d' % (T))
                    utils.hist_angular_data(utils.dropnan(errors_nontargets[T_i, :, :T_i]), bins=angle_space, title='N=%d' % (T), norm='density', ax_handle=axes2[T_i-1], pretty_xticks=True)
                    axes2[T_i-1].set_title('')

                    # V-test for uniformity of the nontarget errors.
                    result_pval_vtest_nontargets[sigmax_i, T_i] = utils.V_test(utils.dropnan(errors_nontargets[T_i, :, :T_i]))['pvalue']
                    print result_pval_vtest_nontargets[sigmax_i, T_i]

                    # axes2[T_i-1].text(0.03, 0.96, "Vtest pval: %.2f" % (result_pval_vtest_nontargets[sigmax_i, T_i]), transform=axes2[T_i - 1].transAxes, horizontalalignment='left', fontsize=12)
                    # NOTE(review): result_pvalue_bootstrap_sum was filled at
                    # column T-2 above but is read here at column T_i (position
                    # in T_space_filtered); these differ when T_space is
                    # contiguous 1..T_max — confirm the intended index.
                    axes2[T_i-1].text(0.03, 0.94, "$p=%.1f$" % (result_pvalue_bootstrap_sum[sigmax_i, T_i]), transform=axes2[T_i - 1].transAxes, horizontalalignment='left', fontsize=18)

                    axes2[T_i-1].set_ylim([0., 0.30])

                # Error to best non target
                if do_best_nontarget:
                    utils.hist_samples_density_estimation(errors_best_nontarget[T_i].reshape(nb_repetitions*N), bins=angle_space, title='N=%d' % (T))

                    if savefigs:
                        dataio.save_current_figure('error_bestnontarget_hist_sigmax%.2f_T%d_{label}_{unique_id}.pdf' % (sigmax, T))

            if savefigs:
                plt.figure(f1.number)
                plt.tight_layout()
                dataio.save_current_figure('error_target_hist_sigmax%.2f_Tall_{label}_{unique_id}.pdf' % (sigmax))

                plt.figure(f2.number)
                plt.tight_layout()
                dataio.save_current_figure('error_nontargets_hist_sigmax%.2f_Tall_{label}_{unique_id}.pdf' % (sigmax))

    all_args = data_pbs.loaded_data['args_list']

    if savedata:
        dataio.save_variables_default(locals())

        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='error_distribution')

    plt.show()

    return locals()
def plots_errors_distribution(data_pbs, generator_module=None): ''' Reload responses Plot errors distributions. ''' #### SETUP # savefigs = True savedata = True plot_persigmax = True do_best_nontarget = False colormap = None # or 'cubehelix' plt.rcParams['font.size'] = 16 angle_space = np.linspace(-np.pi, np.pi, 51) # #### /SETUP print "Order parameters: ", generator_module.dict_parameters_range.keys() result_responses_all = data_pbs.dict_arrays['result_responses']['results'] result_target_all = data_pbs.dict_arrays['result_target']['results'] result_nontargets_all = data_pbs.dict_arrays['result_nontargets']['results'] result_em_fits_all = data_pbs.dict_arrays['result_em_fits']['results'] T_space = data_pbs.loaded_data['parameters_uniques']['T'] sigmax_space = data_pbs.loaded_data['parameters_uniques']['sigmax'] nb_repetitions = result_responses_all.shape[-1] N = result_responses_all.shape[-2] result_pval_vtest_nontargets = np.empty((sigmax_space.size, T_space.size))*np.nan print sigmax_space print T_space print result_responses_all.shape, result_target_all.shape, result_nontargets_all.shape, result_em_fits_all.shape dataio = DataIO.DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename']) if plot_persigmax: for sigmax_i, sigmax in enumerate(sigmax_space): print "sigmax: ", sigmax # Compute the error between the response and the target errors_targets = utils.wrap_angles(result_responses_all[sigmax_i] - result_target_all[sigmax_i]) errors_nontargets = np.empty(result_nontargets_all[sigmax_i].shape) errors_best_nontarget = np.empty(errors_targets.shape) for T_i in xrange(1, T_space.size): for repet_i in xrange(nb_repetitions): # Could do a nicer indexing but f**k it # Compute the error between the responses and nontargets. 
errors_nontargets[T_i, :, :, repet_i] = utils.wrap_angles((result_responses_all[sigmax_i, T_i, :, repet_i, np.newaxis] - result_nontargets_all[sigmax_i, T_i, :, :, repet_i])) # Errors between the response the best nontarget. if do_best_nontarget: errors_best_nontarget[T_i, :, repet_i] = errors_nontargets[T_i, np.arange(errors_nontargets.shape[1]), np.nanargmin(np.abs(errors_nontargets[T_i, ..., repet_i]), axis=1), repet_i] for T_i, T in enumerate(T_space): print "T: ", T # Now, per T items, show the distribution of errors and of errors to non target # Error to target utils.hist_samples_density_estimation(utils.dropnan(errors_targets[T_i]), bins=angle_space, title='Errors between response and target, N=%d' % (T)) if savefigs: dataio.save_current_figure('error_target_hist_sigmax%.2f_T%d_{label}_{unique_id}.pdf' % (sigmax, T)) if T > 1: # Error to nontarget ax_handle = utils.hist_samples_density_estimation(utils.dropnan(errors_nontargets[T_i, :, :T_i]), bins=angle_space, title='Errors between response and non targets, N=%d' % (T)) result_pval_vtest_nontargets[sigmax, T_i] = utils.V_test(utils.dropnan(errors_nontargets[T_i, :, :T_i]))['pvalue'] print result_pval_vtest_nontargets[sigmax, T_i] ax_handle.text(0.02, 0.97, "Vtest pval: %.2f" % (result_pval_vtest_nontargets[sigmax, T_i]), transform=ax_handle.transAxes, horizontalalignment='left', fontsize=12) if savefigs: dataio.save_current_figure('error_nontargets_hist_sigmax%.2f_T%d_{label}_{unique_id}.pdf' % (sigmax, T)) # Error to best non target if do_best_nontarget: utils.hist_samples_density_estimation(utils.dropnan(errors_best_nontarget[T_i]), bins=angle_space, title='Errors between response and best non target, N=%d' % (T)) if savefigs: dataio.save_current_figure('error_bestnontarget_hist_sigmax%.2f_T%d_{label}_{unique_id}.pdf' % (sigmax, T)) all_args = data_pbs.loaded_data['args_list'] if savedata: dataio.save_variables_default(locals()) 
dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='error_distribution') plt.show() return locals()