def plots_fit_collapsedmixturemodels_random(data_pbs, generator_module=None):
    '''
        Reload runs from PBS.
        Sequential data analysis.
    '''

    #### SETUP
    #
    plots_bestfits = True
    plots_scatter3d = False

    savefigs = True
    savedata = True
    savemovies = False  # used by the 3D scatter movie export below

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", data_pbs.dataset_infos['parameters']
    # parameters: M, ratio_conj, sigmax

    # Extract data
    T_space = data_pbs.loaded_data['datasets_list'][0]['T_space']

    result_em_fits_collapsed_tr_flat = np.array(data_pbs.dict_arrays['result_em_fits_collapsed_tr']['results_flat'])
    result_em_fits_collapsed_summary_flat = np.array(data_pbs.dict_arrays['result_em_fits_collapsed_summary']['results_flat'])
    result_dist_gorgo11_sequ_collapsed_flat = np.array(data_pbs.dict_arrays['result_dist_gorgo11_sequ_collapsed']['results_flat'])
    result_dist_gorgo11_sequ_collapsed_emmixt_KL_flat = np.array(data_pbs.dict_arrays['result_dist_gorgo11_sequ_collapsed_emmixt_KL']['results_flat'])

    result_parameters_flat = np.array(data_pbs.dict_arrays['result_em_fits_collapsed_tr']['parameters_flat'])
    all_repeats_completed = data_pbs.dict_arrays['result_em_fits_collapsed_tr']['repeats_completed']
    all_args_arr = np.array(data_pbs.loaded_data['args_list'])

    M_space = data_pbs.loaded_data['parameters_uniques']['M']
    ratio_conj_space = data_pbs.loaded_data['parameters_uniques']['ratio_conj']
    sigmax_space = data_pbs.loaded_data['parameters_uniques']['sigmax']
    alpha_space = data_pbs.loaded_data['parameters_uniques']['alpha']
    num_repetitions = generator_module.num_repetitions

    parameter_names_sorted = data_pbs.dataset_infos['parameters']

    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/',
                    label='global_' + data_pbs.dataset_infos['save_output_filename'])

    # Load ground truth
    data_gorgo11_sequ = load_experimental_data.load_data_gorgo11_sequential(fit_mixture_model=True)

    ## Keep only runs where all repetitions completed (repeats_completed == num_repetitions - 1)
    filter_data = all_repeats_completed == num_repetitions - 1
    result_parameters_flat = result_parameters_flat[filter_data]
    result_em_fits_collapsed_tr_flat = result_em_fits_collapsed_tr_flat[filter_data]
    result_em_fits_collapsed_summary_flat = result_em_fits_collapsed_summary_flat[filter_data]
    result_dist_gorgo11_sequ_collapsed_flat = result_dist_gorgo11_sequ_collapsed_flat[filter_data]
    result_dist_gorgo11_sequ_collapsed_emmixt_KL_flat = result_dist_gorgo11_sequ_collapsed_emmixt_KL_flat[filter_data]
    all_args_arr = all_args_arr[filter_data]
    all_repeats_completed = all_repeats_completed[filter_data]

    print "Size post-filter: ", result_parameters_flat.shape[0]

    # Compute lots of averages over the repetitions
    result_em_fits_collapsed_tr_flat_avg = utils.nanmean(result_em_fits_collapsed_tr_flat, axis=-1)
    result_em_fits_collapsed_summary_flat_avg = utils.nanmean(result_em_fits_collapsed_summary_flat, axis=-1)
    result_dist_gorgo11_sequ_collapsed_flat_avg = utils.nanmean(result_dist_gorgo11_sequ_collapsed_flat, axis=-1)
    result_dist_gorgo11_sequ_collapsed_emmixt_KL_flat_avg = utils.nanmean(result_dist_gorgo11_sequ_collapsed_emmixt_KL_flat, axis=-1)

    result_dist_gorgo11_sequ_collapsed_flat_avg_overall = np.nansum(np.nansum(np.nansum(result_dist_gorgo11_sequ_collapsed_flat_avg, axis=-1), axis=-1), axis=-1)

    # We will now grid some of the parameters, to get a 2D/3D surface back.
    # Let's fix ratio_conj, as we know that the other models need around
    # ratio_conj = 0.8 to fit the data well.
    # (An illustrative gridding sketch is given after this function.)
    def str_best_params(best_i, result_dist_to_use):
        return ' '.join(["%s %.4f" % (parameter_names_sorted[param_i], result_parameters_flat[best_i, param_i]) for param_i in xrange(len(parameter_names_sorted))]) + ' >> %f' % result_dist_to_use[best_i]

    ###### Best fitting points
    if plots_bestfits:
        nb_best_points = 5

        def plot_collapsed_modelfits(T_space, curr_result_emfits_collapsed_tr, labelplot='', dataio=None):
            f, ax = plt.subplots()
            for nitems_i, nitems in enumerate(T_space):
                ax = plots_experimental_data.plot_kappa_mean_error(
                    T_space[:nitems],
                    curr_result_emfits_collapsed_tr[..., 0][nitems_i, :nitems],
                    0.0*curr_result_emfits_collapsed_tr[..., 0][nitems_i, :nitems],
                    title='model fit fig7 %s' % labelplot,
                    ax=ax,
                    label='%d items' % nitems,
                    xlabel='T_recall')
            if dataio is not None:
                dataio.save_current_figure('bestfit_doublepowerlaw_%s_kappa_{label}_{unique_id}.pdf' % labelplot)

            _, ax_target = plt.subplots()
            _, ax_nontarget = plt.subplots()
            _, ax_random = plt.subplots()
            for nitems_i, nitems in enumerate(T_space):
                ax_target = plots_experimental_data.plot_emmixture_mean_error(
                    T_space[:nitems],
                    curr_result_emfits_collapsed_tr[..., 1][nitems_i, :nitems],
                    curr_result_emfits_collapsed_tr[..., 1][nitems_i, :nitems]*0.0,
                    title='Target model fit %s' % labelplot,
                    ax=ax_target,
                    label='%d items' % nitems,
                    xlabel='T_recall')
                ax_nontarget = plots_experimental_data.plot_emmixture_mean_error(
                    T_space[:nitems],
                    curr_result_emfits_collapsed_tr[..., 2][nitems_i, :nitems],
                    curr_result_emfits_collapsed_tr[..., 2][nitems_i, :nitems]*0.0,
                    title='Nontarget model fit %s' % labelplot,
                    ax=ax_nontarget,
                    label='%d items' % nitems,
                    xlabel='T_recall')
                ax_random = plots_experimental_data.plot_emmixture_mean_error(
                    T_space[:nitems],
                    curr_result_emfits_collapsed_tr[..., 3][nitems_i, :nitems],
                    curr_result_emfits_collapsed_tr[..., 3][nitems_i, :nitems]*0.0,
                    title='Random model fit %s' % labelplot,
                    ax=ax_random,
                    label='%d items' % nitems,
                    xlabel='T_recall')

            if dataio is not None:
                plt.figure(ax_target.get_figure().number)
                dataio.save_current_figure('bestfit_doublepowerlaw_%s_mixttarget_{label}_{unique_id}.pdf' % labelplot)
                plt.figure(ax_nontarget.get_figure().number)
                dataio.save_current_figure('bestfit_doublepowerlaw_%s_mixtnontarget_{label}_{unique_id}.pdf' % labelplot)
                plt.figure(ax_random.get_figure().number)
                dataio.save_current_figure('bestfit_doublepowerlaw_%s_mixtrandom_{label}_{unique_id}.pdf' % labelplot)

        best_points_result_dist_gorgo11seq_all = np.argsort(result_dist_gorgo11_sequ_collapsed_flat_avg_overall)[:nb_best_points]

        for best_point_i in best_points_result_dist_gorgo11seq_all:
            plot_collapsed_modelfits(
                T_space,
                result_em_fits_collapsed_tr_flat_avg[best_point_i],
                labelplot='%.1f' % result_dist_gorgo11_sequ_collapsed_flat_avg_overall[best_point_i],
                dataio=dataio)

    ###### 3D scatter plots
    if plots_scatter3d:
        nb_best_points = 30
        size_normal_points = 8
        size_best_points = 50

        def plot_scatter(all_vars, result_dist_to_use_name, title='', log_color=True, downsampling=1, label_file=''):
            result_dist_to_use = all_vars[result_dist_to_use_name]
            result_parameters_flat_3d = all_vars['result_parameters_flat_3d']

            # Filter if downsampling
            filter_downsampling = np.arange(0, result_dist_to_use.size, downsampling)
            result_dist_to_use = result_dist_to_use[filter_downsampling]
            result_parameters_flat_3d = result_parameters_flat_3d[filter_downsampling]

            best_points_result_dist_to_use = np.argsort(result_dist_to_use)[:nb_best_points]

            # Construct all permutations of 3 parameters, for 3D scatters
            params_permutations = set([tuple(np.sort(np.random.choice(result_parameters_flat_3d.shape[-1], 3, replace=False)).tolist()) for i in xrange(1000)])

            for param_permut in params_permutations:
                fig = plt.figure()
                ax = Axes3D(fig)

                # One plot per parameter permutation
                if log_color:
                    color_points = np.log(result_dist_to_use)
                else:
                    color_points = result_dist_to_use

                utils.scatter3d(result_parameters_flat_3d[:, param_permut[0]],
                                result_parameters_flat_3d[:, param_permut[1]],
                                result_parameters_flat_3d[:, param_permut[2]],
                                s=size_normal_points,
                                c=color_points,
                                xlabel=parameter_names_sorted[param_permut[0]],
                                ylabel=parameter_names_sorted[param_permut[1]],
                                zlabel=parameter_names_sorted[param_permut[2]],
                                title=title,
                                ax_handle=ax)

                utils.scatter3d(result_parameters_flat_3d[best_points_result_dist_to_use, param_permut[0]],
                                result_parameters_flat_3d[best_points_result_dist_to_use, param_permut[1]],
                                result_parameters_flat_3d[best_points_result_dist_to_use, param_permut[2]],
                                c='r',
                                s=size_best_points,
                                ax_handle=ax)

                if savefigs:
                    dataio.save_current_figure('scatter3d_%s_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file))

                if savemovies:
                    try:
                        utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_%s%s_{label}_{unique_id}.mp4' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file)), bitrate=8000, min_duration=8)
                        utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_%s%s_{label}_{unique_id}.gif' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file)), nb_frames=30, min_duration=8)
                    except Exception:
                        # Most likely wrong aggregator...
                        print "failed when creating movies for ", result_dist_to_use_name

                if False and savefigs:
                    ax.view_init(azim=90, elev=10)
                    dataio.save_current_figure('scatter3d_view2_%s_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file))

                # plt.close('all')

            print "Parameters: %s" % ', '.join(parameter_names_sorted)
            print "Best points, %s:" % title
            print '\n'.join([str_best_params(best_i, result_dist_to_use) for best_i in best_points_result_dist_to_use])

    # all_args = data_pbs.loaded_data['args_list']
    variables_to_save = ['parameter_names_sorted', 'all_args_arr', 'all_repeats_completed', 'filter_data']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='gorgo11_sequential_fitmixturemodel')

    plt.show()

    return locals()
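

# The function above mentions gridding some of the parameters back onto a 2D/3D
# surface (with ratio_conj held fixed), but the gridding step itself is not
# implemented in this reloader. Below is a minimal sketch of how it could look,
# assuming scipy is available; the helper name, the choice of parameter columns and
# any call site are hypothetical.
def _grid_fit_distance_sketch(result_parameters_flat, result_dist_overall,
                              param_x_idx=0, param_y_idx=1, nb_grid_points=50):
    '''Interpolate scattered (param_x, param_y) -> fit-distance values onto a regular grid.'''
    from scipy.interpolate import griddata

    x = result_parameters_flat[:, param_x_idx]
    y = result_parameters_flat[:, param_y_idx]
    x_grid, y_grid = np.meshgrid(np.linspace(x.min(), x.max(), nb_grid_points),
                                 np.linspace(y.min(), y.max(), nb_grid_points))
    # 'nearest' avoids NaNs outside the convex hull of the sampled parameter points.
    dist_grid = griddata((x, y), result_dist_overall, (x_grid, y_grid), method='nearest')
    return x_grid, y_grid, dist_grid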

def launcher_do_fit_mixturemodels_sequential_alltrecall(args):
    '''
        Run the model for 1..T items sequentially, for all possible trecall/T.
        Compute:
        - Precision of samples
        - EM mixture model fits. Both independent and collapsed model.
        - Theoretical Fisher Information
        - EM mixture model distances to the set of datasets currently in use.
    '''

    print "Doing a piece of work for launcher_do_fit_mixturemodels_sequential_alltrecall"

    all_parameters = utils.argparse_2_dict(args)
    print all_parameters

    if all_parameters['burn_samples'] + all_parameters['num_samples'] < 200:
        print "WARNING> you do not have enough samples I think!", all_parameters['burn_samples'] + all_parameters['num_samples']

    # Create DataIO
    # (complete label with current variable state)
    dataio = DataIO.DataIO(output_folder=all_parameters['output_directory'], label=all_parameters['label'].format(**all_parameters))
    save_every = 1
    run_counter = 0

    # Load the dataset to compare against
    data_gorgo11_sequ = load_experimental_data.load_data_gorgo11_sequential(data_dir=all_parameters['experiment_data_dir'], fit_mixture_model=True)
    gorgo11_sequ_T_space = np.unique(data_gorgo11_sequ['n_items'])

    # Parameters to vary
    T_max = all_parameters['T']
    T_space = np.arange(1, T_max+1)
    repetitions_axis = -1

    # Result arrays
    result_all_precisions = np.nan*np.empty((T_space.size, T_space.size, all_parameters['num_repetitions']))
    result_fi_theo = np.nan*np.empty((T_space.size, T_space.size, all_parameters['num_repetitions']))
    result_fi_theocov = np.nan*np.empty((T_space.size, T_space.size, all_parameters['num_repetitions']))
    result_em_fits = np.nan*np.empty((T_space.size, T_space.size, 6, all_parameters['num_repetitions']))  # kappa, mixt_target, mixt_nontarget, mixt_random, ll, bic
    result_em_fits_collapsed_tr = np.nan*np.empty((T_space.size, T_space.size, 4, all_parameters['num_repetitions']))  # kappa_tr, mixt_target_tr, mixt_nontarget_tr, mixt_random_tr
    result_em_fits_collapsed_summary = np.nan*np.empty((5, all_parameters['num_repetitions']))  # bic, ll, kappa_theta

    result_dist_gorgo11_sequ = np.nan*np.empty((T_space.size, T_space.size, 4, all_parameters['num_repetitions']))  # kappa, mixt_target, mixt_nontarget, mixt_random
    result_dist_gorgo11_sequ_emmixt_KL = np.nan*np.empty((T_space.size, T_space.size, all_parameters['num_repetitions']))
    result_dist_gorgo11_sequ_collapsed = np.nan*np.empty((T_space.size, T_space.size, 4, all_parameters['num_repetitions']))
    result_dist_gorgo11_sequ_collapsed_emmixt_KL = np.nan*np.empty((T_space.size, T_space.size, all_parameters['num_repetitions']))

    gorgo11_sequ_collapsed_mixtmod_mean = data_gorgo11_sequ['collapsed_em_fits_doublepowerlaw_array']

    # If desired, will automatically save all Model responses.
    if all_parameters['collect_responses']:
        print "--- Collecting all responses..."
        result_responses = np.nan*np.empty((T_space.size, T_space.size, all_parameters['N'], all_parameters['num_repetitions']))
        result_target = np.nan*np.empty((T_space.size, T_space.size, all_parameters['N'], all_parameters['num_repetitions']))
        result_nontargets = np.nan*np.empty((T_space.size, T_space.size, all_parameters['N'], T_max-1, all_parameters['num_repetitions']))

    search_progress = progress.Progress(T_space.size*(T_space.size + 1)/2.*all_parameters['num_repetitions'])

    for repet_i in xrange(all_parameters['num_repetitions']):
        for T_i, T in enumerate(T_space):
            for trecall_i, trecall in enumerate(np.arange(T, 0, -1)):
                # Inverting the indexing of trecall, to be consistent:
                # trecall_i == 0 corresponds to the last item, but trecall still
                # means the actual time of recall.
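                # Illustration of this convention (illustrative comment only):
                # for T = 3 the loop yields trecall_i=0 -> trecall=3 (item recalled
                # last), trecall_i=1 -> trecall=2, trecall_i=2 -> trecall=1 (item
                # recalled first); fixed_cued_feature_time is set to trecall - 1 below.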
print "%.2f%%, %s left - %s" % (search_progress.percentage(), search_progress.time_remaining_str(), search_progress.eta_str()) print "Fit for T=%d, tr=%d, %d/%d" % (T, trecall, repet_i+1, all_parameters['num_repetitions']) # Update parameter all_parameters['T'] = T all_parameters['fixed_cued_feature_time'] = trecall - 1 ### WORK WORK WORK work? ### # Instantiate (_, _, _, sampler) = launchers.init_everything(all_parameters) # Sample sampler.run_inference(all_parameters) # Compute precision print "get precision..." result_all_precisions[T_i, trecall_i, repet_i] = sampler.get_precision() # Fit mixture model, independent print "fit mixture model..." curr_params_fit = sampler.fit_mixture_model(use_all_targets=False) result_em_fits[T_i, trecall_i, :, repet_i] = [curr_params_fit[key] for key in ['kappa', 'mixt_target', 'mixt_nontargets_sum', 'mixt_random', 'train_LL', 'bic']] # Compute fisher info print "compute fisher info" result_fi_theo[T_i, trecall_i, repet_i] = sampler.estimate_fisher_info_theocov(use_theoretical_cov=False) result_fi_theocov[T_i, trecall_i, repet_i] = sampler.estimate_fisher_info_theocov(use_theoretical_cov=True) # Compute distances to datasets (this is for the non-collapsed stuff, not the best) if T in gorgo11_sequ_T_space: gorgo11_sequ_mixtures_mean = data_gorgo11_sequ['em_fits_nitems_trecall_arrays'][gorgo11_sequ_T_space==T, trecall_i, :4].flatten() result_dist_gorgo11_sequ[T_i, trecall_i, :, repet_i] = (gorgo11_sequ_mixtures_mean - result_em_fits[T_i, trecall_i, :4, repet_i])**2. result_dist_gorgo11_sequ_emmixt_KL[T_i, trecall_i, repet_i] = utils.KL_div(result_em_fits[T_i, trecall_i, 1:4, repet_i], gorgo11_sequ_mixtures_mean[1:]) # If needed, store responses if all_parameters['collect_responses']: print "collect responses" (responses, target, nontarget) = sampler.collect_responses() result_responses[T_i, trecall_i, :, repet_i] = responses result_target[T_i, trecall_i, :, repet_i] = target result_nontargets[T_i, trecall_i, :, :T_i, repet_i] = nontarget print "CURRENT RESULTS:\n", result_all_precisions[T_i, trecall_i, repet_i], curr_params_fit, result_fi_theo[T_i, trecall_i, repet_i], result_fi_theocov[T_i, trecall_i, repet_i], np.sum(result_dist_gorgo11_sequ[T_i, trecall_i, :, repet_i]), np.sum(result_dist_gorgo11_sequ_emmixt_KL[T_i, trecall_i, repet_i]), "\n" ### /Work ### search_progress.increment() if run_counter % save_every == 0 or search_progress.done(): dataio.save_variables_default(locals()) run_counter += 1 # Fit Collapsed mixture model # TODO check dimensionality... print 'Fitting Collapsed double powerlaw mixture model...' params_fit = em_circularmixture_parametrickappa_doublepowerlaw.fit(T_space, result_responses[..., repet_i], result_target[..., repet_i], result_nontargets[..., repet_i], debug=False) # First store the parameters that depend on T/trecall for i, key in enumerate(['kappa', 'mixt_target_tr', 'mixt_nontargets_tr', 'mixt_random_tr']): result_em_fits_collapsed_tr[..., i, repet_i] = params_fit[key] # Then the ones that do not, only one per full collapsed fit. result_em_fits_collapsed_summary[0, repet_i] = params_fit['bic'] # result_em_fits_collapsed_summary[1, repet_i] = params_fit['train_LL'] result_em_fits_collapsed_summary[2:, repet_i] = params_fit['kappa_theta'] # Compute distances to dataset for collapsed model result_dist_gorgo11_sequ_collapsed[..., repet_i] = (gorgo11_sequ_collapsed_mixtmod_mean - result_em_fits_collapsed_tr[..., repet_i])**2. 
        result_dist_gorgo11_sequ_collapsed_emmixt_KL[..., repet_i] = utils.KL_div(result_em_fits_collapsed_tr[..., 1:4, repet_i], gorgo11_sequ_collapsed_mixtmod_mean[..., 1:], axis=-1)

    # Finished
    dataio.save_variables_default(locals())

    print "All finished"
    return locals()
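

# The mixture-proportion distances above rely on utils.KL_div. For reference, here
# is a minimal sketch of a discrete KL divergence taken along one axis, assuming
# utils.KL_div computes something like sum(p * log(p / q)); the helper name and the
# epsilon clipping are assumptions, only numpy is required.
def _kl_div_mixture_proportions_sketch(p_model, q_data, eps=1e-10, axis=-1):
    '''KL(p_model || q_data) between mixture-proportion vectors, along `axis`.'''
    p = np.clip(np.asarray(p_model, dtype=float), eps, 1.0)
    q = np.clip(np.asarray(q_data, dtype=float), eps, 1.0)
    return np.sum(p * np.log(p / q), axis=axis)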

def plots_fit_mixturemodels_random(data_pbs, generator_module=None):
    """
        Reload runs from PBS.
    """

    #### SETUP
    #
    savefigs = True
    savedata = True

    colormap = None  # or 'cubehelix'
    plt.rcParams["font.size"] = 16
    #
    #### /SETUP

    print "Order parameters: ", data_pbs.dataset_infos["parameters"]
    # parameters: M, ratio_conj, sigmax

    # Extract data
    T_space = data_pbs.loaded_data["datasets_list"][0]["T_space"]

    result_responses_flat = np.array(data_pbs.dict_arrays["result_responses"]["results_flat"])
    result_targets_flat = np.array(data_pbs.dict_arrays["result_target"]["results_flat"])
    result_nontargets_flat = np.array(data_pbs.dict_arrays["result_nontargets"]["results_flat"])

    result_parameters_flat = np.array(data_pbs.dict_arrays["result_responses"]["parameters_flat"])
    all_repeats_completed = data_pbs.dict_arrays["result_responses"]["repeats_completed"]
    all_args_arr = np.array(data_pbs.loaded_data["args_list"])

    M_space = data_pbs.loaded_data["parameters_uniques"]["M"]
    ratio_conj_space = data_pbs.loaded_data["parameters_uniques"]["ratio_conj"]
    sigmax_space = data_pbs.loaded_data["parameters_uniques"]["sigmax"]
    alpha_space = data_pbs.loaded_data["parameters_uniques"]["alpha"]
    trecall_space = data_pbs.loaded_data["parameters_uniques"]["fixed_cued_feature_time"]
    num_repetitions = generator_module.num_repetitions

    parameter_names_sorted = data_pbs.dataset_infos["parameters"]

    dataio = DataIO(
        output_folder=generator_module.pbs_submission_infos["simul_out_dir"] + "/outputs/",
        label="global_" + data_pbs.dataset_infos["save_output_filename"],
    )

    ##### Because of laziness, the responses are stored a bit oddly.
    # Each run is for a given trecall, but we run N items = 1..Nmax anyway,
    # so entries with trecall > N are np.nan.
    # => We need to reconstruct the arrays properly, so that the lower triangle of
    #    Nitems x Trecall is filled (see the sketch after this function).
    # Also, trecall is the actual recall time, so its meaning has to be remapped to
    # something like Tmax - (trecall + 1).

    # Load ground truth
    data_gorgo11_sequ = load_experimental_data.load_data_gorgo11_sequential(fit_mixture_model=True)

    ## Keep only runs with all repetitions completed and trecall == last
    filter_data = (result_parameters_flat[:, 0] == (T_space.max() - 1)) & (all_repeats_completed == num_repetitions - 1)
    result_parameters_flat = result_parameters_flat[filter_data]
    result_responses_flat = result_responses_flat[filter_data]
    result_targets_flat = result_targets_flat[filter_data]
    result_nontargets_flat = result_nontargets_flat[filter_data]
    all_args_arr = all_args_arr[filter_data]
    all_repeats_completed = all_repeats_completed[filter_data]

    print "Size post-filter: ", result_parameters_flat.shape[0]

    def str_best_params(best_i, result_dist_to_use):
        return (
            " ".join(
                [
                    "%s %.4f" % (parameter_names_sorted[param_i], result_parameters_flat[best_i, param_i])
                    for param_i in xrange(len(parameter_names_sorted))
                ]
            )
            + " >> %f" % result_dist_to_use[best_i]
        )

    # all_args = data_pbs.loaded_data['args_list']
    variables_to_save = ["parameter_names_sorted", "all_args_arr", "all_repeats_completed", "filter_data"]

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder="gorgo11_sequential_fitmixturemodel")

    plt.show()

    return locals()
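

# The comment in plots_fit_mixturemodels_random above notes that trecall is stored as
# the actual recall time and has to be remapped before rebuilding the Nitems x Trecall
# triangle. Below is a minimal sketch of that remapping, under the same convention as
# the launcher above (np.arange(T, 0, -1), i.e. index 0 == item recalled last); the
# helper name is illustrative only and the exact offset used elsewhere may differ.
def _remap_trecall_to_reversed_index_sketch(trecall_actual, T):
    '''Map an actual recall time in 1..T to a reversed index where 0 == last item.'''
    return T - trecall_actual

# Example: with T = 4, trecall = 4 (recalled last) -> index 0, and trecall = 1
# (recalled first) -> index 3.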