# Example 1
# 0
def launcher_do_nontarget_bootstrap(args):
    '''
        Compute a bootstrap estimate of the nontarget mixture statistics,
        using model outputs saved by an earlier run.

        args: argparse.Namespace or dict; must provide 'output_directory',
              'label', 'subaction' and 'num_repetitions'.

        Returns locals() at the end of the run; all variables are also
        saved periodically through DataIO.
    '''

    print "Doing a piece of work for launcher_do_nontarget_bootstrap"

    try:
        # Convert Argparse.Namespace to dict
        all_parameters = vars(args)
    except TypeError:
        # Assume it's already done
        assert type(args) is dict, "args is neither Namespace nor dict, WHY?"
        all_parameters = args

    print all_parameters

    # Create DataIO
    #  (complete label with current variable state)
    dataio = DataIO.DataIO(output_folder=all_parameters['output_directory'], label=all_parameters['label'].format(**all_parameters))
    save_every = 1
    run_counter = 0

    # Load the data
    # NOTE(review): if WORKDIR_DROP is unset, os.getenv(...) returns None and
    # os.path.join raises -- assumes the environment variable is always set.
    if all_parameters['subaction'] == 'mixed':
        # Mixed runs
        model_outputs = utils.load_npy( os.path.join(os.getenv("WORKDIR_DROP", None), 'Experiments', 'bootstrap_nontargets', 'global_plots_errors_distribution-plots_errors_distribution-d977e237-cfce-473b-a292-00695e725259.npy'))
    else:
        # Conjunctive runs
        model_outputs = utils.load_npy( os.path.join(os.getenv("WORKDIR_DROP", None), 'Experiments', 'bootstrap_nontargets', 'global_plots_errors_distribution-plots_errors_distribution-cc1a49b0-f5f0-4e82-9f0f-5a16a2bfd4e8.npy'))

    # Keep only the first slice of the trailing axis of each stored array.
    data_responses_all = model_outputs['result_responses_all'][..., 0]
    data_target_all = model_outputs['result_target_all'][..., 0]
    data_nontargets_all = model_outputs['result_nontargets_all'][..., 0]
    T_space = model_outputs['T_space']
    sigmax_space = model_outputs['sigmax_space']

    # Maximum number of nontargets stored per trial (last axis).
    K = data_nontargets_all.shape[-1]

    # Result arrays, NaN-initialised so unfinished cells are easy to spot.
    # NOTE(review): result_bootstrap_samples_allitems is never written below
    # (its assignment is commented out), so it remains all-NaN.
    result_bootstrap_samples_allitems = np.nan*np.ones((sigmax_space.size, T_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples = np.nan*np.ones((sigmax_space.size, T_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples_allitems_uniquekappa_sumnontarget = np.nan*np.ones((sigmax_space.size, T_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples_allitems_uniquekappa_allnontarget = np.nan*np.ones((sigmax_space.size, T_space.size, K*all_parameters['num_repetitions']))

    search_progress = progress.Progress(sigmax_space.size*(T_space.size-1))

    for sigmax_i, sigmax in enumerate(sigmax_space):
        # Skip the first T: a single-item trial has no nontargets to bootstrap.
        for T_i, T in enumerate(T_space[1:]):
            # Shift the index so T_i addresses the full T_space axis.
            T_i += 1

            print "%.2f%%, %s left - %s" % (search_progress.percentage(), search_progress.time_remaining_str(), search_progress.eta_str())

            print "Bootstrap for T=%d, sigmax=%.2f, %d bootstrap samples" % (T, sigmax, all_parameters['num_repetitions'])

            # Update parameter

            ### WORK WORK WORK work? ###

            # Get some bootstrap samples
            bootstrap_allitems_nontargets_allitems_uniquekappa = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(
                    data_responses_all[sigmax_i, T_i],
                    data_target_all[sigmax_i, T_i],
                    data_nontargets_all[sigmax_i, T_i, :, :T_i],
                    nb_bootstrap_samples=all_parameters['num_repetitions'],
                    resample_targets=False)
            # bootstrap_allitems_nontargets_allitems = em_circularmixture_allitems.bootstrap_nontarget_stat(
            #         data_responses_all[sigmax_i, T_i],
            #         data_target_all[sigmax_i, T_i],
            #         data_nontargets_all[sigmax_i, T_i, :, :T_i],
            #         nb_bootstrap_samples=all_parameters['num_repetitions'],
            #         resample_targets=False)
            bootstrap_allitems_nontargets = em_circularmixture.bootstrap_nontarget_stat(
                    data_responses_all[sigmax_i, T_i],
                    data_target_all[sigmax_i, T_i],
                    data_nontargets_all[sigmax_i, T_i, :, :T_i],
                    nb_bootstrap_samples=all_parameters['num_repetitions'],
                    resample_targets=False)

            # Collect and store responses
            # result_bootstrap_samples_allitems[sigmax_i, T_i] = bootstrap_allitems_nontargets_allitems['nontarget_bootstrap_samples']
            result_bootstrap_samples[sigmax_i, T_i] = bootstrap_allitems_nontargets['nontarget_bootstrap_samples']

            result_bootstrap_samples_allitems_uniquekappa_sumnontarget[sigmax_i, T_i] = bootstrap_allitems_nontargets_allitems_uniquekappa['nontarget_bootstrap_samples']
            # Only T_i nontargets exist at this T, hence the partial slice.
            result_bootstrap_samples_allitems_uniquekappa_allnontarget[sigmax_i, T_i, :all_parameters['num_repetitions']*T_i] = bootstrap_allitems_nontargets_allitems_uniquekappa['allnontarget_bootstrap_samples']

            print result_bootstrap_samples_allitems[sigmax_i, T_i]
            print result_bootstrap_samples[sigmax_i, T_i]
            print result_bootstrap_samples_allitems_uniquekappa_sumnontarget[sigmax_i, T_i]
            print result_bootstrap_samples_allitems_uniquekappa_allnontarget[sigmax_i, T_i]

            ### /Work ###

            search_progress.increment()
            # Checkpoint every local variable (save_every == 1, so this
            # effectively runs every iteration).
            if run_counter % save_every == 0 or search_progress.done():
                dataio.save_variables_default(locals())
            run_counter += 1

    # Finished
    dataio.save_variables_default(locals())

    print "All finished"
    return locals()
# Example 2
# 0
 def load(self):
   """Restore the replay-memory arrays previously saved under self.model_dir.

   Bug fix: the original assigned ``array = load_npy(...)``, which only
   rebound the local loop variable and silently discarded the loaded data,
   leaving self.actions / self.rewards / ... untouched. We now rebind each
   attribute explicitly via setattr; the saved file names match the
   attribute names exactly.
   """
   for name in ['actions', 'rewards', 'screens', 'terminals',
                'prestates', 'poststates']:
     # load_npy returns the array stored at <model_dir>/<name>
     setattr(self, name, load_npy(os.path.join(self.model_dir, name)))
# Example 3
# 0
def launcher_do_nontarget_bootstrap_misbindingruns(args):
    '''
        Compute a bootstrap estimate, using outputs from a Misbinding generator run.

        args: argparse.Namespace or dict; must provide 'output_directory',
              'label', 'subaction' and 'num_repetitions'.

        Returns locals() at the end of the run; all variables are also
        saved periodically through DataIO.
    '''

    print "Doing a piece of work for launcher_do_nontarget_bootstrap"

    try:
        # Convert Argparse.Namespace to dict
        all_parameters = vars(args)
    except TypeError:
        # Assume it's already done
        assert type(args) is dict, "args is neither Namespace nor dict, WHY?"
        all_parameters = args

    print all_parameters

    # Create DataIO
    #  (complete label with current variable state)
    dataio = DataIO.DataIO(output_folder=all_parameters['output_directory'], label=all_parameters['label'].format(**all_parameters))
    save_every = 1
    run_counter = 0

    # Load the data
    # NOTE(review): if subaction is neither 'mixed' nor '', model_outputs is
    # never bound and the line below raises NameError -- confirm intended.
    if all_parameters['subaction'] == 'mixed' or all_parameters['subaction'] == '':
        # Mixed runs
        model_outputs = utils.load_npy( os.path.join(os.getenv("WORKDIR_DROP", None), 'Experiments', 'bootstrap_nontargets', 'SAVE_global_plots_misbinding_logposterior-plots_misbinding_logposterior-36eb41e9-6370-453e-995e-3876d5105388.npy'))

    data_responses_all = model_outputs['result_all_thetas']
    data_target = model_outputs['target_angle']
    data_nontargets = model_outputs['nontarget_angles']
    ratio_space = model_outputs['ratio_space']

    # Result arrays, NaN-initialised so unfinished cells are easy to spot.
    # NOTE(review): result_bootstrap_samples_allitems is never assigned below,
    # so it remains all-NaN.
    result_bootstrap_samples_allitems = np.nan*np.ones((ratio_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples = np.nan*np.ones((ratio_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples_allitems_uniquekappa_sumnontarget = np.nan*np.ones((ratio_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples_allitems_uniquekappa_allnontarget = np.nan*np.ones((ratio_space.size, all_parameters['num_repetitions']))

    search_progress = progress.Progress(ratio_space.size)

    for ratio_conj_i, ratio_conj in enumerate(ratio_space):
        print "%.2f%%, %s left - %s" % (search_progress.percentage(), search_progress.time_remaining_str(), search_progress.eta_str())

        print "Bootstrap for ratio=%.2f, %d bootstrap samples" % (ratio_conj, all_parameters['num_repetitions'])

        ### WORK WORK WORK work? ###

        # Get some bootstrap samples
        bootstrap_allitems_nontargets_allitems_uniquekappa = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(
                data_responses_all[ratio_conj_i],
                data_target,
                data_nontargets,
                nb_bootstrap_samples=all_parameters['num_repetitions'],
                resample_targets=False)

        bootstrap_allitems_nontargets = em_circularmixture.bootstrap_nontarget_stat(
                data_responses_all[ratio_conj_i],
                data_target,
                data_nontargets,
                nb_bootstrap_samples=all_parameters['num_repetitions'],
                resample_targets=False)

        # Collect and store responses
        result_bootstrap_samples[ratio_conj_i] = bootstrap_allitems_nontargets['nontarget_bootstrap_samples']

        result_bootstrap_samples_allitems_uniquekappa_sumnontarget[ratio_conj_i] = bootstrap_allitems_nontargets_allitems_uniquekappa['nontarget_bootstrap_samples']
        result_bootstrap_samples_allitems_uniquekappa_allnontarget[ratio_conj_i] = bootstrap_allitems_nontargets_allitems_uniquekappa['allnontarget_bootstrap_samples']

        print result_bootstrap_samples_allitems[ratio_conj_i]
        print result_bootstrap_samples[ratio_conj_i]
        print result_bootstrap_samples_allitems_uniquekappa_sumnontarget[ratio_conj_i]
        print result_bootstrap_samples_allitems_uniquekappa_allnontarget[ratio_conj_i]

        ### /Work ###

        search_progress.increment()
        # Checkpoint every local variable (save_every == 1, so this
        # effectively runs every iteration).
        if run_counter % save_every == 0 or search_progress.done():
            dataio.save_variables_default(locals())
        run_counter += 1

    # Finished
    dataio.save_variables_default(locals())

    print "All finished"
    return locals()
def plots_fitting_experiments_random(data_pbs, generator_module=None):
    '''
        Reload 2D volume runs from PBS and plot them

        data_pbs: aggregated PBS results (provides dict_arrays, loaded_data,
                  dataset_infos).
        generator_module: the submission-generator module; provides
                  dict_parameters_range, num_repetitions, pbs_submission_infos.

        Returns locals(); figures/data are saved through DataIO when the
        savefigs/savedata switches below are enabled.
    '''

    #### SETUP
    #
    savefigs = True
    savedata = True
    savemovies = False

    do_bays09 = True
    do_gorgo11 = True

    scatter3d_sumT = False
    plots_flat_sorted_performance = False
    plots_memorycurves_fits_best = True

    nb_best_points = 20
    # Python 2 integer division: 20/6 == 3 best points per T.
    nb_best_points_per_T = nb_best_points/6
    size_normal_points = 8
    size_best_points = 50
    downsampling = 2


    # do_relaunch_bestparams_pbs = True

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", generator_module.dict_parameters_range.keys()
    # parameters: ratio_conj, sigmax, T

    # Extract data
    result_fitexperiments_flat = np.array(data_pbs.dict_arrays['result_fitexperiments']['results_flat'])
    result_fitexperiments_all_flat = np.array(data_pbs.dict_arrays['result_fitexperiments_all']['results_flat'])
    result_fitexperiments_noiseconv_flat = np.array(data_pbs.dict_arrays['result_fitexperiments_noiseconv']['results_flat'])
    result_fitexperiments_noiseconv_all_flat = np.array(data_pbs.dict_arrays['result_fitexperiments_noiseconv_all']['results_flat'])
    result_parameters_flat = np.array(data_pbs.dict_arrays['result_fitexperiments']['parameters_flat'])

    all_repeats_completed = data_pbs.dict_arrays['result_fitexperiments']['repeats_completed']
    all_args = data_pbs.loaded_data['args_list']
    all_args_arr = np.array(all_args)
    num_repetitions = generator_module.num_repetitions

    # Extract order of datasets
    experiment_ids = data_pbs.loaded_data['datasets_list'][0]['fitexperiment_parameters']['experiment_ids']
    parameter_names_sorted = data_pbs.dataset_infos['parameters']

    T_space = data_pbs.loaded_data['datasets_list'][0]['T_space']

    # NOTE(review): dataset_infos is not defined in this function -- presumably
    # a module-level global set elsewhere; verify before running standalone.
    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    # filter_data = (result_parameters_flat[:, -1] < 1.0) & (all_repeats_completed == num_repetitions - 1)
    # filter_data = (all_repeats_completed == num_repetitions - 1)
    # result_fitexperiments_flat = result_fitexperiments_flat[filter_data]
    # result_fitexperiments_all_flat = result_fitexperiments_all_flat[filter_data]
    # result_fitexperiments_noiseconv_flat = result_fitexperiments_noiseconv_flat[filter_data]
    # result_fitexperiments_noiseconv_all_flat = result_fitexperiments_noiseconv_all_flat[filter_data]
    # result_parameters_flat = result_parameters_flat[filter_data]

    # Compute some stuff
    # Data is summed over all experiments for _flat, contains bic, ll and ll90.
    # for _all_flat, contains bic, ll and ll90 per experiment. Given that Gorgo11 and Bays09 are incompatible, shouldn't really use the combined version directly!
    result_fitexperiments_noiseconv_bic_avg_allT = utils.nanmean(result_fitexperiments_noiseconv_flat, axis=-1)[..., 0]
    result_fitexperiments_noiseconv_allexp_bic_avg_allT = utils.nanmean(result_fitexperiments_noiseconv_all_flat, axis=-1)[:, :, 0]
    # Negated so that smaller is better, consistent with BIC ordering below.
    result_fitexperiments_noiseconv_allexp_ll90_avg_allT = -utils.nanmean(result_fitexperiments_noiseconv_all_flat, axis=-1)[:, :, -1]

    ### BIC
    # result_fitexperiments_noiseconv_allexp_bic_avg_allT: N x T x exp
    result_fitexperiments_noiseconv_bays09_bic_avg_allT = result_fitexperiments_noiseconv_allexp_bic_avg_allT[..., 0]
    result_fitexperiments_noiseconv_gorgo11_bic_avg_allT = result_fitexperiments_noiseconv_allexp_bic_avg_allT[..., 1]
    result_fitexperiments_noiseconv_dualrecall_bic_avg_allT = result_fitexperiments_noiseconv_allexp_bic_avg_allT[..., 2]
    # Summed T
    result_fitexperiments_noiseconv_bays09_bic_avg_sumT = np.nansum(result_fitexperiments_noiseconv_bays09_bic_avg_allT, axis=-1)
    result_fitexperiments_noiseconv_gorgo11_bic_avg_sumT = np.nansum(result_fitexperiments_noiseconv_gorgo11_bic_avg_allT, axis=-1)
    result_fitexperiments_noiseconv_dualrecall_bic_avg_sumT = np.nansum(result_fitexperiments_noiseconv_dualrecall_bic_avg_allT, axis=-1)

    ### LL90
    # N x T x exp
    result_fitexperiments_noiseconv_bays09_ll90_avg_allT = result_fitexperiments_noiseconv_allexp_ll90_avg_allT[..., 0]
    result_fitexperiments_noiseconv_gorgo11_ll90_avg_allT = result_fitexperiments_noiseconv_allexp_ll90_avg_allT[..., 1]
    result_fitexperiments_noiseconv_dualrecall_ll90_avg_allT = result_fitexperiments_noiseconv_allexp_ll90_avg_allT[..., 2]
    # Summed T
    result_fitexperiments_noiseconv_bays09_ll90_avg_sumT = np.nansum(result_fitexperiments_noiseconv_bays09_ll90_avg_allT, axis=-1)
    result_fitexperiments_noiseconv_gorgo11_ll90_avg_sumT = np.nansum(result_fitexperiments_noiseconv_gorgo11_ll90_avg_allT, axis=-1)
    result_fitexperiments_noiseconv_dualrecall_ll90_avg_sumT = np.nansum(result_fitexperiments_noiseconv_dualrecall_ll90_avg_allT, axis=-1)

    def mask_outliers_array(result_dist_to_use, sigma_outlier=3):
        '''
            Mask outlier datapoints.
            Compute the mean of the results and assume that points with:
              result > mean + sigma_outlier*std
            are outliers.

            As we want the minimum values, do not mask small values
        '''
        return np.ma.masked_greater(result_dist_to_use, np.mean(result_dist_to_use) + sigma_outlier*np.std(result_dist_to_use))

    def best_points_allT(result_dist_to_use):
        '''
            Best points for all T
        '''
        return np.argsort(result_dist_to_use)[:nb_best_points]

    def str_best_params(best_i, result_dist_to_use):
        # One-line human-readable "param value ... >> distance" summary.
        return ' '.join(["%s %.4f" % (parameter_names_sorted[param_i], result_parameters_flat[best_i, param_i]) for param_i in xrange(len(parameter_names_sorted))]) + ' >> %f' % result_dist_to_use[best_i]

    def plot_scatter(all_vars, result_dist_to_use_name, title='', log_color=True, downsampling=1, label_file='', mask_outliers=True):
        # 3D scatter of the distance metric over random triples of parameters.
        # all_vars is expected to be the caller's locals().

        result_dist_to_use = all_vars[result_dist_to_use_name]
        result_parameters_flat = all_vars['result_parameters_flat']

        # Filter if downsampling
        filter_downsampling = np.arange(0, result_dist_to_use.size, downsampling)
        result_dist_to_use = result_dist_to_use[filter_downsampling]
        result_parameters_flat = result_parameters_flat[filter_downsampling]

        if mask_outliers:
            result_dist_to_use = mask_outliers_array(result_dist_to_use)

        best_points_result_dist_to_use = np.argsort(result_dist_to_use)[:nb_best_points]

        # Construct all permutations of 3 parameters, for 3D scatters
        params_permutations = set([tuple(np.sort(np.random.choice(result_parameters_flat.shape[-1], 3, replace=False)).tolist()) for i in xrange(1000)])

        for param_permut in params_permutations:
            fig = plt.figure()
            ax = Axes3D(fig)

            # One plot per parameter permutation
            if log_color:
                color_points = np.log(result_dist_to_use)
            else:
                color_points = result_dist_to_use

            utils.scatter3d(result_parameters_flat[:, param_permut[0]], result_parameters_flat[:, param_permut[1]], result_parameters_flat[:, param_permut[2]], s=size_normal_points, c=color_points, xlabel=parameter_names_sorted[param_permut[0]], ylabel=parameter_names_sorted[param_permut[1]], zlabel=parameter_names_sorted[param_permut[2]], title=title, ax_handle=ax)

            # Highlight the best points in red on top of the full cloud.
            utils.scatter3d(result_parameters_flat[best_points_result_dist_to_use, param_permut[0]], result_parameters_flat[best_points_result_dist_to_use, param_permut[1]], result_parameters_flat[best_points_result_dist_to_use, param_permut[2]], c='r', s=size_best_points, ax_handle=ax)

            if savefigs:
                dataio.save_current_figure('scatter3d_%s_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file))

            if savemovies:
                try:
                    utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_%s%s_{label}_{unique_id}.mp4' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file)), bitrate=8000, min_duration=8)
                    utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_%s%s_{label}_{unique_id}.gif' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file)), nb_frames=30, min_duration=8)
                except Exception:
                    # Most likely wrong aggregator...
                    print "failed when creating movies for ", result_dist_to_use_name


            if False and savefigs:
                # Disabled: a second viewpoint of the same scatter.
                ax.view_init(azim=90, elev=10)
                dataio.save_current_figure('scatter3d_view2_%s_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file))

            # plt.close('all')

        print "Parameters: %s" % ', '.join(parameter_names_sorted)
        print "Best points, %s:" % title
        print '\n'.join([str_best_params(best_i, result_dist_to_use) for best_i in best_points_result_dist_to_use])



    if scatter3d_sumT:

        plot_scatter(locals(), 'result_fitexperiments_noiseconv_bays09_bic_avg_sumT', 'BIC Bays09')
        plot_scatter(locals(), 'result_fitexperiments_noiseconv_bays09_ll90_avg_sumT', 'LL90 Bays09')

        plot_scatter(locals(), 'result_fitexperiments_noiseconv_gorgo11_bic_avg_sumT', 'BIC Gorgo11')
        plot_scatter(locals(), 'result_fitexperiments_noiseconv_gorgo11_ll90_avg_sumT', 'LL90 Gorgo11')

        plot_scatter(locals(), 'result_fitexperiments_noiseconv_dualrecall_bic_avg_sumT', 'BIC Dual recall')
        plot_scatter(locals(), 'result_fitexperiments_noiseconv_dualrecall_ll90_avg_sumT', 'LL90 Dual recall')


    if plots_flat_sorted_performance:
        result_dist_to_try = []
        if do_bays09:
            result_dist_to_try.extend(['result_fitexperiments_noiseconv_bays09_bic_avg_sumT', 'result_fitexperiments_noiseconv_bays09_ll90_avg_sumT'])
        if do_gorgo11:
            result_dist_to_try.extend(['result_fitexperiments_noiseconv_gorgo11_bic_avg_sumT', 'result_fitexperiments_noiseconv_gorgo11_ll90_avg_sumT'])

        for result_dist in result_dist_to_try:
            # Sort worst-to-best; normalised parameters are offset per column
            # (np.arange(4)) so the lines don't overlap in the plot.
            order_indices = np.argsort(locals()[result_dist])[::-1]

            f, axes = plt.subplots(2, 1)
            axes[0].plot(np.arange(4) + result_parameters_flat[order_indices]/np.max(result_parameters_flat[order_indices], axis=0))
            axes[0].legend(parameter_names_sorted, loc='upper left')
            axes[0].set_ylabel('Parameters')
            axes[1].plot(locals()[result_dist][order_indices])
            axes[1].set_ylabel(result_dist.split('result_dist_')[-1])
            axes[0].set_title('Distance ordered ' + result_dist.split('result_dist_')[-1])
            f.canvas.draw()

            if savefigs:
                dataio.save_current_figure('plot_sortedperf_full_%s_{label}_{unique_id}.pdf' % (result_dist))

    if plots_memorycurves_fits_best:
        # Alright, will actually reload the data from another set of runs, and find the closest parameter set to the ones found here.
        data = utils.load_npy('normalisedsigmaxsigmaoutput_random_fitmixturemodels_sigmaxMratiosigmaoutput_repetitions3_280814/outputs/global_plots_fitmixtmodel_random_sigmaoutsigmaxnormMratio-plots_fit_mixturemodels_random-75eb9c74-72e0-4165-8014-92c1ef446f0a.npy')
        result_em_fits_flat_fitmixture = data['result_em_fits_flat']
        result_parameters_flat_fitmixture = data['result_parameters_flat']
        all_args_arr_fitmixture = data['all_args_arr']

        data_dir = None
        if not os.environ.get('WORKDIR_DROP'):
            data_dir = '../experimental_data/'

        plotting_parameters = launchers_memorycurves_marginal_fi.load_prepare_datasets()

        def plot_memorycurves_fits_fromexternal(all_vars, result_dist_to_use_name, nb_best_points=10):
            # For each of the nb_best_points best parameter sets (smallest
            # distance), find the nearest parameter set in the external
            # fitmixture run and plot its memory-curve fits.
            result_dist_to_use = all_vars[result_dist_to_use_name]

            result_em_fits_flat_fitmixture = all_vars['result_em_fits_flat_fitmixture']
            result_parameters_flat_fitmixture = all_vars['result_parameters_flat_fitmixture']
            all_args_arr_fitmixture = all_vars['all_args_arr_fitmixture']

            best_point_indices_result_dist = np.argsort(result_dist_to_use)[:nb_best_points]

            for best_point_index in best_point_indices_result_dist:
                print "extended plot desired for: " + str_best_params(best_point_index, result_dist_to_use)

                # Per-dimension distances, normalised to [0, 1] per column;
                # the closest point maximises the product of (1 - distance).
                dist_best_points_fitmixture = np.abs(result_parameters_flat_fitmixture - result_parameters_flat[best_point_index])
                dist_best_points_fitmixture -= np.min(dist_best_points_fitmixture, axis=0)
                dist_best_points_fitmixture /= np.max(dist_best_points_fitmixture, axis=0)

                best_point_index_fitmixture = np.argmax(np.prod(1-dist_best_points_fitmixture, axis=-1))

                print "found closest: " + ' '.join(["%s %.4f" % (parameter_names_sorted[param_i], result_parameters_flat_fitmixture[best_point_index_fitmixture, param_i]) for param_i in xrange(len(parameter_names_sorted))])

                # Update arguments
                all_args_arr_fitmixture[best_point_index_fitmixture].update(dict(zip(parameter_names_sorted, result_parameters_flat_fitmixture[best_point_index_fitmixture])))
                packed_data = dict(T_space=T_space, result_em_fits=result_em_fits_flat_fitmixture[best_point_index_fitmixture], all_parameters=all_args_arr_fitmixture[best_point_index_fitmixture])

                plotting_parameters['suptitle'] = result_dist_to_use_name
                plotting_parameters['reuse_axes'] = False
                if savefigs:
                    packed_data['dataio'] = dataio

                launchers_memorycurves_marginal_fi.do_memory_plots(packed_data, plotting_parameters)


        # NOTE(review): these 'result_external_...' names do not appear in
        # locals() above (only 'result_fitexperiments_noiseconv_*_ll90_avg_sumT'
        # are defined), so each call would raise KeyError -- confirm the
        # intended variable names.
        plot_memorycurves_fits_fromexternal(locals(), 'result_external_fitexperiments_noiseconv_bays09_ll90_avg_sumT', nb_best_points=3)

        plot_memorycurves_fits_fromexternal(locals(), 'result_external_fitexperiments_noiseconv_gorgo11_ll90_avg_sumT', nb_best_points=3)

        plot_memorycurves_fits_fromexternal(locals(), 'result_external_fitexperiments_noiseconv_dualrecall_ll90_avg_sumT', nb_best_points=3)



    all_args = data_pbs.loaded_data['args_list']
    # NOTE(review): 'filter_data' is only created in the commented-out
    # filtering block above, so listing it here will fail -- confirm.
    variables_to_save = ['experiment_ids', 'parameter_names_sorted', 'T_space', 'all_args_arr', 'all_repeats_completed', 'filter_data']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='sigmaoutput_normalisedsigmax_random')


    plt.show()

    return locals()
    def model_fn(features, labels, mode, params):
        """Build the AdaBERTStudent graph for a tf.estimator.Estimator.

        features: dict of input tensors; key set depends on FLAGS.is_training
            (training additionally expects teacher "prob_logits").
        labels: unused directly -- labels are carried inside `features`.
        mode: tf.estimator.ModeKeys value selecting TRAIN / EVAL / PREDICT.
        params: unused; present to satisfy the Estimator model_fn signature.

        Returns a tf.estimator.EstimatorSpec appropriate for `mode`.
        """
        # Gather input tensors in the fixed order the model expects.
        inputs = []
        if FLAGS.is_training:
            for feature_name in [
                    "ids", "mask", "seg_ids", "prob_logits", "labels"
            ]:
                inputs.append(features[feature_name])
        else:
            for feature_name in ["ids", "mask", "seg_ids", "labels"]:
                inputs.append(features[feature_name])

        # Optional pretrained embeddings, only loaded for training.
        # FLAGS.emb_pathes is "word_path,pos_path".
        if FLAGS.emb_pathes and FLAGS.is_training:
            pathes = FLAGS.emb_pathes.split(',')
            pretrained_word_embeddings = load_npy(pathes[0])
            pretrained_pos_embeddings = load_npy(pathes[1])
        else:
            pretrained_word_embeddings, pretrained_pos_embeddings = None, None

        # Optionally restore a previously searched architecture; otherwise
        # the architecture parameters are learned (search mode).
        Kmax = 8
        given_arch = None
        if FLAGS.arch_path:
            Kmax, given_arch = load_arch(FLAGS.arch_path)

        model = AdaBERTStudent(
            inputs, (mode == tf.estimator.ModeKeys.TRAIN),
            vocab_size=FLAGS.num_token,
            is_pair_task=bool(FLAGS.is_pair_task),
            num_classes=FLAGS.num_classes,
            Kmax=Kmax,
            emb_size=FLAGS.embed_size,
            seq_len=FLAGS.seq_length,
            keep_prob=0.9 if mode == tf.estimator.ModeKeys.TRAIN else 1.0,
            temp_decay_steps=FLAGS.temp_decay_steps,
            model_opt_lr=FLAGS.model_opt_lr,
            arch_opt_lr=FLAGS.arch_opt_lr,
            model_l2_reg=FLAGS.model_l2_reg,
            arch_l2_reg=FLAGS.arch_l2_reg,
            loss_gamma=FLAGS.loss_gamma,
            loss_beta=FLAGS.loss_beta,
            pretrained_word_embeddings=pretrained_word_embeddings,
            pretrained_pos_embeddings=pretrained_pos_embeddings,
            given_arch=given_arch)

        if mode == tf.estimator.ModeKeys.TRAIN:
            # Log architecture parameters, step and loss every 50 steps.
            logging_tensors = dict([(var.name, var)
                                    for var in model.arch_params])
            logging_tensors['step'] = model.global_step
            logging_tensors['loss'] = model.loss
            logging_hook = tf.train.LoggingTensorHook(logging_tensors,
                                                      every_n_iter=50)
            chief_only_hooks = [logging_hook]
            if given_arch is None:
                # Search mode: periodically dump the searched architecture.
                search_result_hook = SearchResultsSaver(
                    model.global_step, model.arch_params, model.ld_embs,
                    FLAGS.model_dir, FLAGS.save_steps)
                chief_only_hooks.append(search_result_hook)
            # handle the save/restore related issues
            if FLAGS.searched_model:
                # has pretrained
                tvars = tf.trainable_variables()
                initialized_variable_names = {}
                init_checkpoint = os.path.join(FLAGS.searched_model)
                tf.logging.info("Init from %s" % init_checkpoint)
                # "wemb"/"pemb" are excluded from the checkpoint mapping.
                (assignment_map, initialized_variable_names
                 ) = get_assignment_map_from_checkpoint(
                     tvars, init_checkpoint, ["wemb", "pemb"])
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                tf.logging.info("**** Trainable Variables ****")
                for var in tvars:
                    init_string = ""
                    if var.name in initialized_variable_names:
                        init_string = ", *INIT_FROM_CKPT*"
                    tf.logging.info("  name = %s, shape = %s%s", var.name,
                                    var.shape, init_string)
            return tf.estimator.EstimatorSpec(
                mode=mode,
                loss=model.loss,
                train_op=model.update,
                training_chief_hooks=chief_only_hooks)
        elif mode == tf.estimator.ModeKeys.EVAL:
            # Define the metrics:
            metrics_dict = {
                'Acc': model.acc,
                #'AUC': tf.metrics.auc(train_labels, probabilities, num_thresholds=2000)
            }
            return tf.estimator.EstimatorSpec(mode,
                                              loss=model.loss,
                                              eval_metric_ops=metrics_dict)
        else:
            # PREDICT mode: echo inputs alongside predictions when the
            # features were built for inference.
            if FLAGS.is_training:
                predictions = dict()
                predictions["predicted"] = model.predictions
                predictions["labels"] = features["labels"]
            else:
                predictions = features.copy()
                predictions["logits"] = model.logits
                predictions["predicted"] = model.predictions
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=predictions)
def plots_logposterior_mixed_autoset(data_pbs, generator_module=None):
    '''
        Reload 2D volume runs from PBS and plot them

        data_pbs: aggregated PBS results (dict_arrays / loaded_data).
        generator_module: submission-generator module; provides
        dict_parameters_range, pbs_submission_infos and, when relaunching,
        the full submission dictionary.

        Returns locals(); may also relaunch PBS jobs for the best-fitting
        parameters (do_relaunch_bestparams_pbs switch below).
    '''

    #### SETUP
    #
    savefigs = True
    plot_per_ratio = False
    plot_2d_pcolor = False

    do_relaunch_bestparams_pbs = True

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", generator_module.dict_parameters_range.keys()

    result_log_posterior_mean = np.squeeze(data_pbs.dict_arrays['result_log_posterior_mean']['results'])
    result_log_posterior_std = np.squeeze(data_pbs.dict_arrays['result_log_posterior_std']['results'])

    ratio_space = data_pbs.loaded_data['parameters_uniques']['ratio_conj']
    sigmax_space = data_pbs.loaded_data['parameters_uniques']['sigmax']

    exp_dataset = data_pbs.loaded_data['args_list'][0]['experiment_id']

    print ratio_space
    print sigmax_space
    print result_log_posterior_mean.shape, result_log_posterior_std.shape

    # NOTE(review): dataset_infos is not defined in this function -- presumably
    # a module-level global set elsewhere; verify before running standalone.
    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])


    if plot_per_ratio:
        # Plot the evolution of loglike as a function of sigmax, with std shown
        for ratio_conj_i, ratio_conj in enumerate(ratio_space):
            ax = utils.plot_mean_std_area(sigmax_space, result_log_posterior_mean[ratio_conj_i], result_log_posterior_std[ratio_conj_i])

            ax.get_figure().canvas.draw()

            if savefigs:
                dataio.save_current_figure('results_fitexp_%s_loglike_ratioconj%.2f_{label}_global_{unique_id}.pdf' % (exp_dataset, ratio_conj))

    if plot_2d_pcolor:
        # Plot the mean loglikelihood as a 2d surface
        utils.pcolor_2d_data(result_log_posterior_mean, x=ratio_space, y=sigmax_space, xlabel="Ratio conj", ylabel="Sigma x", title="Loglikelihood of experimental data, \n3 items dualrecall, rcscale automatically set", ticks_interpolate=5, cmap=colormap)
        # plt.tight_layout()

        if savefigs:
            dataio.save_current_figure('results_fitexp_%s_loglike_2d_ratiosigmax_{label}_global_{unique_id}.pdf' % exp_dataset)

    if do_relaunch_bestparams_pbs:
        # Will check the best fitting parameters, and relaunch simulations for them, in order to get new cool plots.

        # We can actually use the original dictionary for submission informations, and just change the launcher to one produces the appropriate results
        generator_parameters_dict = generator_module.__dict__
        generator_parameters_dict['pbs_submission_infos']['other_options'].update(dict(
            action_to_do='launcher_do_memory_curve_marginal_fi',
            subaction='collect_responses',
            inference_method='sample',
            N=300,
            T=6,
            num_samples=500,
            selection_method='last',
            num_repetitions=3,
            burn_samples=500,
            stimuli_generation='random',
            stimuli_generation_recall='random',
            session_id='fitting_experiments_relaunchs',
            result_computation='filenameoutput'))
        generator_parameters_dict['pbs_submission_infos']['other_options']['label'] = 'fitting_experiment_rerun_nitems{T}M{M}_090414'.format(T = generator_parameters_dict['n_items_to_fit'], M = generator_parameters_dict['M'])
        generator_parameters_dict['sleeping_period'] = dict(min=5, max=10)

        submit_pbs = submitpbs.SubmitPBS(pbs_submission_infos=generator_parameters_dict['pbs_submission_infos'], debug=True)

        # Now run a series of Jobs to obtain the required data
        # the "Result" of them are the filename of their output. Should then save those in a dictionary.
        # For later processing, if the exact same parameters are used, then everything will be reloaded automatically, MAAAGIC.

        # Extract the parameters to try
        # For each ratio_conj (axis 0), pick the sigmax with the highest
        # mean log-posterior (argmax over axis 1).
        best_params_to_try = []
        best_axis2_i_all = np.argmax(result_log_posterior_mean, axis=1)
        for axis1_i, best_axis2_i in enumerate(best_axis2_i_all):
            parameter_dict = dict(ratio_conj=ratio_space[axis1_i], sigmax=sigmax_space[best_axis2_i])

            best_params_to_try.append(parameter_dict)

        # Submit them, waiting on them in the process. Should obtain a list of filenames back
        utils.chdir_safe(generator_parameters_dict['pbs_submission_infos']['simul_out_dir'])
        result_minibatch_filenames = submit_pbs.submit_minibatch_jobswrapper(best_params_to_try, generator_parameters_dict)

        # Reload each finished job's output; each job reported its filename.
        result_reloaded_datasets = []
        for filename_i, result_filename in enumerate(result_minibatch_filenames):
            curr_reloaded_dataset = utils.load_npy(result_filename + '.npy')
            result_reloaded_datasets.append(curr_reloaded_dataset)

        # Return to the original working directory.
        utils.chdir_safe('../')






    all_args = data_pbs.loaded_data['args_list']
    variables_to_save = ['exp_dataset']

    if savefigs:
        dataio.save_variables_default(locals(), variables_to_save)


    plt.show()

    return locals()
def plots_boostrap(data_pbs, generator_module=None):
    '''
        Reload bootstrap samples, plot their histograms, fit empirical CDFs
        (one per (sigmax, T) condition with T > 1) and cache the fitted ECDFs
        to disk for quicker later use.  Optionally computes bootstrap p-values
        against previously saved model outputs.

        Parameters:
        - data_pbs: PBS results container; sample arrays are read from
          data_pbs.dict_arrays and run metadata from data_pbs.loaded_data.
        - generator_module: module holding pbs_submission_infos, used to
          locate the simulation output / cache directories.

        Returns locals(), so every local variable name is effectively part of
        the public result (and is also what save_variables_default persists) —
        do not rename locals casually.

        NOTE(review): the function name typo ("boostrap") is kept as-is since
        callers may reference it by this name.
    '''

    #### SETUP
    #
    savefigs = True
    savedata = True

    # Stage toggles: load/fit the ECDFs, plot histograms+CDFs, estimate p-values.
    load_fit_bootstrap = True
    plots_hist_cdf = True
    estimate_bootstrap = True

    should_fit_bootstrap = True
    # caching_bootstrap_filename = None
    # Pickle cache of the fitted ECDF dicts; delete the file to force a refit.
    caching_bootstrap_filename = os.path.join(generator_module.pbs_submission_infos['simul_out_dir'], 'outputs', 'cache_bootstrap.pickle')

    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", generator_module.dict_parameters_range.keys()

    # Bootstrap sample arrays; after squeeze, indexed [sigmax_i, T_i, repetition]
    # (assumed from the loops below — TODO confirm against the generator script).
    result_bootstrap_samples_allitems_uniquekappa_sumnontarget = np.squeeze(data_pbs.dict_arrays['result_bootstrap_samples_allitems_uniquekappa_sumnontarget']['results'])
    result_bootstrap_samples = np.squeeze(data_pbs.dict_arrays['result_bootstrap_samples']['results'])
    result_bootstrap_samples_allitems_uniquekappa_allnontarget = np.squeeze(data_pbs.dict_arrays['result_bootstrap_samples_allitems_uniquekappa_allnontarget']['results'])


    # Parameter spaces are taken from the first dataset; all runs are assumed
    # to share the same sigmax_space / T_space.
    sigmax_space = data_pbs.loaded_data['datasets_list'][0]['sigmax_space']
    T_space = data_pbs.loaded_data['datasets_list'][0]['T_space']

    print result_bootstrap_samples_allitems_uniquekappa_sumnontarget.shape
    print result_bootstrap_samples.shape
    print result_bootstrap_samples_allitems_uniquekappa_allnontarget.shape

    # NOTE(review): dataset_infos is not defined in this function; presumably a
    # module-level global — verify it is in scope when this runs.
    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    if load_fit_bootstrap:
        if caching_bootstrap_filename is not None:

            if os.path.exists(caching_bootstrap_filename):
                # Got file, open it and try to use its contents
                try:
                    # NOTE(review): pickle opened in text mode ('r'); fine on
                    # POSIX under Python 2, but 'rb' would be safer/portable.
                    with open(caching_bootstrap_filename, 'r') as file_in:
                        # Load and assign values
                        cached_data = pickle.load(file_in)
                        bootstrap_ecdf_bays_sigmax_T = cached_data['bootstrap_ecdf_bays_sigmax_T']
                        bootstrap_ecdf_allitems_sum_sigmax_T = cached_data['bootstrap_ecdf_allitems_sum_sigmax_T']
                        bootstrap_ecdf_allitems_all_sigmax_T = cached_data['bootstrap_ecdf_allitems_all_sigmax_T']
                        # Cache hit: skip the (expensive) refitting below.
                        should_fit_bootstrap = False

                except IOError:
                    print "Error while loading ", caching_bootstrap_filename, "falling back to computing the EM fits"

        if should_fit_bootstrap:

            bootstrap_ecdf_bays_sigmax_T = dict()
            bootstrap_ecdf_allitems_sum_sigmax_T = dict()
            bootstrap_ecdf_allitems_all_sigmax_T = dict()

            # Fit bootstrap
            for sigmax_i, sigmax in enumerate(sigmax_space):
                for T_i, T in enumerate(T_space):
                    # Non-target analyses only make sense with >= 1 non-target item.
                    if T>1:
                        # One bootstrap CDF per condition
                        bootstrap_ecdf_bays = stmodsdist.empirical_distribution.ECDF(utils.dropnan(result_bootstrap_samples[sigmax_i, T_i]))
                        bootstrap_ecdf_allitems_sum = stmodsdist.empirical_distribution.ECDF(utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_sumnontarget[sigmax_i, T_i]))
                        bootstrap_ecdf_allitems_all = stmodsdist.empirical_distribution.ECDF(utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_allnontarget[sigmax_i, T_i]))


                        # Store in a dict(sigmax) -> dict(T) -> ECDF object
                        bootstrap_ecdf_bays_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict(ecdf=bootstrap_ecdf_bays, T=T, sigmax=sigmax)
                        bootstrap_ecdf_allitems_sum_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict(ecdf=bootstrap_ecdf_allitems_sum, T=T, sigmax=sigmax)
                        bootstrap_ecdf_allitems_all_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict(ecdf=bootstrap_ecdf_allitems_all, T=T, sigmax=sigmax)



            # Save everything to a file, for faster later plotting
            if caching_bootstrap_filename is not None:
                try:
                    # NOTE(review): text-mode 'w' with protocol=2 (binary) is
                    # fragile; 'wb' would be the safe choice.
                    with open(caching_bootstrap_filename, 'w') as filecache_out:
                        data_bootstrap = dict(bootstrap_ecdf_allitems_sum_sigmax_T=bootstrap_ecdf_allitems_sum_sigmax_T, bootstrap_ecdf_allitems_all_sigmax_T=bootstrap_ecdf_allitems_all_sigmax_T, bootstrap_ecdf_bays_sigmax_T=bootstrap_ecdf_bays_sigmax_T)
                        pickle.dump(data_bootstrap, filecache_out, protocol=2)
                except IOError:
                    print "Error writing out to caching file ", caching_bootstrap_filename


    if plots_hist_cdf:
        ## Plots now
        for sigmax_i, sigmax in enumerate(sigmax_space):
            for T_i, T in enumerate(T_space):
                if T > 1:
                    # Histogram of samples
                    # NOTE(review): normed='density' relies on the legacy
                    # matplotlib `normed` flag (truthy) — `density=True` in
                    # modern matplotlib.
                    _, axes = plt.subplots(ncols=3, figsize=(18, 6))
                    axes[0].hist(utils.dropnan(result_bootstrap_samples[sigmax_i, T_i]), bins=100, normed='density')
                    axes[0].set_xlim([0.0, 1.0])
                    axes[1].hist(utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_sumnontarget[sigmax_i, T_i]), bins=100, normed='density')
                    axes[1].set_xlim([0.0, 1.0])
                    axes[2].hist(utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_allnontarget[sigmax_i, T_i]), bins=100, normed='density')
                    axes[2].set_xlim([0.0, 1.0])

                    if savefigs:
                        dataio.save_current_figure('hist_bootstrap_sigmax%.2f_T%d_{label}_{unique_id}.pdf' % (sigmax, T))

                    # ECDF now
                    _, axes = plt.subplots(ncols=3, sharey=True, figsize=(18, 6))
                    axes[0].plot(bootstrap_ecdf_bays_sigmax_T[sigmax_i][T_i]['ecdf'].x, bootstrap_ecdf_bays_sigmax_T[sigmax_i][T_i]['ecdf'].y, linewidth=2)
                    axes[0].set_xlim([0.0, 1.0])
                    axes[1].plot(bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T_i]['ecdf'].x, bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T_i]['ecdf'].y, linewidth=2)
                    axes[1].set_xlim([0.0, 1.0])
                    axes[2].plot(bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T_i]['ecdf'].x, bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T_i]['ecdf'].y, linewidth=2)
                    axes[2].set_xlim([0.0, 1.0])

                    if savefigs:
                        dataio.save_current_figure('ecdf_bootstrap_sigmax%.2f_T%d_{label}_{unique_id}.pdf' % (sigmax, T))

    if estimate_bootstrap:
        ## Should be in reloader_error_distribution_mixed_121113 instead
        # NOTE(review): os.getenv(..., None) can return None, which would make
        # os.path.join raise — WORKDIR_DROP must be set in the environment.
        model_outputs = utils.load_npy( os.path.join(os.getenv("WORKDIR_DROP", None), 'Experiments', 'bootstrap_nontargets', 'global_plots_errors_distribution-plots_errors_distribution-d977e237-cfce-473b-a292-00695e725259.npy'))

        # Keep only the first repetition of each saved array.
        data_responses_all = model_outputs['result_responses_all'][..., 0]
        data_target_all = model_outputs['result_target_all'][..., 0]
        data_nontargets_all = model_outputs['result_nontargets_all'][..., 0]

        # Compute bootstrap p-value
        # Rows: sigmax; columns: T-2 (conditions T >= 2 only).
        result_pvalue_bootstrap_sum = np.empty((sigmax_space.size, T_space.size-1))*np.nan
        result_pvalue_bootstrap_all = np.empty((sigmax_space.size, T_space.size-1, T_space.size-1))*np.nan
        for sigmax_i, sigmax in enumerate(sigmax_space):
            # T iterates over item counts directly; T-1 converts to the
            # corresponding index (assumes T_space == [1, 2, ...] — TODO confirm).
            for T in T_space[1:]:
                bootstrap_allitems_nontargets_allitems_uniquekappa = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(data_responses_all[sigmax_i, (T-1)], data_target_all[sigmax_i, (T-1)], data_nontargets_all[sigmax_i, (T-1), :, :(T-1)], sumnontargets_bootstrap_ecdf=bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T-1]['ecdf'], allnontargets_bootstrap_ecdf=bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T-1]['ecdf'])

                result_pvalue_bootstrap_sum[sigmax_i, T-2] = bootstrap_allitems_nontargets_allitems_uniquekappa['p_value']
                result_pvalue_bootstrap_all[sigmax_i, T-2, :(T-1)] = bootstrap_allitems_nontargets_allitems_uniquekappa['allnontarget_p_value']

                print sigmax, T, result_pvalue_bootstrap_sum[sigmax_i, T-2], result_pvalue_bootstrap_all[sigmax_i, T-2, :(T-1)], np.sum(result_pvalue_bootstrap_all[sigmax_i, T-2, :(T-1)] < 0.05)

    all_args = data_pbs.loaded_data['args_list']
    # NOTE(review): 'nb_repetitions' is not assigned anywhere in this
    # function — verify save_variables_default tolerates missing names.
    variables_to_save = ['nb_repetitions']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)

        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='bootstrap_nontargets')


    plt.show()


    return locals()
def plots_boostrap(data_pbs, generator_module=None):
    """
        Reload bootstrap samples, plot its histogram, fit empirical CDF and save it for quicker later use.

        Reformatted variant of the function of the same name defined earlier
        in this file; being defined later, it shadows the previous one at
        import time.  Histogram/CDF plotting and p-value estimation are
        switched off by default here (see the SETUP toggles).

        Returns locals(), so every local variable name is effectively part of
        the public result — do not rename locals casually.
    """

    #### SETUP
    #
    savefigs = True
    savedata = True

    # Stage toggles: only the ECDF load/fit stage is active by default.
    load_fit_bootstrap = True
    plots_hist_cdf = False
    estimate_bootstrap = False

    should_fit_bootstrap = True
    # caching_bootstrap_filename = None
    # Pickle cache of the fitted ECDF dicts; delete the file to force a refit.
    caching_bootstrap_filename = os.path.join(
        generator_module.pbs_submission_infos["simul_out_dir"], "outputs", "cache_bootstrap.pickle"
    )

    plt.rcParams["font.size"] = 16
    #
    #### /SETUP

    print "Order parameters: ", generator_module.dict_parameters_range.keys()

    # Bootstrap sample arrays; after squeeze, indexed [sigmax_i, T_i, repetition]
    # (assumed from the loops below — TODO confirm against the generator script).
    result_bootstrap_samples_allitems_uniquekappa_sumnontarget = np.squeeze(
        data_pbs.dict_arrays["result_bootstrap_samples_allitems_uniquekappa_sumnontarget"]["results"]
    )
    result_bootstrap_samples = np.squeeze(data_pbs.dict_arrays["result_bootstrap_samples"]["results"])
    result_bootstrap_samples_allitems_uniquekappa_allnontarget = np.squeeze(
        data_pbs.dict_arrays["result_bootstrap_samples_allitems_uniquekappa_allnontarget"]["results"]
    )

    # Parameter spaces from the first dataset; runs assumed homogeneous.
    sigmax_space = data_pbs.loaded_data["datasets_list"][0]["sigmax_space"]
    T_space = data_pbs.loaded_data["datasets_list"][0]["T_space"]

    print result_bootstrap_samples_allitems_uniquekappa_sumnontarget.shape
    print result_bootstrap_samples.shape
    print result_bootstrap_samples_allitems_uniquekappa_allnontarget.shape

    # NOTE(review): dataset_infos is not defined in this function; presumably a
    # module-level global — verify it is in scope when this runs.
    dataio = DataIO(
        output_folder=generator_module.pbs_submission_infos["simul_out_dir"] + "/outputs/",
        label="global_" + dataset_infos["save_output_filename"],
    )

    if load_fit_bootstrap:
        if caching_bootstrap_filename is not None:

            if os.path.exists(caching_bootstrap_filename):
                # Got file, open it and try to use its contents
                try:
                    # NOTE(review): pickle opened in text mode ("r"); fine on
                    # POSIX under Python 2, but "rb" would be safer/portable.
                    with open(caching_bootstrap_filename, "r") as file_in:
                        # Load and assign values
                        cached_data = pickle.load(file_in)
                        bootstrap_ecdf_bays_sigmax_T = cached_data["bootstrap_ecdf_bays_sigmax_T"]
                        bootstrap_ecdf_allitems_sum_sigmax_T = cached_data["bootstrap_ecdf_allitems_sum_sigmax_T"]
                        bootstrap_ecdf_allitems_all_sigmax_T = cached_data["bootstrap_ecdf_allitems_all_sigmax_T"]
                        # Cache hit: skip the (expensive) refitting below.
                        should_fit_bootstrap = False

                except IOError:
                    print "Error while loading ", caching_bootstrap_filename, "falling back to computing the EM fits"

        if should_fit_bootstrap:

            bootstrap_ecdf_bays_sigmax_T = dict()
            bootstrap_ecdf_allitems_sum_sigmax_T = dict()
            bootstrap_ecdf_allitems_all_sigmax_T = dict()

            # Fit bootstrap
            for sigmax_i, sigmax in enumerate(sigmax_space):
                for T_i, T in enumerate(T_space):
                    # Non-target analyses only make sense with >= 1 non-target item.
                    if T > 1:
                        # One bootstrap CDF per condition
                        bootstrap_ecdf_bays = stmodsdist.empirical_distribution.ECDF(
                            utils.dropnan(result_bootstrap_samples[sigmax_i, T_i])
                        )
                        bootstrap_ecdf_allitems_sum = stmodsdist.empirical_distribution.ECDF(
                            utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_sumnontarget[sigmax_i, T_i])
                        )
                        bootstrap_ecdf_allitems_all = stmodsdist.empirical_distribution.ECDF(
                            utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_allnontarget[sigmax_i, T_i])
                        )

                        # Store in a dict(sigmax) -> dict(T) -> ECDF object
                        bootstrap_ecdf_bays_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict(
                            ecdf=bootstrap_ecdf_bays, T=T, sigmax=sigmax
                        )
                        bootstrap_ecdf_allitems_sum_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict(
                            ecdf=bootstrap_ecdf_allitems_sum, T=T, sigmax=sigmax
                        )
                        bootstrap_ecdf_allitems_all_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict(
                            ecdf=bootstrap_ecdf_allitems_all, T=T, sigmax=sigmax
                        )

            # Save everything to a file, for faster later plotting
            if caching_bootstrap_filename is not None:
                try:
                    # NOTE(review): text-mode "w" with protocol=2 (binary) is
                    # fragile; "wb" would be the safe choice.
                    with open(caching_bootstrap_filename, "w") as filecache_out:
                        data_bootstrap = dict(
                            bootstrap_ecdf_allitems_sum_sigmax_T=bootstrap_ecdf_allitems_sum_sigmax_T,
                            bootstrap_ecdf_allitems_all_sigmax_T=bootstrap_ecdf_allitems_all_sigmax_T,
                            bootstrap_ecdf_bays_sigmax_T=bootstrap_ecdf_bays_sigmax_T,
                        )
                        pickle.dump(data_bootstrap, filecache_out, protocol=2)
                except IOError:
                    print "Error writing out to caching file ", caching_bootstrap_filename

    if plots_hist_cdf:
        ## Plots now
        for sigmax_i, sigmax in enumerate(sigmax_space):
            for T_i, T in enumerate(T_space):
                if T > 1:
                    # Histogram of samples
                    # NOTE(review): normed="density" relies on the legacy
                    # matplotlib `normed` flag (truthy) — `density=True` in
                    # modern matplotlib.
                    _, axes = plt.subplots(ncols=3, figsize=(18, 6))
                    axes[0].hist(utils.dropnan(result_bootstrap_samples[sigmax_i, T_i]), bins=100, normed="density")
                    axes[0].set_xlim([0.0, 1.0])
                    axes[1].hist(
                        utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_sumnontarget[sigmax_i, T_i]),
                        bins=100,
                        normed="density",
                    )
                    axes[1].set_xlim([0.0, 1.0])
                    axes[2].hist(
                        utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_allnontarget[sigmax_i, T_i]),
                        bins=100,
                        normed="density",
                    )
                    axes[2].set_xlim([0.0, 1.0])

                    if savefigs:
                        dataio.save_current_figure(
                            "hist_bootstrap_sigmax%.2f_T%d_{label}_{unique_id}.pdf" % (sigmax, T)
                        )

                    # ECDF now
                    _, axes = plt.subplots(ncols=3, sharey=True, figsize=(18, 6))
                    axes[0].plot(
                        bootstrap_ecdf_bays_sigmax_T[sigmax_i][T_i]["ecdf"].x,
                        bootstrap_ecdf_bays_sigmax_T[sigmax_i][T_i]["ecdf"].y,
                        linewidth=2,
                    )
                    axes[0].set_xlim([0.0, 1.0])
                    axes[1].plot(
                        bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T_i]["ecdf"].x,
                        bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T_i]["ecdf"].y,
                        linewidth=2,
                    )
                    axes[1].set_xlim([0.0, 1.0])
                    axes[2].plot(
                        bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T_i]["ecdf"].x,
                        bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T_i]["ecdf"].y,
                        linewidth=2,
                    )
                    axes[2].set_xlim([0.0, 1.0])

                    if savefigs:
                        dataio.save_current_figure(
                            "ecdf_bootstrap_sigmax%.2f_T%d_{label}_{unique_id}.pdf" % (sigmax, T)
                        )

    if estimate_bootstrap:
        # NOTE(review): unlike the earlier variant, this reads
        # os.environ["WORKDIR_DROP"] directly (raises KeyError if unset).
        model_outputs = utils.load_npy(
            os.path.join(
                os.environ["WORKDIR_DROP"],
                "Experiments/error_distribution/error_distribution_conj_M100T6repetitions5_121113_outputs/global_plots_errors_distribution-plots_errors_distribution-cc1a49b0-f5f0-4e82-9f0f-5a16a2bfd4e8.npy",
            )
        )
        # Keep only the first repetition of each saved array.
        data_responses_all = model_outputs["result_responses_all"][..., 0]
        data_target_all = model_outputs["result_target_all"][..., 0]
        data_nontargets_all = model_outputs["result_nontargets_all"][..., 0]

        # Compute bootstrap p-value
        for sigmax_i, sigmax in enumerate(sigmax_space):
            # T iterates over item counts; T-1 converts to array index
            # (assumes T_space == [1, 2, ...] — TODO confirm).
            for T in T_space[1:]:
                bootstrap_allitems_nontargets_allitems_uniquekappa = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(
                    data_responses_all[sigmax_i, (T - 1)],
                    data_target_all[sigmax_i, (T - 1)],
                    data_nontargets_all[sigmax_i, (T - 1), :, : (T - 1)],
                    sumnontargets_bootstrap_ecdf=bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T - 1]["ecdf"],
                    allnontargets_bootstrap_ecdf=bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T - 1]["ecdf"],
                )

                # TODO finish here!
                # NOTE(review): p-values are computed but never stored or
                # printed in this variant (cf. the earlier version which
                # fills result_pvalue_bootstrap_* arrays).

    all_args = data_pbs.loaded_data["args_list"]
    # NOTE(review): "nb_repetitions" is not assigned anywhere in this
    # function — verify save_variables_default tolerates missing names.
    variables_to_save = ["nb_repetitions"]

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)

        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder="bootstrap_nontargets")

    plt.show()

    return locals()
Esempio n. 9
0
train and save
"""

# Training-script preamble: load train/validation arrays and (re)create the
# log directories before the training loop (which continues past this excerpt).
import tensorflow as tf
import subprocess, os

from utils import load_npy, iterate_arr
from params import TRAIN_DATA, VAL_DATA, LOG_DIR, SAVE_DIR, VAL_LOG, TRAIN_LOG
from params import num_frames, num_sequences, batch_size, num_iterations

# import model and graph
from build_graph import X, train_op, summary_op
from build_graph import writer_train, writer_val, saver

# load training/validation data
train_arr = load_npy(TRAIN_DATA, num_frames=num_frames, limit=num_sequences)
if num_sequences is None:
    # No explicit limit requested: use every sequence in the training array.
    num_sequences = train_arr.shape[0]
train_iter = iterate_arr(train_arr, batch_size=batch_size)
val_arr = load_npy(VAL_DATA)

# clear old log
# NOTE(review): shelling out with str.split() breaks if any path contains
# spaces; shutil.rmtree / os.makedirs would be more robust.
if os.path.exists(LOG_DIR):
    cmd = "rm -r {}".format(LOG_DIR)
    subprocess.call(cmd.split())

cmd = "mkdir {}".format(LOG_DIR)
subprocess.call(cmd.split())
cmd = "mkdir {}".format(TRAIN_LOG)
subprocess.call(cmd.split())
cmd = "mkdir {}".format(VAL_LOG)
# NOTE(review): excerpt is truncated here — the subprocess.call for VAL_LOG
# and the training loop itself are outside this chunk.
Esempio n. 10
0
def main(in_path, out_path):
    """Load predictions from an .npy file and write them out as CSV.

    NOTE(review): everything after the print below looks like a different
    snippet (RNN training setup) fused into this function by the scrape; it
    executes after the CSV is written but its results are never used here.
    """
    predictions = utils.load_npy(in_path)
    write_predictions_to_csv(predictions, out_path)
    print 'Generated predictions file %s' % out_path
    # Number of target categories, plus one (presumably a CTC "blank" label
    # given use_ctc below — TODO confirm).
    n_categories = 6 + 1
    
    #Params
    dropout = 0.7
    n_feats = 600
    n_hidden = 64
    
    #hyper-params
    lr = 0.001
    epochs = 80
    batchsize = 8
    use_ctc = True

    #Data extraction and preparation
    # NOTE(review): data_folder is not defined in this function; presumably a
    # module-level global — verify it is in scope.
    X_train_path = data_folder + "X_train.npy"
    X_train = np.array(utils.load_npy(X_train_path))
    
    X_test_path = data_folder + "X_test.npy"
    X_test = np.array(utils.load_npy(X_test_path))
    
    y_train_path = data_folder + "y_train.npy"
    y_train = utils.load_npy(y_train_path)
    # Collapse numeric labels, map to categories, then to integer ids.
    y_train = np.array([utils.categories_to_int(utils.labels_num_to_category(utils.collapse_num_labels(y))) for y in y_train])
    
    y_test_path = data_folder + "y_test.npy"
    y_test = utils.load_npy(y_test_path)
    y_test = np.array([utils.categories_to_int(utils.labels_num_to_category(utils.collapse_num_labels(y))) for y in y_test])
    
    #SAVE MEMORY??
#    X_train, y_train = utils.get_mini_dataset(X_train, y_train, 500)
#    X_test, y_test = utils.get_mini_dataset(X_test, y_test, 100)
Esempio n. 12
0
            p_err_mean = epoch_loss_train_mean
            self.learning_rate = self.lr_scheduler(learning_rate, e, delta_err)
            self.dropout = self.dropout_scheduler(dropout, e, delta_err)

            if self.logsdir is not None:
                self.save_weights()

        return

if __name__ == "__main__":
    epochs = 10
    lr = 0.01

    rnn = RNN_framewise_base(n_feats=600, n_classes=41, logsdir="./RNN_test/")

    X_train = utils.load_npy('../../data/RBM_hann_v2/X_train.npy')
    X_test = utils.load_npy('../../data/RBM_hann_v2/X_test.npy')
    y_train = utils.load_npy('../../data/RBM_hann_v2/y_train.npy')
    y_test = utils.load_npy('../../data/RBM_hann_v2/y_test.npy')

    y_train = [utils.collapse_num_labels(y) for y in y_train]
    y_test = [utils.collapse_num_labels(y) for y in y_test]

    y_train = utils.to_ctc(y_train)
    y_test = utils.to_ctc(y_test)

    rnn.set_data(X_train, X_test, y_train, y_test)

    del (X_train, y_train, X_test, y_test)

    rnn.fit(n_epochs=epochs,