def launcher_do_fit_mixturemodels_sequential_alltrecall(args):
    '''
        Run the model for 1..T items sequentially, for all possible trecall/T
        combinations.
        Compute:
        - Precision of samples
        - EM mixture model fits, both independent and collapsed models
        - Theoretical Fisher Information
        - Distances between the EM mixture fits and the Gorgo11 sequential
          experimental dataset.
    '''

    print "Doing a piece of work for launcher_do_fit_mixturemodels_sequential_alltrecall"

    all_parameters = utils.argparse_2_dict(args)
    print all_parameters

    if all_parameters['burn_samples'] + all_parameters['num_samples'] < 200:
        print "WARNING> you do not have enough samples I think!", all_parameters['burn_samples'] + all_parameters['num_samples']

    # Create DataIO
    #  (complete label with current variable state)
    dataio = DataIO.DataIO(output_folder=all_parameters['output_directory'], label=all_parameters['label'].format(**all_parameters))
    save_every = 1
    run_counter = 0

    # Load dataset to compare against
    data_gorgo11_sequ = load_experimental_data.load_data_gorgo11_sequential(data_dir=all_parameters['experiment_data_dir'], fit_mixture_model=True)
    gorgo11_sequ_T_space = np.unique(data_gorgo11_sequ['n_items'])


    # Parameters to vary
    T_max = all_parameters['T']
    T_space = np.arange(1, T_max+1)
    repetitions_axis = -1

    # Result arrays
    result_all_precisions = np.nan*np.empty((T_space.size, T_space.size, all_parameters['num_repetitions']))
    result_fi_theo = np.nan*np.empty((T_space.size, T_space.size, all_parameters['num_repetitions']))
    result_fi_theocov = np.nan*np.empty((T_space.size, T_space.size, all_parameters['num_repetitions']))
    result_em_fits = np.nan*np.empty((T_space.size, T_space.size, 6, all_parameters['num_repetitions']))  # kappa, mixt_target, mixt_nontargets_sum, mixt_random, train_LL, bic
    result_em_fits_collapsed_tr = np.nan*np.empty((T_space.size, T_space.size, 4, all_parameters['num_repetitions']))  # kappa, mixt_target_tr, mixt_nontargets_tr, mixt_random_tr
    result_em_fits_collapsed_summary = np.nan*np.empty((5, all_parameters['num_repetitions'])) # [0] bic, [1] train_LL (left NaN below), [2:] kappa_theta (3 values)

    result_dist_gorgo11_sequ = np.nan*np.empty((T_space.size, T_space.size, 4, all_parameters['num_repetitions']))  # kappa, mixt_target, mixt_nontarget, mixt_random
    result_dist_gorgo11_sequ_emmixt_KL = np.nan*np.empty((T_space.size, T_space.size, all_parameters['num_repetitions']))

    result_dist_gorgo11_sequ_collapsed = np.nan*np.empty((T_space.size, T_space.size, 4, all_parameters['num_repetitions']))
    result_dist_gorgo11_sequ_collapsed_emmixt_KL = np.nan*np.empty((T_space.size, T_space.size, all_parameters['num_repetitions']))

    gorgo11_sequ_collapsed_mixtmod_mean = data_gorgo11_sequ['collapsed_em_fits_doublepowerlaw_array']


    # If desired, will automatically save all Model responses.
    if all_parameters['collect_responses']:
        print "--- Collecting all responses..."
        result_responses = np.nan*np.empty((T_space.size, T_space.size, all_parameters['N'], all_parameters['num_repetitions']))
        result_target = np.nan*np.empty((T_space.size, T_space.size, all_parameters['N'], all_parameters['num_repetitions']))
        result_nontargets = np.nan*np.empty((T_space.size, T_space.size, all_parameters['N'], T_max-1, all_parameters['num_repetitions']))

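    # Only trecall <= T combinations are simulated, so there are
    # T_max*(T_max+1)/2 work units per repetition.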
    search_progress = progress.Progress(T_space.size*(T_space.size + 1)/2.*all_parameters['num_repetitions'])

    for repet_i in xrange(all_parameters['num_repetitions']):
        for T_i, T in enumerate(T_space):
            for trecall_i, trecall in enumerate(np.arange(T, 0, -1)):
                # trecall indexing is inverted so that trecall_i == 0 always
                # refers to the last item presented; trecall itself is still
                # the actual recall time.
                print "%.2f%%, %s left - %s" % (search_progress.percentage(), search_progress.time_remaining_str(), search_progress.eta_str())
                print "Fit for T=%d, tr=%d, %d/%d" % (T, trecall, repet_i+1, all_parameters['num_repetitions'])

                # Update parameter
                all_parameters['T'] = T
                all_parameters['fixed_cued_feature_time'] = trecall - 1

                ### Work ###
                # Instantiate
                (_, _, _, sampler) = launchers.init_everything(all_parameters)

                # Sample
                sampler.run_inference(all_parameters)

                # Compute precision
                print "get precision..."
                result_all_precisions[T_i, trecall_i, repet_i] = sampler.get_precision()

                # Fit mixture model, independent
                print "fit mixture model..."
                curr_params_fit = sampler.fit_mixture_model(use_all_targets=False)
                result_em_fits[T_i, trecall_i, :, repet_i] = [curr_params_fit[key] for key in ['kappa', 'mixt_target', 'mixt_nontargets_sum', 'mixt_random', 'train_LL', 'bic']]

                # Compute fisher info
                print "compute fisher info"
                result_fi_theo[T_i, trecall_i, repet_i] = sampler.estimate_fisher_info_theocov(use_theoretical_cov=False)
                result_fi_theocov[T_i, trecall_i, repet_i] = sampler.estimate_fisher_info_theocov(use_theoretical_cov=True)

                # Compute distances to the experimental dataset (independent
                # per-(T, trecall) fits; the collapsed comparison below is
                # preferred)
                if T in gorgo11_sequ_T_space:
                    gorgo11_sequ_mixtures_mean = data_gorgo11_sequ['em_fits_nitems_trecall_arrays'][gorgo11_sequ_T_space==T, trecall_i, :4].flatten()

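                    # Distance metrics: elementwise squared error on
                    # (kappa, mixture weights), plus the KL divergence,
                    # presumably KL(p_model || p_data) = sum_i p_i * log(p_i / q_i),
                    # over the three mixture weights (target/nontarget/random).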
                    result_dist_gorgo11_sequ[T_i, trecall_i, :, repet_i] = (gorgo11_sequ_mixtures_mean - result_em_fits[T_i, trecall_i, :4, repet_i])**2.
                    result_dist_gorgo11_sequ_emmixt_KL[T_i, trecall_i, repet_i] = utils.KL_div(result_em_fits[T_i, trecall_i, 1:4, repet_i], gorgo11_sequ_mixtures_mean[1:])


                # If needed, store responses
                if all_parameters['collect_responses']:
                    print "collect responses"
                    (responses, target, nontarget) = sampler.collect_responses()
                    result_responses[T_i, trecall_i, :, repet_i] = responses
                    result_target[T_i, trecall_i, :, repet_i] = target
                    result_nontargets[T_i, trecall_i, :, :T_i, repet_i] = nontarget


                print "CURRENT RESULTS:\n", result_all_precisions[T_i, trecall_i, repet_i], curr_params_fit, result_fi_theo[T_i, trecall_i, repet_i], result_fi_theocov[T_i, trecall_i, repet_i], np.sum(result_dist_gorgo11_sequ[T_i, trecall_i, :, repet_i]), np.sum(result_dist_gorgo11_sequ_emmixt_KL[T_i, trecall_i, repet_i]), "\n"
                ### /Work ###

                search_progress.increment()
                if run_counter % save_every == 0 or search_progress.done():
                    dataio.save_variables_default(locals())
                run_counter += 1

        # Fit the collapsed mixture model. This needs the per-trial responses,
        # so collect_responses must be enabled.
        # TODO check dimensionality...
        print 'Fitting Collapsed double powerlaw mixture model...'
        assert all_parameters['collect_responses'], "collapsed fit requires collect_responses"
        params_fit = em_circularmixture_parametrickappa_doublepowerlaw.fit(T_space, result_responses[..., repet_i], result_target[..., repet_i], result_nontargets[..., repet_i], debug=False)

        # First store the parameters that depend on T/trecall
        for i, key in enumerate(['kappa', 'mixt_target_tr', 'mixt_nontargets_tr', 'mixt_random_tr']):
            result_em_fits_collapsed_tr[..., i, repet_i] = params_fit[key]

        # Then the ones that do not: one value per full collapsed fit.
        result_em_fits_collapsed_summary[0, repet_i] = params_fit['bic']
        # Slot 1 (train_LL) is left unset here and stays NaN.
        # result_em_fits_collapsed_summary[1, repet_i] = params_fit['train_LL']
        result_em_fits_collapsed_summary[2:, repet_i] = params_fit['kappa_theta']

        # Compute distances to dataset for collapsed model
        result_dist_gorgo11_sequ_collapsed[..., repet_i] = (gorgo11_sequ_collapsed_mixtmod_mean - result_em_fits_collapsed_tr[..., repet_i])**2.
        result_dist_gorgo11_sequ_collapsed_emmixt_KL[..., repet_i] = utils.KL_div(result_em_fits_collapsed_tr[..., 1:4, repet_i], gorgo11_sequ_collapsed_mixtmod_mean[..., 1:], axis=-1)


    # Finished
    dataio.save_variables_default(locals())

    print "All finished"
    return locals()
    def get_model_em_fits(self, num_repetitions=1, use_cache=True):
        '''Set up the experimental data, sample from the model, and fit a
        collapsed powerlaw mixture model on the outcome.
        '''
        if self.model_em_fits is None or not use_cache:
            # Collect all data to fit.
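            # Note: T here is the number of set sizes, not a set size itself.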
            T = self.T_space.size

            model_data_dict = {
                'responses': np.nan*np.empty((T, T, self.num_datapoints)),
                'targets': np.nan*np.empty((T, T, self.num_datapoints)),
                'nontargets': np.nan*np.empty((
                    T, T, self.num_datapoints, T - 1))}

            search_progress = progress.Progress(
                T*(T + 1)/2.*num_repetitions)

            params_fit_double_all = []
            for repet_i in xrange(num_repetitions):
                for n_items_i, n_items in enumerate(self.T_space):
                    for trecall_i, trecall in enumerate(self.T_space):
                        if trecall <= n_items:
                            self.setup_experimental_stimuli(n_items, trecall)

                            print ("{:.2f}%, {} left - {} "
                                   "== Data, N={}, trecall={}. {}/{}. ").format(
                                       search_progress.percentage(),
                                       search_progress.time_remaining_str(),
                                       search_progress.eta_str(),
                                       n_items, trecall, repet_i+1,
                                       num_repetitions)

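                            # On the first repetition, reuse cached 'samples'
                            # responses when available; later repetitions force
                            # a fresh sampling round and overwrite the cache.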
                            if ('samples' in
                                    self.get_names_stored_responses()
                                    and repet_i < 1):
                                self.restore_responses('samples')
                            else:
                                self.sampler.force_sampling_round()
                                self.store_responses('samples')

                            responses, targets, nontargets = (
                                self.sampler.collect_responses())

                            # collect all data
                            model_data_dict['responses'][
                                n_items_i,
                                trecall_i] = responses
                            model_data_dict['targets'][
                                n_items_i,
                                trecall_i] = targets
                            model_data_dict['nontargets'][
                                n_items_i,
                                trecall_i,
                                :,
                                :n_items_i] = nontargets

                            search_progress.increment()

                # Fit the collapsed mixture model
                params_fit_double = (
                    em_circularmixture_parametrickappa_doublepowerlaw.fit(
                        self.T_space,
                        model_data_dict['responses'],
                        model_data_dict['targets'],
                        model_data_dict['nontargets']))
                params_fit_double_all.append(params_fit_double)

            # Store statistics of powerlaw fits
            self.model_em_fits = collections.defaultdict(dict)
            emfits_keys = params_fit_double.keys()
            for key in emfits_keys:
                repets_param_fit_curr = [
                    param_fit_double[key]
                    for param_fit_double in params_fit_double_all]
                self.model_em_fits['mean'][key] = np.mean(
                    repets_param_fit_curr, axis=0)
                self.model_em_fits['std'][key] = np.std(
                    repets_param_fit_curr, axis=0)
                self.model_em_fits['sem'][key] = (
                    self.model_em_fits['std'][key] / np.sqrt(
                        num_repetitions))

        return self.model_em_fits
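
    # Minimal usage sketch (hypothetical harness; assumes an object exposing
    # T_space, num_datapoints, a sampler and the response cache used above):
    #   fits = fitter.get_model_em_fits(num_repetitions=3)
    #   print fits['mean']['kappa']          # collapsed kappa estimates
    #   print fits['sem']['mixt_target_tr']  # SEM across repetitions
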
    def fit_collapsed_mixture_model(self):
        '''
            Fit the new Collapsed Mixture Model, using data created
            just above in generate_data_subject_split.

            Do:
             * One fit per subject/nitems, using trecall as T_space
             * One fit per subject/trecall, using nitems as T_space
             * One fit per subject, using the double-powerlaw on nitems/trecall

        '''
        Tmax = self.dataset['data_subject_split']['nitems_space'].max()
        Tnum = self.dataset['data_subject_split']['nitems_space'].size


        self.dataset['collapsed_em_fits_subjects_nitems'] = dict()
        self.dataset['collapsed_em_fits_nitems'] = dict()

        self.dataset['collapsed_em_fits_subjects_trecall'] = dict()
        self.dataset['collapsed_em_fits_trecall'] = dict()

        self.dataset['collapsed_em_fits_doublepowerlaw_subjects'] = dict()
        self.dataset['collapsed_em_fits_doublepowerlaw'] = dict()
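        # Last axis of the summary array: kappa, mixt_target_tr,
        # mixt_nontargets_tr, mixt_random_tr (filled at the bottom of this method).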
        self.dataset['collapsed_em_fits_doublepowerlaw_array'] = np.nan*np.empty((Tnum, Tnum, 4))


        for subject, subject_data_dict in self.dataset['data_subject_split']['data_subject'].iteritems():
            print 'Fitting Collapsed Mixture model for subject %d' % subject

            if True:
                # Fit per n_items, using trecall as the collapsed T_space (somewhat unusual)
                for n_items_i, n_items in enumerate(self.dataset['data_subject_split']['nitems_space']):

                    print '%d nitems, using trecall as T_space' % n_items

                    params_fit = em_circularmixture_parametrickappa.fit(np.arange(1, n_items+1), subject_data_dict['responses'][n_items_i, :(n_items)], subject_data_dict['targets'][n_items_i, :(n_items)], subject_data_dict['nontargets'][n_items_i, :(n_items), :, :(n_items - 1)], debug=False)

                    self.dataset['collapsed_em_fits_subjects_nitems'].setdefault(subject, dict())[n_items] = params_fit

                # Fit per trecall, using n_items as the collapsed T_space (only n_items >= trecall exist)
                for trecall_i, trecall in enumerate(self.dataset['data_subject_split']['nitems_space']):

                    print 'trecall %d, using n_items as T_space' % trecall

                    params_fit = em_circularmixture_parametrickappa.fit(np.arange(trecall, Tmax+1), subject_data_dict['responses'][trecall_i:, trecall_i], subject_data_dict['targets'][trecall_i:, trecall_i], subject_data_dict['nontargets'][trecall_i:, trecall_i], debug=False)

                    self.dataset['collapsed_em_fits_subjects_trecall'].setdefault(subject, dict())[trecall] = params_fit

            # Now do the correct fit, with double powerlaw on nitems+trecall
            print 'Double powerlaw fit'

            params_fit_double = em_circularmixture_parametrickappa_doublepowerlaw.fit(self.dataset['data_subject_split']['nitems_space'], subject_data_dict['responses'], subject_data_dict['targets'], subject_data_dict['nontargets'], debug=False)
            self.dataset['collapsed_em_fits_doublepowerlaw_subjects'][subject] = params_fit_double


        if True:
            ## Now compute mean/std collapsed_em_fits_nitems
            self.dataset['collapsed_em_fits_nitems']['mean'] = dict()
            self.dataset['collapsed_em_fits_nitems']['std'] = dict()
            self.dataset['collapsed_em_fits_nitems']['sem'] = dict()
            self.dataset['collapsed_em_fits_nitems']['values'] = dict()

            # Need to extract the values for a subject/nitems pair, for all keys of em_fits. Annoying dictionary indexing needed
            emfits_keys = params_fit.keys()
            for n_items_i, n_items in enumerate(self.dataset['data_subject_split']['nitems_space']):
                for key in emfits_keys:
                    values_allsubjects = [self.dataset['collapsed_em_fits_subjects_nitems'][subject][n_items][key] for subject in self.dataset['data_subject_split']['subjects_space']]

                    self.dataset['collapsed_em_fits_nitems']['mean'].setdefault(n_items, dict())[key] = np.mean(values_allsubjects, axis=0)
                    self.dataset['collapsed_em_fits_nitems']['std'].setdefault(n_items, dict())[key] = np.std(values_allsubjects, axis=0)
                    self.dataset['collapsed_em_fits_nitems']['sem'].setdefault(n_items, dict())[key] = self.dataset['collapsed_em_fits_nitems']['std'][n_items][key]/np.sqrt(self.dataset['data_subject_split']['subjects_space'].size)
                    self.dataset['collapsed_em_fits_nitems']['values'].setdefault(n_items, dict())[key] = values_allsubjects

            ## Same for the other ones
            self.dataset['collapsed_em_fits_trecall']['mean'] = dict()
            self.dataset['collapsed_em_fits_trecall']['std'] = dict()
            self.dataset['collapsed_em_fits_trecall']['sem'] = dict()
            self.dataset['collapsed_em_fits_trecall']['values'] = dict()

            # Need to extract the values for a subject/nitems pair, for all keys of em_fits. Annoying dictionary indexing needed
            emfits_keys = params_fit.keys()
            for trecall_i, trecall in enumerate(self.dataset['data_subject_split']['nitems_space']):
                for key in emfits_keys:
                    values_allsubjects = [self.dataset['collapsed_em_fits_subjects_trecall'][subject][trecall][key] for subject in self.dataset['data_subject_split']['subjects_space']]

                    self.dataset['collapsed_em_fits_trecall']['mean'].setdefault(trecall, dict())[key] = np.mean(values_allsubjects, axis=0)
                    self.dataset['collapsed_em_fits_trecall']['std'].setdefault(trecall, dict())[key] = np.std(values_allsubjects, axis=0)
                    self.dataset['collapsed_em_fits_trecall']['sem'].setdefault(trecall, dict())[key] = self.dataset['collapsed_em_fits_trecall']['std'][trecall][key]/np.sqrt(self.dataset['data_subject_split']['subjects_space'].size)
                    self.dataset['collapsed_em_fits_trecall']['values'].setdefault(trecall, dict())[key] = values_allsubjects

        # Collapsed full double powerlaw model across subjects
        self.dataset['collapsed_em_fits_doublepowerlaw']['mean'] = dict()
        self.dataset['collapsed_em_fits_doublepowerlaw']['std'] = dict()
        self.dataset['collapsed_em_fits_doublepowerlaw']['sem'] = dict()
        self.dataset['collapsed_em_fits_doublepowerlaw']['values'] = dict()

        # Need to extract the values for a subject/nitems pair, for all keys of em_fits. Annoying dictionary indexing needed
        emfits_keys = params_fit_double.keys()
        for key in emfits_keys:
            values_allsubjects = [self.dataset['collapsed_em_fits_doublepowerlaw_subjects'][subject][key] for subject in self.dataset['data_subject_split']['subjects_space']]

            self.dataset['collapsed_em_fits_doublepowerlaw']['mean'][key] = np.mean(values_allsubjects, axis=0)
            self.dataset['collapsed_em_fits_doublepowerlaw']['std'][key] = np.std(values_allsubjects, axis=0)
            self.dataset['collapsed_em_fits_doublepowerlaw']['sem'][key] = self.dataset['collapsed_em_fits_doublepowerlaw']['std'][key]/np.sqrt(self.dataset['data_subject_split']['subjects_space'].size)
            self.dataset['collapsed_em_fits_doublepowerlaw']['values'][key] = values_allsubjects

        # Construct some easy arrays to compare the fit to the dataset
        self.dataset['collapsed_em_fits_doublepowerlaw_array'][..., 0] = self.dataset['collapsed_em_fits_doublepowerlaw']['mean']['kappa']
        self.dataset['collapsed_em_fits_doublepowerlaw_array'][..., 1] = self.dataset['collapsed_em_fits_doublepowerlaw']['mean']['mixt_target_tr']
        self.dataset['collapsed_em_fits_doublepowerlaw_array'][..., 2] = self.dataset['collapsed_em_fits_doublepowerlaw']['mean']['mixt_nontargets_tr']
        self.dataset['collapsed_em_fits_doublepowerlaw_array'][..., 3] = self.dataset['collapsed_em_fits_doublepowerlaw']['mean']['mixt_random_tr']
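
        # Presumably this (Tnum, Tnum, 4) array is what the data loader exposes
        # as data['collapsed_em_fits_doublepowerlaw_array'], i.e. the
        # gorgo11_sequ_collapsed_mixtmod_mean that the launcher above compares
        # its collapsed fits against.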