# These excerpts assume the repo's module-level imports are in scope, e.g.:
#   import numpy as np
#   import em_circularmixture, em_circmixtmodel
#   import launchers, progress
#   from dataio import DataIO      # (hypothetical module path)
#   from utils import cross        # (hypothetical module path)
def fit_mixture_model(self):
        '''
            Special fitting for this dual recall dataset
        '''

        N = self.dataset['probe_angle'].size
        self.dataset['em_fits'] = dict(
            kappa=np.empty(N),
            mixt_target=np.empty(N),
            mixt_nontargets=np.empty(N),
            mixt_random=np.empty(N),
            resp_target=np.empty(N),
            resp_nontarget=np.empty(N),
            resp_random=np.empty(N),
            train_LL=np.empty(N),
            test_LL=np.empty(N))
        for key in self.dataset['em_fits']:
            self.dataset['em_fits'][key].fill(np.nan)
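        # All arrays are per-trial; entries left as NaN mark trials that never
        # get fitted (masked trials or empty (subject, n_items) cells).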

        self.dataset['em_fits_angle_nitems_subjects'] = dict()
        self.dataset['em_fits_angle_nitems'] = dict(mean=dict(), std=dict(), values=dict())
        self.dataset['em_fits_colour_nitems_subjects'] = dict()
        self.dataset['em_fits_colour_nitems'] = dict(mean=dict(), std=dict(), values=dict())
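        # Nesting convention: ..._nitems['mean'][n_items][param] is a scalar
        # averaged over subjects, ['values'][n_items][param] keeps the
        # per-subject list, and ..._nitems_subjects[n_items][subject] holds
        # the full per-subject fit dict.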

        # This dataset is a bit special with regards to subjects, it's a conditional design:
        # 8 Subjects (1 - 8) only did 6 items, both angle/colour trials
        # 6 Subjects (9 - 14) did 3 items, both angle/colour trials.
        # We have 160 trials per (subject, n_item, condition).
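        # Quick sanity check of that design (a hedged sketch: it assumes both
        # conditions are pooled in the 'subject'/'n_items' arrays, i.e.
        # 2 x 160 trials per (subject, n_items) cell a subject actually did):
        for subj in self.dataset['subject_space']:
            for n_it in self.dataset['n_items_space']:
                count = ((self.dataset['subject'] == subj) &
                         (self.dataset['n_items'] == n_it)).sum()
                if count > 0 and count != 2*160:
                    print 'Unexpected trial count: subject %d, %d items -> %d' % (subj, n_it, count)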

        # Angles trials
        for n_items_i, n_items in enumerate(self.dataset['n_items_space']):
            for subject_i, subject in enumerate(self.dataset['subject_space']):
                ids_filtered = ((self.dataset['subject']==subject) & (self.dataset['n_items'] == n_items) & (self.dataset.get('masked', False) == False)).flatten()

                ids_filtered = self.dataset['angle_trials'] & ids_filtered

                if ids_filtered.sum() > 0:
                    print 'Angle trials, %d items, subject %d, %d datapoints' % (n_items, subject, self.dataset['probe_angle'][ids_filtered, 0].size)

                    # params_fit = em_circularmixture.fit(self.dataset['probe_angle'][ids_filtered, 0], self.dataset['item_angle'][ids_filtered, 0], self.dataset['item_angle'][ids_filtered, 1:])

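                    # 10-fold cross-validated EM fit: keep the parameter set
                    # with the best held-out log-likelihood ('best_fit'), and
                    # store its test LL ('best_test_LL') per trial below.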
                    cross_valid_outputs = em_circularmixture.cross_validation_kfold(self.dataset['probe_angle'][ids_filtered, 0], self.dataset['item_angle'][ids_filtered, 0], self.dataset['item_angle'][ids_filtered, 1:], K=10, shuffle=True, debug=False)
                    params_fit = cross_valid_outputs['best_fit']
                    resp = em_circularmixture.compute_responsibilities(self.dataset['probe_angle'][ids_filtered, 0], self.dataset['item_angle'][ids_filtered, 0], self.dataset['item_angle'][ids_filtered, 1:], params_fit)

                    self.dataset['em_fits']['kappa'][ids_filtered] = params_fit['kappa']
                    self.dataset['em_fits']['mixt_target'][ids_filtered] = params_fit['mixt_target']
                    self.dataset['em_fits']['mixt_nontargets'][ids_filtered] = params_fit['mixt_nontargets']
                    self.dataset['em_fits']['mixt_random'][ids_filtered] = params_fit['mixt_random']
                    self.dataset['em_fits']['resp_target'][ids_filtered] = resp['target']
                    self.dataset['em_fits']['resp_nontarget'][ids_filtered] = np.sum(resp['nontargets'], axis=1)
                    self.dataset['em_fits']['resp_random'][ids_filtered] = resp['random']
                    self.dataset['em_fits']['train_LL'][ids_filtered] = params_fit['train_LL']
                    self.dataset['em_fits']['test_LL'][ids_filtered] = cross_valid_outputs['best_test_LL']

                    self.dataset['em_fits_angle_nitems_subjects'].setdefault(n_items, dict())[subject] = params_fit

            ## Now compute mean/std em_fits per n_items
            self.dataset['em_fits_angle_nitems']['mean'][n_items] = dict()
            self.dataset['em_fits_angle_nitems']['std'][n_items] = dict()
            self.dataset['em_fits_angle_nitems']['values'][n_items] = dict()

            # Need to extract the values for a subject/nitems pair, for all keys of em_fits. Annoying dictionary indexing needed
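            # Note: params_fit here is whatever the last subject in the loop
            # above produced; all fits share the same keys, so it is only used
            # to enumerate them (this assumes at least one subject had data
            # for this n_items).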
            emfits_keys = params_fit.keys()
            for key in emfits_keys:
                values_allsubjects = [self.dataset['em_fits_angle_nitems_subjects'][n_items][subject][key] for subject in self.dataset['em_fits_angle_nitems_subjects'][n_items]]

                self.dataset['em_fits_angle_nitems']['mean'][n_items][key] = np.mean(values_allsubjects)
                self.dataset['em_fits_angle_nitems']['std'][n_items][key] = np.std(values_allsubjects)
                self.dataset['em_fits_angle_nitems']['values'][n_items][key] = values_allsubjects


        # Colour trials
        for n_items_i, n_items in enumerate(self.dataset['n_items_space']):
            for subject_i, subject in enumerate(self.dataset['subject_space']):
                ids_filtered = ((self.dataset['subject']==subject) & (self.dataset['n_items'] == n_items) & (self.dataset.get('masked', False) == False)).flatten()

                ids_filtered = self.dataset['colour_trials'] & ids_filtered

                if ids_filtered.sum() > 0:
                    print 'Colour trials, %d items, subject %d, %d datapoints' % (n_items, subject, self.dataset['probe_colour'][ids_filtered, 0].size)

                    cross_valid_outputs = em_circularmixture.cross_validation_kfold(self.dataset['probe_colour'][ids_filtered, 0], self.dataset['item_colour'][ids_filtered, 0], self.dataset['item_colour'][ids_filtered, 1:], K=10, shuffle=True, debug=False)
                    params_fit = cross_valid_outputs['best_fit']
                    resp = em_circularmixture.compute_responsibilities(self.dataset['probe_colour'][ids_filtered, 0], self.dataset['item_colour'][ids_filtered, 0], self.dataset['item_colour'][ids_filtered, 1:], params_fit)

                    self.dataset['em_fits']['kappa'][ids_filtered] = params_fit['kappa']
                    self.dataset['em_fits']['mixt_target'][ids_filtered] = params_fit['mixt_target']
                    self.dataset['em_fits']['mixt_nontargets'][ids_filtered] = params_fit['mixt_nontargets']
                    self.dataset['em_fits']['mixt_random'][ids_filtered] = params_fit['mixt_random']
                    self.dataset['em_fits']['resp_target'][ids_filtered] = resp['target']
                    self.dataset['em_fits']['resp_nontarget'][ids_filtered] = np.sum(resp['nontargets'], axis=1)
                    self.dataset['em_fits']['resp_random'][ids_filtered] = resp['random']
                    self.dataset['em_fits']['train_LL'][ids_filtered] = params_fit['train_LL']
                    self.dataset['em_fits']['test_LL'][ids_filtered] = cross_valid_outputs['best_test_LL']

                    self.dataset['em_fits_colour_nitems_subjects'].setdefault(n_items, dict())[subject] = params_fit

            ## Now compute mean/std em_fits per n_items
            self.dataset['em_fits_colour_nitems']['mean'][n_items] = dict()
            self.dataset['em_fits_colour_nitems']['std'][n_items] = dict()
            self.dataset['em_fits_colour_nitems']['values'][n_items] = dict()

            # Need to extract the values for a subject/nitems pair, for all keys of em_fits. Annoying dictionary indexing needed
            emfits_keys = params_fit.keys()
            for key in emfits_keys:
                values_allsubjects = [self.dataset['em_fits_colour_nitems_subjects'][n_items][subject][key] for subject in self.dataset['em_fits_colour_nitems_subjects'][n_items]]

                self.dataset['em_fits_colour_nitems']['mean'][n_items][key] = np.mean(values_allsubjects)
                self.dataset['em_fits_colour_nitems']['std'][n_items][key] = np.std(values_allsubjects)
                self.dataset['em_fits_colour_nitems']['values'][n_items][key] = values_allsubjects

        ## Construct array versions of the em_fits_nitems mixture proportions, for convenience
        self.construct_arrays_em_fits()
def launcher_do_mixed_special_stimuli(args):
    '''
        Fit the mixed model, varying ratio_conj.
        See how recall precision and the mixture model parameters evolve.
    '''

    print "Doing a piece of work for launcher_do_mixed_special_stimuli"

    try:
        # Convert Argparse.Namespace to dict
        all_parameters = vars(args)
    except TypeError:
        # Assume it's already done
        assert type(args) is dict, "args is neither an argparse.Namespace nor a dict"
        all_parameters = args

    print all_parameters

    # Create DataIO
    #  (complete label with current variable state)
    dataio = DataIO(output_folder=all_parameters['output_directory'], label=all_parameters['label'].format(**all_parameters))
    save_every = 1
    run_counter = 0

    # Parameters to vary
    ratio_space = (np.arange(0, all_parameters['M']**0.5)**2.)/all_parameters['M']
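    # Quadratically spaced ratios in [0, 1): e.g. with M = 100 this gives
    # [0., 0.01, 0.04, 0.09, ..., 0.81] (arange stops before sqrt(M)).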

    # Result arrays
    result_all_precisions = np.nan*np.ones((ratio_space.size, all_parameters['num_repetitions']))
    result_em_fits = np.nan*np.ones((ratio_space.size, 5, all_parameters['num_repetitions']))  # kappa, mixt_target, mixt_nontarget, mixt_random, ll
    result_em_resp = np.nan*np.ones((ratio_space.size, 1+all_parameters['T'], all_parameters['N'], all_parameters['num_repetitions']))

    # If desired, will automatically save all Model responses.
    if all_parameters['subaction'] == 'collect_responses':
        result_responses = np.nan*np.ones((ratio_space.size, all_parameters['N'], all_parameters['num_repetitions']))
        result_target = np.nan*np.ones((ratio_space.size, all_parameters['N'], all_parameters['num_repetitions']))
        result_nontargets = np.nan*np.ones((ratio_space.size, all_parameters['N'], all_parameters['T']-1, all_parameters['num_repetitions']))


    search_progress = progress.Progress(ratio_space.size*all_parameters['num_repetitions'])

    for repet_i in xrange(all_parameters['num_repetitions']):
        for ratio_i, ratio_conj in enumerate(ratio_space):
            print "%.2f%%, %s left - %s" % (search_progress.percentage(), search_progress.time_remaining_str(), search_progress.eta_str())

            print "Fit for ratio_conj=%.2f, %d/%d" % (ratio_conj, repet_i+1, all_parameters['num_repetitions'])

            # Update parameter
            all_parameters['ratio_conj'] = ratio_conj

            ### Work ###

            # Generate specific stimuli
            all_parameters['stimuli_generation'] = 'specific_stimuli'

            # Instantiate
            (random_network, data_gen, stat_meas, sampler) = launchers.init_everything(all_parameters)

            # Sample
            sampler.run_inference(all_parameters)

            # Compute precision
            result_all_precisions[ratio_i, repet_i] = sampler.get_precision()

            # Fit mixture model
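            # (compute_responsibilities returns, per response, the posterior
            # probability of the target, each nontarget, and the uniform
            # random component under the fitted mixture)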
            curr_params_fit = em_circularmixture.fit(*sampler.collect_responses())
            curr_resp = em_circularmixture.compute_responsibilities(*(sampler.collect_responses() + (curr_params_fit,) ))

            result_em_fits[ratio_i, :, repet_i] = [curr_params_fit[key] for key in ('kappa', 'mixt_target', 'mixt_nontargets', 'mixt_random', 'train_LL')]
            result_em_resp[ratio_i, 0, :, repet_i] = curr_resp['target']
            result_em_resp[ratio_i, 1:-1, :, repet_i] = curr_resp['nontargets'].T
            result_em_resp[ratio_i, -1, :, repet_i] = curr_resp['random']

            print result_all_precisions[ratio_i, repet_i], curr_params_fit

            # If needed, store responses
            if all_parameters['subaction'] == 'collect_responses':
                (responses, target, nontarget) = sampler.collect_responses()
                result_responses[ratio_i, :, repet_i] = responses
                result_target[ratio_i, :, repet_i] = target
                result_nontargets[ratio_i, ..., repet_i] = nontarget

                print "collected responses"

            ### /Work ###

            search_progress.increment()
            if run_counter % save_every == 0 or search_progress.done():
                dataio.save_variables_default(locals())
            run_counter += 1

    # Finished
    dataio.save_variables_default(locals())

    print "All finished"
    return locals()
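
# A minimal invocation sketch for this launcher (hedged: the full argument set
# comes from the repo's argparse setup elsewhere; only the keys this function
# reads directly are shown, with illustrative values):
#
#   args = dict(output_directory='./Data',
#               label='mixed_special_M{M}',
#               M=100, N=200, T=3,
#               num_repetitions=5,
#               subaction='',
#               ...)  # plus whatever init_everything()/run_inference() require
#   results = launcher_do_mixed_special_stimuli(args)
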
# Code example #3
    def fit_mixture_model(self):
        N = self.dataset['probe'].size

        # Initialize empty arrays and dicts
        self.dataset['em_fits'] = dict(kappa=np.empty(N),
                                       mixt_target=np.empty(N),
                                       mixt_nontargets=np.empty(N),
                                       mixt_nontargets_sum=np.empty(N),
                                       mixt_random=np.empty(N),
                                       resp_target=np.empty(N),
                                       resp_nontarget=np.empty(N),
                                       resp_random=np.empty(N),
                                       train_LL=np.empty(N),
                                       test_LL=np.empty(N),
                                       K=np.empty(N),
                                       bic=np.empty(N),
                                       aic=np.empty(N),
                                       )
        for key in self.dataset['em_fits']:
            self.dataset['em_fits'][key].fill(np.nan)
        self.dataset['target'] = np.empty(N)
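        # np.empty leaves this uninitialised; every entry is expected to be
        # overwritten in the per-(subject, n_items) loop below.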
        self.dataset['em_fits_subjects_nitems'] = dict()
        for subject in np.unique(self.dataset['subject']):
            self.dataset['em_fits_subjects_nitems'][subject] = dict()
        self.dataset['em_fits_nitems'] = dict(mean=dict(), std=dict(), values=dict())

        # Compute mixture model fits per n_items and per subject
        for n_items in np.unique(self.dataset['n_items']):
            for subject in np.unique(self.dataset['subject']):
                ids_filter = (self.dataset['subject'] == subject).flatten() & \
                             (self.dataset['n_items'] == n_items).flatten()
                print "Fit mixture model, %d items, subject %d, %d datapoints" % (subject, n_items, np.sum(ids_filter))

                self.dataset['target'][ids_filter] = self.dataset['item_angle'][ids_filter, 0]

                params_fit = em_circmixtmodel.fit(
                    self.dataset['response'][ids_filter, 0],
                    self.dataset['item_angle'][ids_filter, 0],
                    self.dataset['item_angle'][ids_filter, 1:]
                )
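                # Collapse the per-nontarget mixture weights into one overall
                # nontarget proportion, stored alongside the raw fit.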
                params_fit['mixt_nontargets_sum'] = np.sum(
                    params_fit['mixt_nontargets']
                )

                resp = em_circmixtmodel.compute_responsibilities(
                    self.dataset['response'][ids_filter, 0],
                    self.dataset['item_angle'][ids_filter, 0],
                    self.dataset['item_angle'][ids_filter, 1:],
                    params_fit
                )

                # Copy all data
                for k, v in params_fit.iteritems():
                    self.dataset['em_fits'][k][ids_filter] = v

                self.dataset['em_fits']['resp_target'][ids_filter] = \
                    resp['target']
                self.dataset['em_fits']['resp_nontarget'][ids_filter] = \
                    np.sum(resp['nontargets'], axis=1)
                self.dataset['em_fits']['resp_random'][ids_filter] = \
                    resp['random']

                self.dataset['em_fits_subjects_nitems'][subject][n_items] = params_fit


            ## Now compute mean/std em_fits per n_items
            self.dataset['em_fits_nitems']['mean'][n_items] = dict()
            self.dataset['em_fits_nitems']['std'][n_items] = dict()
            self.dataset['em_fits_nitems']['values'][n_items] = dict()

            # Need to extract the values for a subject/nitems pair, for all keys of em_fits. Annoying dictionary indexing needed
            emfits_keys = params_fit.keys()
            for key in emfits_keys:
                values_allsubjects = [self.dataset['em_fits_subjects_nitems'][subject][n_items][key] for subject in np.unique(self.dataset['subject'])]

                self.dataset['em_fits_nitems']['mean'][n_items][key] = np.mean(values_allsubjects)
                self.dataset['em_fits_nitems']['std'][n_items][key] = np.std(values_allsubjects)
                self.dataset['em_fits_nitems']['values'][n_items][key] = values_allsubjects

        ## Construct array versions of the em_fits_nitems mixture proportions, for convenience
        self.construct_arrays_em_fits()
def launcher_do_hierarchical_special_stimuli_varyMMlower(args):
    '''
        Fit the hierarchical model, varying how the M units are split between
        M and M_layer_one (valid pairs sum to the original M).
        See how recall precision and the mixture model parameters evolve.
    '''

    print "Doing a piece of work for launcher_do_mixed_special_stimuli"

    try:
        # Convert Argparse.Namespace to dict
        all_parameters = vars(args)
    except TypeError:
        # Assume it's already done
        assert type(args) is dict, "args is neither an argparse.Namespace nor a dict"
        all_parameters = args

    print all_parameters

    # Create DataIO
    #  (complete label with current variable state)
    dataio = DataIO(output_folder=all_parameters['output_directory'], label=all_parameters['label'].format(**all_parameters))
    save_every = 1
    run_counter = 0

    # Parameters to vary
    M_space = np.arange(1, all_parameters['M']+1)
    M_lower_space = np.arange(2, all_parameters['M']+1, 2)
    MMlower_all = np.array(cross(M_space, M_lower_space))
    MMlower_valid_space = MMlower_all[np.nonzero(np.sum(MMlower_all, axis=1) == all_parameters['M'])[0]]
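    # e.g. with M = 10: M_space = 1..10, M_lower_space = 2,4,6,8,10, and the
    # pairs kept (summing to 10) are (8,2), (6,4), (4,6), (2,8).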

    # Subsample the valid pairs to keep the sweep tractable (every 5th pair)
    MMlower_valid_space = MMlower_valid_space[::5]
    print "MMlower size", MMlower_valid_space.shape[0]

    # Result arrays
    result_all_precisions = np.nan*np.ones((MMlower_valid_space.shape[0], all_parameters['num_repetitions']))
    result_em_fits = np.nan*np.ones((MMlower_valid_space.shape[0], 5, all_parameters['num_repetitions']))  # kappa, mixt_target, mixt_nontarget, mixt_random, ll
    result_em_resp = np.nan*np.ones((MMlower_valid_space.shape[0], 1+all_parameters['T'], all_parameters['N'], all_parameters['num_repetitions']))

    # If desired, will automatically save all Model responses.
    if all_parameters['subaction'] == 'collect_responses':
        result_responses = np.nan*np.ones((MMlower_valid_space.shape[0], all_parameters['N'], all_parameters['num_repetitions']))
        result_target = np.nan*np.ones((MMlower_valid_space.shape[0], all_parameters['N'], all_parameters['num_repetitions']))
        result_nontargets = np.nan*np.ones((MMlower_valid_space.shape[0], all_parameters['N'], all_parameters['T']-1, all_parameters['num_repetitions']))


    search_progress = progress.Progress(MMlower_valid_space.shape[0]*all_parameters['num_repetitions'])

    for repet_i in xrange(all_parameters['num_repetitions']):
        for MMlower_i, MMlower in enumerate(MMlower_valid_space):
            print "%.2f%%, %s left - %s" % (search_progress.percentage(), search_progress.time_remaining_str(), search_progress.eta_str())

            print "Fit for M=%d, Mlower=%d, %d/%d" % (MMlower[0], MMlower[1], repet_i+1, all_parameters['num_repetitions'])

            # Update parameter
            all_parameters['M']             = MMlower[0]
            all_parameters['M_layer_one']   = MMlower[1]

            ### Work ###

            # Generate specific stimuli
            all_parameters['stimuli_generation'] = 'specific_stimuli'
            all_parameters['code_type'] = 'hierarchical'

            # Instantiate
            (random_network, data_gen, stat_meas, sampler) = launchers.init_everything(all_parameters)

            # Sample
            sampler.run_inference(all_parameters)

            # Compute precision
            result_all_precisions[MMlower_i, repet_i] = sampler.get_precision()

            # Fit mixture model
            curr_params_fit = em_circularmixture.fit(*sampler.collect_responses())
            curr_resp = em_circularmixture.compute_responsibilities(*(sampler.collect_responses() + (curr_params_fit,) ))

            print curr_params_fit

            result_em_fits[MMlower_i, :, repet_i] = [curr_params_fit[key] for key in ('kappa', 'mixt_target', 'mixt_nontargets', 'mixt_random', 'train_LL')]
            result_em_resp[MMlower_i, 0, :, repet_i] = curr_resp['target']
            result_em_resp[MMlower_i, 1:-1, :, repet_i] = curr_resp['nontargets'].T
            result_em_resp[MMlower_i, -1, :, repet_i] = curr_resp['random']

            # If needed, store responses
            if all_parameters['subaction'] == 'collect_responses':
                (responses, target, nontarget) = sampler.collect_responses()
                result_responses[MMlower_i, :, repet_i] = responses
                result_target[MMlower_i, :, repet_i] = target
                result_nontargets[MMlower_i, ..., repet_i] = nontarget

                print "collected responses"

            ### /Work ###

            search_progress.increment()
            if run_counter % save_every == 0 or search_progress.done():
                dataio.save_variables_default(locals())
            run_counter += 1

    # Finished
    dataio.save_variables_default(locals())

    print "All finished"
    return locals()