Example #1
import numpy as np
import pandas

import myutils  # local analysis module assumed by these examples (provides times2bins, etc.)
def bin_flat_spike_data2(fsd, trial_counter=None, F_SAMP=30e3, n_bins=75, 
    t_start=-.25, t_stop=.5, split_on=None, include_trials='hits',
    split_on_filter=None):
    """Bins in time over trials, splitting on split_on.
    
    fsd : a flat array of spike times, with replaced stimulus names
    split_on : REQUIRED, how to split fsd, eg ['session', 'unit']
    split_on_filter : list of keys to be included, after splitting
        if None, then everything is included
    
    It will be separately binned over sound.
    """
    
    if split_on is None:
        raise ValueError("split_on is required, e.g. ['session', 'unit']")
    
    # iterate over the groups and bin each one
    rec_l = []    
    for key, df in fsd.groupby(split_on):
        if split_on_filter is not None and key not in split_on_filter:
            continue
        
        # get session name (constant within each group)
        session_l = np.unique(np.asarray(df.session))
        assert len(session_l) == 1, "expected exactly one session per group"
        session = session_l[0]
        
        for sound_name in ['lehi', 'rihi', 'lelo', 'rilo']:
            for block_name in ['LB', 'PB']:
                # subframe for this sound * block
                subdf = df[(df.sound == sound_name) & (df.block == block_name)]
                
                # histogramming
                counts, t_vals = myutils.times2bins(
                    np.asarray(subdf.adj_time), f_samp=F_SAMP, 
                    t_start=t_start, t_stop=t_stop, bins=n_bins,
                    return_t=True)
        
                # count trials
                n_trials = trial_counter(session=session, block=block_name, 
                    sound=sound_name, include_trials=include_trials)
                
                # This check is disabled because the user might request
                # a subset of the original trial set:
                #if n_trials < len(np.unique(np.asarray(subdf.trial))):
                #    raise ValueError("counted more trials than exist")
        
                # Add in the keyed info (session etc.), plus
                # counts, n_trials, and time bin
                key_l = list(key) if isinstance(key, tuple) else [key]
                this_frame = [key_l +
                    [sound_name, block_name, count, n_trials, t_val]
                    for count, t_val in zip(counts, t_vals)]
                
                # append to growing list
                rec_l += this_frame
    
    # convert to new data frame, using same keyed columns plus our new ones
    cols = split_on + ['sound', 'block', 'counts', 'trials', 'time']
    newdf = pandas.DataFrame(rec_l, columns=cols)
    return newdf
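
A minimal usage sketch for the function above. Everything here is illustrative: fsd is assumed to be a flat spike frame as described in the docstring, and the constant-valued counter is a hypothetical stand-in for a real trial counter.

# Hypothetical driver for bin_flat_spike_data2. A real trial_counter
# would query the trial records; this stub just returns a fixed count.
def demo_trial_counter(session=None, block=None, sound=None,
    include_trials='hits'):
    return 50

binned = bin_flat_spike_data2(fsd, trial_counter=demo_trial_counter,
    split_on=['session', 'unit'],
    split_on_filter=[('YT6A_0101', 'u3')],  # keep only this (session, unit)
    n_bins=75, t_start=-.25, t_stop=.5)

# one row per (session, unit, sound, block, time bin)
print(binned[['sound', 'block', 'counts', 'trials', 'time']].head())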
Example #2
import numpy as np

import myutils  # provides times2bins; fold() and compare_rasters() are local helpers
def calc_effect_size_by_sound(fdf, trial_lister=None, 
    comp_meth='ttest', p_adj_meth='BH', split_on=None, split_on_filter=None,
    t_start=-.25, t_stop=.5, bins=75, mag_meth='diff'):
    """Calculates a heat map of effect size for each sound, masked by p-value.
    
    Groups spikes by :split_on:, eg nid. Then compares across blocks,
    using compare_rasters(comp_meth, p_adj_meth).
    
    fdf : flat spike times
    trial_list : SpikeSorter.list_trials_by_type
    fig : figure to plot into
    comp_meth : how to compare the binned spike times across  blocks
    
    Returns
        mag_d : dict of effect magnitude by sound name
        p_d : dict of p-value by sound name
        names : list of values of :split_on:
        t : time bins
    """
    names, mag_d, p_d = [], {}, {}
    if split_on is None:
        g = {None: fdf}.items()
    else:
        g = fdf.groupby(split_on)
    
    # Iterate over groups
    n_incl = 0
    for key, df in g:        
        if split_on_filter is not None and key not in split_on_filter:
            continue
        else:
            n_incl += 1
        
        # get session name
        session_l = np.unique(np.asarray(df.session))
        if len(session_l) != 1:
            raise ValueError("must be exactly one session per group")
        session = session_l[0]
        names.append(key)

        # iterate over sound * block
        g1 = df.groupby(['sound', 'block'])    
        for n, sound_name in enumerate(['lehi', 'rihi', 'lelo', 'rilo']):
            # Keep a running list in each sound
            if sound_name not in mag_d:
                mag_d[sound_name] = []
                p_d[sound_name] = []
            
            # iterate over blocks and get folded spike times
            fsdict = {}
            for block_name in ['LB', 'PB']:
                # get spikes from this sound * block
                try:
                    x = df.loc[g1.groups[sound_name, block_name]]
                except KeyError:
                    # no spikes of this sound * block!
                    x = []
                
                # fold by trial
                folded_spikes = fold(x, trial_lister(session, 
                    sound=sound_name, block=block_name))
                
                # convert to bins
                ns, t = myutils.times2bins(folded_spikes, return_t=True,
                    t_start=t_start, t_stop=t_stop, bins=bins, f_samp=30000.)
                
                fsdict[block_name] = ns.transpose()
            
            # Do the comparison
            mag, p = compare_rasters(fsdict['LB'], fsdict['PB'],
                meth=comp_meth, p_adj_meth=p_adj_meth, mag_meth=mag_meth)
            mag_d[sound_name].append(mag)
            p_d[sound_name].append(p)
    
    for key in mag_d:
        mag_d[key] = np.array(mag_d[key])
        p_d[key] = np.array(p_d[key])
    
    if split_on_filter is not None and len(split_on_filter) != n_incl:
        print "warning: %d in filter but only found %d" % (len(split_on_filter),
            n_incl)
    
    return mag_d, p_d, names, t
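
A sketch of how the outputs might be consumed, assuming a sorter object that exposes list_trials_by_type (as the docstring suggests); the significance threshold is arbitrary.

# Hypothetical driver: compare LB vs PB per (session, unit), then blank
# out effect magnitudes wherever the adjusted p-value is not significant.
mag_d, p_d, names, t = calc_effect_size_by_sound(fdf,
    trial_lister=sorter.list_trials_by_type,
    split_on=['session', 'unit'], comp_meth='ttest', p_adj_meth='BH')

masked_d = dict((sound, np.where(p_d[sound] < .05, mag_d[sound], np.nan))
    for sound in mag_d)  # rows follow `names`, columns follow `t`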
Example #3
    def get_binned_spikes3(self, spike_filter=None, trial_filter=None):
        """Generic binning function operating on self._fsd
        
        spike_filter : dataframe describing how to split fsd
            The columns are the hierarchy to split on:
                eg ['session', 'unit']
            The items are the ones to include.
            If no items, then everything is included.
            If None, then bin over everything except 'adj_time' or 'spike_time'
            
            Here we delineate every combination because it's not separable
            over session and unit (usually).
        
        trial_filter : dataframe describing how to group the trials
            For instance: hits only, stim_numbers 1-12.
            Unlike spike_filter, we do not enumerate every combination
            here, because we can simply take the intersection over stim
            number and outcome (though note we might sometimes want to
            combine outcomes, e.g. errors and wrongports).
            
            Implicitly, this grouping is applied within each session in
            spike_filter.
        
        First the spikes are grouped over the columns in spike_filter.
        For each group, the trials are grouped over the columns in
        trial_filter. Each cross-result is histogrammed, and all results
        are concatenated and returned.
        
        The actual histogramming is done by myutils.times2bins, using
        self.f_samp, self.t_start, self.t_stop, and self.bins.
        """
        fsd = self._fsd
        
        # default, use all columns and include all data
        if spike_filter is None:
            col_list = fsd.columns.tolist()
            remove_cols = ['adj_time', 'spike_time']
            for col in remove_cols:
                if col in col_list:
                    col_list.remove(col)
            spike_filter = pandas.DataFrame(columns=col_list)
        
        # Choose data from `fsd` by defining the following variables:
        #   `keylist` : a list of keys to include, each separately binned
        #   `grouped_data` : a dict from each key in keylist, to the data
        #   `keynames` : what to call each entry of the key in the result
        if len(spike_filter) == 0:
            # use all data
            keynames = spike_filter.columns.tolist()
            keylist = [tuple([myutils.only_one(fsd[col])
                for col in keynames])]
            grouped_data = {keylist[0]: fsd}
        elif len(spike_filter) == 1:
            # Optimized for the case of selecting a single unit
            d = {}
            for col in spike_filter:
                d[col] = spike_filter[col][0]            
            mask = myutils.pick_mask(fsd, **d)
            
            keylist = spike_filter.to_records(index=False)  # length 1
            keynames = spike_filter.columns.tolist()
            grouped_data = {keylist[0]: fsd.loc[mask]}
        else:
            # standard case
            g = fsd.groupby(spike_filter.columns.tolist())
            # g.groups maps each key to row labels, not to the subframe
            # itself, so build the dict of subframes explicitly
            grouped_data = dict(iter(g))
            keylist = [tuple(rec) for rec in
                spike_filter.to_records(index=False)]
            keynames = spike_filter.columns.tolist()
        
        # Now group the trials, indexed by (session, trial)
        att = self.all_trials.reset_index().set_index(
            ['session', 'trial'], drop=False)
        g2 = att.groupby(trial_filter.columns.tolist())
        
        # Now iterate through the keys in keylist and the corresponding values
        # in grouped_data.
        rec_l = []    
        for key in keylist:
            # Take the data from this group
            subdf = grouped_data[key]
            
            for g2k, g2v in g2:
                # count trials of this type from this session
                session = myutils.only_one(subdf.session)
                n_trials = len(g2v.loc[session])
                if n_trials == 0:
                    # for example if a possible combination never actually
                    # occurred
                    continue

                # Join the spikes on the columns of trial filter
                subsubdf = subdf.join(g2v[trial_filter.columns], 
                    on=['session', 'trial'], how='inner', rsuffix='rrr')
                
                # check for already-joined columns
                for col in trial_filter.columns:
                    if col+'rrr' in subsubdf.columns:
                        assert (subsubdf[col] == subsubdf[col+'rrr']).all()
                        subsubdf.pop(col + 'rrr')
            
                # histogramming
                counts, t_vals = myutils.times2bins(
                    np.asarray(subsubdf.adj_time), return_t=True, 
                    f_samp=self.f_samp, t_start=self.t_start,
                    t_stop=self.t_stop, bins=self.bins)
                
                # Add in the keyed info (session etc), plus 
                # n_counts, n_trials, and bin
                frame_label = list(key) + list(np.array([g2k]).flatten())
                this_frame = [frame_label +
                    [count, n_trials, t_val, bin_idx]
                    for bin_idx, (count, t_val) in enumerate(zip(counts, t_vals))]
                
                # append to growing list
                rec_l += this_frame
        
        # convert to new data frame, using same keyed columns plus our new ones
        cols = keynames + trial_filter.columns.tolist() + [
            'counts', 'trials', 'time', 'bin']
        newdf = pandas.DataFrame(rec_l, columns=cols)
        
        # combinations that never actually occurred were already skipped above
        return newdf
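
A sketch of how the two filter frames might be constructed for this method; the session and unit values are hypothetical, and sorter stands for an instance of the enclosing class.

# Hypothetical call: bin a single unit, grouping its trials by outcome.
# A trial_filter with columns but no rows means "group over every outcome".
spike_filter = pandas.DataFrame([('YT6A_0101', 'u3')],
    columns=['session', 'unit'])
trial_filter = pandas.DataFrame(columns=['outcome'])

binned = sorter.get_binned_spikes3(spike_filter=spike_filter,
    trial_filter=trial_filter)
# columns: session, unit, outcome, counts, trials, time, bin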
Example #4
    def get_binned_spikes_by_trial(self, split_on, split_on_filter=None,
        f_samp=30e3, t_start=-.25, t_stop=.5, bins=75, include_trials='hits'):
        """Returns binned data separately for each trial.
        
        There is a variable number of columns bin%d, depending on the number
        you request.
        
        Format:
        <class 'pandas.core.frame.DataFrame'>
        Int64Index: 23202 entries, 0 to 23201
        Data columns:
        session    23202  non-null values
        tetrode    23202  non-null values
        sound      23202  non-null values
        block      23202  non-null values
        trial      23202  non-null values
        bin0       23202  non-null values
        dtypes: int64(3), object(3)
        """

        fsd = self.read_flat_spikes_and_trials(stim_number_filter=range(5,13),
            include_trials=include_trials)
        replace_stim_numbers_with_names(fsd)
        
        g = fsd.groupby(split_on)
        
        dfs = []
        for key, val in g:
            if split_on_filter is not None and key not in split_on_filter:
                continue
            
            for sound_name in ['lehi', 'rihi', 'lelo', 'rilo']:
                for block_name in ['LB', 'PB']:
                    # subframe
                    subdf = val[(val.sound == sound_name) & 
                        (val.block == block_name)]
                    
                    # get session name
                    session_l = np.unique(np.asarray(subdf.session))
                    if len(session_l) == 0:
                        continue
                    elif len(session_l) > 1:
                        raise ValueError("non-unique sessions, somehow")
                    else:
                        session = session_l[0]

                    trial_list = self.list_trials_by_type(session=session,
                        sound=sound_name, block=block_name, 
                        include_trials=include_trials)
            
                    counts, times = myutils.times2bins(
                        fold(subdf, trial_list),
                        f_samp=f_samp, t_start=t_start, t_stop=t_stop, bins=bins, 
                        return_t=True)
                    
                    key_l = list(key) if isinstance(key, tuple) else [key]
                    this_frame = [key_l + [sound_name, block_name, trial]
                        + list(count)
                        for count, trial in zip(counts, trial_list)]
                    
                    dfs.append(pandas.DataFrame(this_frame,
                        columns=(split_on + ['sound', 'block', 'trial'] +
                        ['bin%d' % n for n in range(counts.shape[1])])))
            
        return pandas.concat(dfs, ignore_index=True)
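
Finally, a usage sketch for the per-trial variant; the split values are illustrative and sorter again stands for an instance of the enclosing class.

# Hypothetical call: per-trial bins for each (session, tetrode), hits only.
btdf = sorter.get_binned_spikes_by_trial(
    split_on=['session', 'tetrode'], split_on_filter=None,
    bins=75, include_trials='hits')
# one row per trial: session, tetrode, sound, block, trial, bin0 .. bin74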