import numpy as np
import pandas

import myutils

# Helpers such as `fold`, `compare_rasters`, and
# `replace_stim_numbers_with_names` are assumed to be defined elsewhere
# in this module.


def bin_flat_spike_data2(fsd, trial_counter=None, F_SAMP=30e3, n_bins=75,
    t_start=-.25, t_stop=.5, split_on=None, include_trials='hits',
    split_on_filter=None):
    """Bins in time over trials, splitting on split_on.

    fsd : a flat array of spike times, with replaced stimulus names
    trial_counter : REQUIRED, callable returning the number of trials
        for a given session, block, sound, and include_trials
    split_on : REQUIRED, how to split fsd, eg ['session', 'unit']
    split_on_filter : list of keys to be included, after splitting.
        If None, then everything is included.

    It will be separately binned over sound.
    """
    if split_on is None:
        split_on = []

    # iterate over the groups and bin each one
    rec_l = []
    for key, df in fsd.groupby(split_on):
        if split_on_filter is not None and key not in split_on_filter:
            continue
        for sound_name in ['lehi', 'rihi', 'lelo', 'rilo']:
            for block_name in ['LB', 'PB']:
                # subframe
                subdf = df[(df.sound == sound_name) &
                    (df.block == block_name)]

                # get session name
                session_l = np.unique(np.asarray(df.session))
                assert len(session_l) == 1
                session = session_l[0]

                # histogramming
                counts, t_vals = myutils.times2bins(
                    np.asarray(subdf.adj_time), f_samp=F_SAMP,
                    t_start=t_start, t_stop=t_stop, bins=n_bins,
                    return_t=True)

                # count trials
                n_trials = trial_counter(session=session, block=block_name,
                    sound=sound_name, include_trials=include_trials)

                # Do not check n_trials against the trials with spikes,
                # because the user might request a subset of the original
                # trial set.
                #if n_trials < len(np.unique(np.asarray(subdf.trial))):
                #    raise ValueError("counted more trials than exist")

                # Add in the keyed info (session etc), plus
                # n_counts, n_trials, and bin
                this_frame = [
                    list(key) + [sound_name, block_name, count, n_trials,
                        t_val]
                    for count, t_val in zip(counts, t_vals)]

                # append to growing list
                rec_l += this_frame

    # convert to new data frame, using the keyed columns plus our new ones
    cols = split_on + ['sound', 'block', 'counts', 'trials', 'time']
    newdf = pandas.DataFrame(rec_l, columns=cols)
    return newdf
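# A minimal sketch (hypothetical helper, synthetic data) showing how the
# long-format frame returned by bin_flat_spike_data2 can be reshaped into
# a block x time PSTH. The column names match those created above.
def _example_psth_pivot():
    newdf = pandas.DataFrame({
        'session': ['s1'] * 6,
        'unit': [0] * 6,
        'sound': ['lehi'] * 6,
        'block': ['LB'] * 3 + ['PB'] * 3,
        'counts': [4, 7, 5, 2, 3, 1],
        'trials': [20] * 6,
        'time': [-0.2, 0.0, 0.2] * 2,
    })
    # normalize counts by trial count, then pivot to block x time
    newdf['rate'] = newdf['counts'] / newdf['trials'].astype(float)
    return newdf.pivot_table(index='block', columns='time', values='rate')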
def calc_effect_size_by_sound(fdf, trial_lister=None, comp_meth='ttest',
    p_adj_meth='BH', split_on=None, split_on_filter=None, t_start=-.25,
    t_stop=.5, bins=75, mag_meth='diff'):
    """Calculates a heat map of effect size for each sound, masked by p-value.

    Groups spikes by :split_on:, eg nid. Then compares across blocks,
    using compare_rasters(comp_meth, p_adj_meth).

    fdf : flat spike times
    trial_lister : SpikeSorter.list_trials_by_type
    comp_meth : how to compare the binned spike times across blocks

    Returns:
    mag_d : dict of effect magnitude by sound name
    p_d : dict of p-value by sound name
    names : list of values of :split_on:
    t : time bins
    """
    names, mag_d, p_d = [], {}, {}
    if split_on is None:
        g = {None: fdf}.items()
    else:
        g = fdf.groupby(split_on)

    # Iterate over groups
    n_incl = 0
    for key, df in g:
        if split_on_filter is not None and key not in split_on_filter:
            continue
        n_incl += 1

        # get session name
        session_l = np.unique(np.asarray(df.session))
        if len(session_l) != 1:
            raise ValueError("must be exactly one session per group")
        session = session_l[0]
        names.append(key)

        # iterate over sound * block
        g1 = df.groupby(['sound', 'block'])
        for sound_name in ['lehi', 'rihi', 'lelo', 'rilo']:
            # Keep a running list in each sound
            if sound_name not in mag_d:
                mag_d[sound_name] = []
                p_d[sound_name] = []

            # iterate over blocks and get folded spike times
            fsdict = {}
            for block_name in ['LB', 'PB']:
                # get spikes from this sound * block
                try:
                    x = df.ix[g1.groups[sound_name, block_name]]
                except KeyError:
                    # no spikes of this sound * block!
                    x = []

                # fold by trial
                folded_spikes = fold(x, trial_lister(session,
                    sound=sound_name, block=block_name))

                # convert to bins
                ns, t = myutils.times2bins(folded_spikes, return_t=True,
                    t_start=t_start, t_stop=t_stop, bins=bins,
                    f_samp=30000.)
                fsdict[block_name] = ns.transpose()

            # Do the comparison
            mag, p = compare_rasters(fsdict['LB'], fsdict['PB'],
                meth=comp_meth, p_adj_meth=p_adj_meth, mag_meth=mag_meth)
            mag_d[sound_name].append(mag)
            p_d[sound_name].append(p)

    for key in mag_d:
        mag_d[key] = np.array(mag_d[key])
        p_d[key] = np.array(p_d[key])

    if split_on_filter is not None and len(split_on_filter) != n_incl:
        print("warning: %d in filter but only found %d" % (
            len(split_on_filter), n_incl))

    return mag_d, p_d, names, t
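# `compare_rasters` is defined elsewhere in this module. Below is a rough
# sketch, not the module's implementation, of what its 'ttest' comparison
# with 'diff' magnitude and 'BH' p-value adjustment could look like,
# assuming each input is a (n_bins, n_trials) array as built in fsdict
# above.
def _compare_rasters_sketch(lb, pb):
    from scipy import stats

    # magnitude: difference of across-trial mean counts, per bin
    mag = lb.mean(axis=1) - pb.mean(axis=1)

    # per-bin p-value from an independent-samples t-test
    p = np.array([stats.ttest_ind(lb[n], pb[n])[1]
        for n in range(lb.shape[0])])

    # Benjamini-Hochberg adjusted p-values over bins
    order = np.argsort(p)
    adj = p[order] * len(p) / (np.arange(len(p)) + 1.0)
    p[order] = np.minimum(np.minimum.accumulate(adj[::-1])[::-1], 1.0)
    return mag, p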
def get_binned_spikes3(self, spike_filter=None, trial_filter=None):
    """Generic binning function operating on self._fsd

    spike_filter : dataframe describing how to split fsd.
        The columns are the hierarchy to split on, eg ['session', 'unit'].
        The rows are the combinations to include. If there are no rows,
        then everything is included. If None, then bin over every column
        except 'adj_time' and 'spike_time'.
        Every combination is delineated here because the split is not
        (usually) separable over session and unit.
    trial_filter : dataframe describing which trials to include, eg hits
        only, or stim_numbers 5-12. Here the combinations need not be
        enumerated, because we can take the intersection over stim number
        and outcome (though note we might want to combine errors and
        wrongports, for instance). It is implicit that this applies to
        each session in spike_filter.

    First the spikes are grouped over the columns in spike_filter. For
    each group, the trials are grouped over the columns in trial_filter.
    This cross-result is histogrammed. All results are concatenated and
    returned.

    The actual histogramming is done by myutils.times2bins using
    self.f_samp, self.t_start, self.t_stop, self.bins.
    """
    input = self._fsd

    # default: use all columns and include all data
    if spike_filter is None:
        col_list = input.columns.tolist()
        remove_cols = ['adj_time', 'spike_time']
        for col in remove_cols:
            if col in col_list:
                col_list.remove(col)
        spike_filter = pandas.DataFrame(columns=col_list)

    # Choose data from `input` by defining the following variables:
    #   `keylist` : a list of keys to include, each separately binned
    #   `grouped_data` : a dict from each key in keylist to the data
    #   `keynames` : what to call each entry of the key in the result
    if len(spike_filter) == 0:
        # use all data
        keynames = spike_filter.columns.tolist()
        keylist = [tuple([myutils.only_one(input[col])
            for col in keynames])]
        grouped_data = {keylist[0]: input}
    elif len(spike_filter) == 1:
        # optimized for the case of selecting a single unit
        d = {}
        for col in spike_filter:
            d[col] = spike_filter[col][0]
        mask = myutils.pick_mask(input, **d)
        keylist = spike_filter.to_records(index=False)  # length 1
        keynames = spike_filter.columns.tolist()
        grouped_data = {keylist[0]: input.ix[mask]}
    else:
        # standard case
        g = input.groupby(spike_filter.columns.tolist())
        grouped_data = g.groups
        keylist = spike_filter.to_records(index=False)
        keynames = spike_filter.columns.tolist()

    # Now group the trials, indexed by (session, trial)
    att = self.all_trials.reset_index().set_index(
        ['session', 'trial'], drop=False)
    g2 = att.groupby(trial_filter.columns.tolist())
    #g2 = self.all_trials.groupby(trial_filter.columns.tolist())

    # Now iterate through the keys in keylist and the corresponding
    # values in grouped_data.
    rec_l = []
    for key in keylist:
        # Take the data from this group
        subdf = grouped_data[key]

        for g2k, g2v in g2:
            # count trials of this type from this session
            session = myutils.only_one(subdf.session)
            n_trials = len(g2v.ix[session])
            if n_trials == 0:
                # eg if a possible combination never actually occurred
                continue

            # Join the spikes on the columns of trial_filter
            subsubdf = subdf.join(g2v[trial_filter.columns],
                on=['session', 'trial'], how='inner', rsuffix='rrr')

            # check for already-joined columns
            for col in trial_filter.columns:
                if col + 'rrr' in subsubdf.columns:
                    assert (subsubdf[col] == subsubdf[col + 'rrr']).all()
                    subsubdf.pop(col + 'rrr')

            # histogramming
            counts, t_vals = myutils.times2bins(
                np.asarray(subsubdf.adj_time), return_t=True,
                f_samp=self.f_samp, t_start=self.t_start,
                t_stop=self.t_stop, bins=self.bins)

            # Add in the keyed info (session etc), plus
            # n_counts, n_trials, time, and bin index
            frame_label = list(key) + list(np.array([g2k]).flatten())
            this_frame = [
                frame_label + [count, n_trials, t_val, n_bin]
                for n_bin, (count, t_val)
                in enumerate(zip(counts, t_vals))]

            # append to growing list
            rec_l += this_frame

    # convert to new data frame, using the keyed columns plus our new ones
    cols = keynames + trial_filter.columns.tolist() + [
        'counts', 'trials', 'time', 'bin']
    newdf = pandas.DataFrame(rec_l, columns=cols)

    # the combinations that never actually occurred (future trials) were
    # dropped above
    return newdf
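# Usage sketch for get_binned_spikes3, with hypothetical session and unit
# values. `sorter` stands in for whatever instance holds self._fsd and
# self.all_trials. Note that only the *columns* of trial_filter are used
# to group trials above; its rows are not consulted.
def _example_binned_query(sorter):
    spike_filter = pandas.DataFrame(
        [('session1', 3), ('session1', 7)],
        columns=['session', 'unit'])
    trial_filter = pandas.DataFrame(columns=['outcome'])
    # long-format result: one row per (session, unit, outcome, bin)
    return sorter.get_binned_spikes3(spike_filter, trial_filter)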
def get_binned_spikes_by_trial(self, split_on, split_on_filter=None,
    f_samp=30e3, t_start=-.25, t_stop=.5, bins=75, include_trials='hits'):
    """Returns binned data separately for each trial.

    There is a variable number of columns bin%d, depending on the number
    of bins you request.

    Format:
    <class 'pandas.core.frame.DataFrame'>
    Int64Index: 23202 entries, 0 to 23201
    Data columns:
    session    23202  non-null values
    tetrode    23202  non-null values
    sound      23202  non-null values
    block      23202  non-null values
    trial      23202  non-null values
    bin0       23202  non-null values
    dtypes: int64(3), object(3)
    """
    fsd = self.read_flat_spikes_and_trials(
        stim_number_filter=range(5, 13), include_trials=include_trials)
    replace_stim_numbers_with_names(fsd)
    g = fsd.groupby(split_on)

    dfs = []
    for key, val in g:
        if split_on_filter is not None and key not in split_on_filter:
            continue
        for sound_name in ['lehi', 'rihi', 'lelo', 'rilo']:
            for block_name in ['LB', 'PB']:
                # subframe
                subdf = val[(val.sound == sound_name) &
                    (val.block == block_name)]

                # get session name
                session_l = np.unique(np.asarray(subdf.session))
                if len(session_l) == 0:
                    continue
                elif len(session_l) > 1:
                    raise ValueError("non-unique sessions, somehow")
                else:
                    session = session_l[0]

                trial_list = self.list_trials_by_type(session=session,
                    sound=sound_name, block=block_name,
                    include_trials=include_trials)

                counts, times = myutils.times2bins(
                    fold(subdf, trial_list), f_samp=f_samp,
                    t_start=t_start, t_stop=t_stop, bins=bins,
                    return_t=True)

                this_frame = [
                    list(key) + [sound_name, block_name, trial] +
                        list(count)
                    for count, trial in zip(counts, trial_list)]

                dfs.append(pandas.DataFrame(this_frame,
                    columns=(split_on + ['sound', 'block', 'trial'] +
                        ['bin%d' % n for n in range(counts.shape[1])])))

    return pandas.concat(dfs, ignore_index=True)
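# Sketch: recover a trials x bins count matrix for one condition from the
# wide-format frame returned by get_binned_spikes_by_trial. `sorter` is a
# hypothetical instance; the column names follow the Format block above.
def _example_trial_matrix(sorter):
    bdf = sorter.get_binned_spikes_by_trial(
        split_on=['session', 'tetrode'])
    bin_cols = [c for c in bdf.columns if c.startswith('bin')]
    mask = (bdf.sound == 'lehi') & (bdf.block == 'LB')
    # one row per trial, one column per time bin
    return bdf[mask][bin_cols].values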