def trials_by_param(self, param, vals=None, comb_vals=False):
    """Return trials grouped by (selected) values of trial parameter."""
    # Guard: bail out early on a parameter that is not a trial-data column.
    if param not in self.TrData.columns:
        warnings.warn('Unknown trial parameter {}'.format(param))
        return pd.Series()

    # Collect trial indices per parameter value and filter them.
    grouped = self.filter_trials(pd.Series(self.TrData.groupby([param]).groups))

    if vals is None:
        # No explicit selection: take every observed value, in feature order.
        vals = self.sort_feature_values(param, grouped.keys().to_series())
    else:
        # Cast requested values to the column's dtype
        # (this also strips any attached physical quantity).
        vals = np.array(vals, dtype=self.TrData[param].dtype)

    # Build Series mapping each value to its trial index array
    # (empty array for values with no trials).
    pairs = []
    for v in vals:
        trs = np.array(grouped[v]) if v in grouped else np.empty(0)
        pairs.append((v, trs))
    grouped = util.series_from_tuple_list(pairs)

    # Merge trials across all parameter values if requested.
    return util.union_lists(grouped) if comb_vals else grouped
def run_logreg_across_time(rates, vfeat, vzscore_by=None, n_perm=0, n_pshfl=0, corr_trs=None, ncv=5, Cs=None): """Run logistic regression analysis across trial time.""" # Correct and error trials and targets. if corr_trs is None: corr_trs = pd.Series(True, index=vfeat.index) err_trs = ~corr_trs corr_feat, err_feat = [vfeat[trs] for trs in [corr_trs, err_trs]] # Check that we have enough trials to split into folds during CV. vcounts = corr_feat.value_counts() if (vcounts < ncv).any(): if verbose: warnings.warn('Not enough trials to do decoding with CV') return # Prepare data for running analysis in pool. LRparams = [] t_uids = [] for t, rt in rates.items(): rtmat = rt.unstack().T # get rates and format to (trial x unit) matrix if vzscore_by is not None: # z-score by condition level rtmat = zscore_by_cond(rtmat, vzscore_by) corr_rates, err_rates = [rtmat.loc[trs] for trs in [corr_trs, err_trs]] LRparams.append((corr_rates, corr_feat, n_perm, n_pshfl, None, ncv, Cs)) t_uids.append(rtmat.columns) # Run logistic regression at each time point. res = zip(*util.run_in_pool(run_logreg, LRparams)) lScores, lClasses, lCoefs, lC, lPerm, lPsdo = res # Put results into series and dataframes. tvec = rates.columns # Best regularisation parameter value. C = pd.Series(list(lC), index=tvec) # Prediction scores over time. Scores = pd.DataFrame.from_records(lScores, index=tvec).T # Coefficients (unit by value) over time. coef_ser = {t: pd.DataFrame(lCoefs[i], columns=t_uids[i], index=lClasses[i]).unstack() for i, t in enumerate(tvec)} Coefs = pd.concat(coef_ser, axis=1) # Permutation and population shuffling results. Perm = pd.concat(lPerm, axis=1, keys=tvec) Psdo = pd.concat(lPsdo, axis=1, keys=tvec) # Collect results. res = [('Scores', Scores), ('Coefs', Coefs), ('C', C), ('Perm', Perm), ('Psdo', Psdo)] res = util.series_from_tuple_list(res) return res
def get_unit_info_title(u, fullname=False):
    """Return a formatted multi-line info string for a unit.

    Parameters
    ----------
    u : Unit
        Unit object queried via get_unit_params(), is_excluded() and TrData.
    fullname : bool, optional
        If True, head the title with the unit's full name; otherwise with
        its task name only.

    Returns
    -------
    str
        Newline-separated info lines (header, stimulus locations, quality
        metrics, waveform duration, baseline rate).
    """
    # Init dict of info labels to report.
    upars = u.get_unit_params()

    # Formatted parameter values; parameters missing from upars show 'N/A'.
    # FIX: raw string for the waveform-duration label so the '\m' in the
    # matplotlib TeX fragment '$\mu$s' is not parsed as an (invalid,
    # soon-to-be-error) string escape. Runtime value is unchanged.
    fpars = [('isolation', '{}'),
             ('SNR', 'SNR: {:.2f}'),
             ('ISIvr', 'ISIvr: {:.2f}%'),
             ('TrueSpikes', 'TrSpRt: {:.0f}%'),
             ('BS/NS', '{}'),
             ('mWfDur', r'Wf dur: {:.0f} $\mu$s'),
             ('Fac/Sup', '{}'),
             ('mFR', 'mean rate: {:.1f} sp/s'),
             ('baseline', 'baseline rate: {:.1f} sp/s'),
             ('included', 'included')]
    fvals = [(meas, f.format(upars[meas]) if meas in upars else 'N/A')
             for meas, f in fpars]
    fvals = util.series_from_tuple_list(fvals)

    # Create info lines.
    # Start with unit name and 'excluded' tag if unit is excluded from task.
    header = upars.Name if fullname else upars.task
    header += ' [excluded]' if u.is_excluded() else ''
    info_lines = '\n\n{}\n\n\n\n'.format(header)

    # Add stimulus locations of S1 and S2.
    s1locs, s2locs = [', '.join(['({:.1f}, {:.1f})'.format(x, y)
                                 for (x, y) in u.TrData[(stim, 'Loc')].unique()])
                      for stim in ('S1', 'S2')]
    info_lines += 'S1 locations: {} | S2 locations: {}\n\n'.format(s1locs,
                                                                   s2locs)

    # Unit quality metrics.
    info_lines += '{} ({}, {}, {})\n\n'.format(fvals['isolation'],
                                               fvals['SNR'], fvals['ISIvr'],
                                               fvals['TrueSpikes'])

    # Waveform duration.
    info_lines += '{}\n\n'.format(fvals['mWfDur'])

    # Baseline firing rate.
    info_lines += '{}\n\n'.format(fvals['baseline'])

    return info_lines
def trials_by_params(self, params):
    """Return trials grouped by value combinations of trial parameters."""
    # Reject the request at the first parameter missing from trial data.
    for p in params:
        if p not in self.TrData.columns:
            warnings.warn('Unknown trial parameter {}'.format(p))
            return pd.Series()

    # Trial indices per value combination, restricted by trial filter.
    grouped = self.filter_trials(pd.Series(self.TrData.groupby(params).groups))
    combos = grouped.keys().to_series()

    # One trial-index array per value combination (empty array when absent).
    pairs = [(combo,
              np.array(grouped[combo]) if combo in grouped else np.empty(0))
             for combo in combos]
    return util.series_from_tuple_list(pairs)
def __init__(self, TPLCell=None, rec_info=None, kset=None):
    """Create Unit instance from TPLCell data structure.

    Parameters
    ----------
    TPLCell : optional
        Matlab-derived cell structure (attribute access via _fieldnames).
        If None, an empty (placeholder) Unit is created.
        # assumes scipy.io loadmat-style record object -- TODO confirm
    rec_info : pd.Series, optional
        Extra recording info appended to the session parameters.
    kset : pd.DataFrame, optional
        Rate-estimation kernel set, iterated as (name, (kernel, step)).
    """
    # Create empty instance.
    self.Name = ''
    self.UnitParams = pd.Series()
    self.SessParams = pd.Series()
    self.Waveforms = pd.DataFrame()
    self.SpikeParams = pd.DataFrame()
    self.Events = pd.DataFrame()
    self.TrData = pd.DataFrame()
    self._Spikes = Spikes([])
    self._Rates = pd.Series()
    self.QualityMetrics = pd.Series()
    self.DS = pd.Series()
    self.TaskRelPrds = pd.Series()

    # Default unit params: an empty, excluded unit until data is loaded.
    self.UnitParams['empty'] = True
    self.UnitParams['excluded'] = True

    # Return if no TPLCell is passed (placeholder unit).
    if TPLCell is None:
        return

    # %% Session parameters.

    # Prepare session params, parsed out of the TPLCell file name.
    fname_pars = util.params_from_fname(TPLCell.File)
    subj, date, elec = fname_pars[['subj', 'date', 'elec']]
    task, task_idx, sortno = fname_pars[['task', 'idx', 'sortno']]
    [ch, ux] = TPLCell.ChanUnit
    # Sampling period from recording frequency (quantities: Hz -> us).
    sampl_prd = (1 / (TPLCell.Info.Frequency * Hz)).rescale(us)
    # Convert numpy arrays in parameter info to plain lists.
    pinfo = [p.tolist() if isinstance(p, np.ndarray) else p
             for p in TPLCell.PInfo]

    # Assign session params.
    sp_list = [('task', task),
               ('task #', task_idx),
               ('subj', subj),
               ('date', date),
               ('elec', elec),
               ('ch', ch),
               ('ux', ux),
               ('sort #', sortno),
               ('filepath', TPLCell.Filename),
               ('filename', TPLCell.File),
               ('paraminfo', pinfo),
               ('sampl_prd', sampl_prd)]
    self.SessParams = util.series_from_tuple_list(sp_list)
    # NOTE(review): Series.append was removed in pandas 2.0 -- would need
    # pd.concat on upgrade.
    self.SessParams = self.SessParams.append(rec_info)

    # Name unit.
    self.set_name()

    # Unit params: data is now present.
    self.UnitParams['empty'] = False
    self.UnitParams['excluded'] = False

    # %% Waveforms.

    if 'Waves' in TPLCell._fieldnames:
        wfs = TPLCell.Waves
        if wfs.ndim == 1:  # there is only a single spike
            wfs = np.reshape(wfs, (1, len(wfs)))  # extend it to matrix
        # Sample times (columns) in units of the sampling period.
        wf_sampl_t = float(sampl_prd) * np.arange(wfs.shape[1])
        self.Waveforms = pd.DataFrame(wfs, columns=wf_sampl_t)

    # %% Spike params.

    if 'Spikes' in TPLCell._fieldnames:
        spk_pars = [('time', util.fill_dim(np.array(TPLCell.Spikes))),
                    ('included', True)]
        # NOTE(review): DataFrame.from_items was removed in pandas 1.0 --
        # would need from_dict/constructor on upgrade (same elsewhere below).
        self.SpikeParams = pd.DataFrame.from_items(spk_pars)

    # %% Stimulus parameters.

    stim_params = constants.stim_params

    # Extract all trial parameters.
    trpars = pd.DataFrame(TPLCell.TrialParams, columns=TPLCell.Header)

    # Extract stimulus parameters and relabel columns by the constants index.
    StimParams = trpars[stim_params.name]
    StimParams.columns = stim_params.index

    # Change type if required (per-column dtype from constants).
    stim_pars = StimParams.copy()
    for stim_par in stim_pars:
        stim_type = stim_params.type[stim_par]
        if stim_type is not None:
            stim_pars[stim_par] = stim_pars[stim_par].astype(stim_type)
    StimParams = stim_pars

    # Combine x and y stimulus coordinates into a single location variable.
    stim_pars = StimParams.copy()
    for stim in stim_pars.columns.levels[0]:
        pstim = stim_pars[stim]
        if ('LocX' in pstim.columns) and ('LocY' in pstim.columns):
            lx, ly = pstim.LocX, pstim.LocY
            stim_pars[stim, 'Loc'] = [(x, y) for x, y in zip(lx, ly)]
    StimParams = stim_pars.sort_index(axis=1)

    # Add same-different columns (S/D trials): for each feature present in
    # both S1 and S2, mark trials where the two stimuli match.
    feats = np.unique([f[1] for f in StimParams.columns
                       if util.is_iterable(f) and len(f) == 2])
    for feat in feats:
        s1f, s2f, dsf = ('S1', feat), ('S2', feat), ('S_D', feat)
        if (s1f in StimParams) and (s2f in StimParams):
            StimParams[dsf] = 'diff'
            isame = (StimParams[s1f] == StimParams[s2f])
            StimParams.loc[isame, dsf] = 'same'

    # %% Subject answer parameters.

    Answer = pd.DataFrame()

    # Recode correct/incorrect answer column.
    corr_ans = trpars['subjectAnswer']
    if len(corr_ans.unique()) > 2:
        corr_ans_vals = ', '.join([str(v) for v in corr_ans.unique()])
        warnings.warn(('More than 2 unique values for correct answer: ' +
                       corr_ans_vals))
    corr_ans = corr_ans == corr_ans.max()  # higher value is correct!
    Answer['correct'] = corr_ans

    # Add column for subject response (saccade direction).
    same_dir = StimParams['S1', 'Dir'] == StimParams['S2', 'Dir']
    # This is not actually correct for passive task!
    Answer['saccade'] = ((same_dir & corr_ans) | (~same_dir & ~corr_ans))

    # %% Trial events.

    # Timestamps of events. Only S1 offset and S2 onset are reliable!
    # S1 onset and S2 offset are fixed to these two.
    # Altogether these four are called anchor events.
    # Watch out: indexing starting with 1 in TPLCell (Matlab)!
    # Everything is in seconds below!
    if 'rel_times' in TPLCell._fieldnames:
        # Use relative times aligned to trial start (single-unit data).
        rel_times = TPLCell.rel_times
        anchor_evts = [('S1 on', rel_times.S1_on),
                       ('S1 off', rel_times.S1_off),
                       ('S2 on', rel_times.S2_on),
                       ('S2 off', rel_times.S2_off)]
    else:
        # Use absolute times (multi-unit data): derive S1 on / S2 off from
        # the reliable S1 off / S2 on plus the nominal stimulus durations.
        S1dur = float(constants.stim_dur['S1'].rescale(s))
        S2dur = float(constants.stim_dur['S2'].rescale(s))
        iS1off = TPLCell.Patterns.matchedPatterns[:, 2]-1  # Matlab 1-indexing
        iS2on = TPLCell.Patterns.matchedPatterns[:, 3]-1
        ts = TPLCell.Timestamps
        anchor_evts = [('S1 on', ts[iS1off]-S1dur),
                       ('S1 off', ts[iS1off]),
                       ('S2 on', ts[iS2on]),
                       ('S2 off', ts[iS2on]+S2dur)]
    anchor_evts = pd.DataFrame.from_items(anchor_evts)

    # Align trial events to S1 onset.
    S1_onset = anchor_evts['S1 on']  # this is also used below!
    anchor_evts = anchor_evts.subtract(S1_onset, axis=0)

    # Add additional trial events, relative to anchor events.
    evts = [(evt, anchor_evts[rel]+float(offset.rescale(s)))
            for evt, (rel, offset) in constants.tr_evts.iterrows()]
    evts = pd.DataFrame.from_items(evts)

    # Update saccade (end of recording) if info available.
    if ('rel_times' in TPLCell._fieldnames and
            'saccade' in TPLCell.rel_times._fieldnames):
        evts['saccade'] = TPLCell.rel_times.saccade - S1_onset

    # Add dimension to timestamps (ms).
    for evt in evts:
        evts[evt] = util.add_dim_to_series(1000*evts[evt], ms)  # s --> ms
    self.Events = evts

    # %% Trial parameters

    TrialParams = pd.DataFrame()

    # Add start time, end time and length of each trials.
    if 'Timestamps' in TPLCell._fieldnames:
        tstamps = TPLCell.Timestamps
        # Matlab 1-based indices into the timestamp vector.
        tr_times = np.array([(tstamps[i1-1], tstamps[i2-1])
                             for i1, i2
                             in TPLCell.Info.successfull_trials_indices])
        tr_times = tr_times * s
        for name, col in [('TrialStart', tr_times[:, 0]),
                          ('TrialStop', tr_times[:, 1]),
                          ('TrialLength', tr_times[:, 1]-tr_times[:, 0])]:
            util.add_quant_col(TrialParams, col, name)

    # Add trial period lengths to trial params.
    TrialParams['S1Len'] = evts['S1 off'] - evts['S1 on']
    TrialParams['S2Len'] = evts['S2 off'] - evts['S2 on']
    TrialParams['DelayLenPrec'] = evts['S2 on'] - evts['S1 off']

    # "Categorical" (rounded) delay length variable: snap each precise delay
    # to the nearest nominal delay length in constants.del_lens.
    delay_lens = util.dim_series_to_array(TrialParams['DelayLenPrec'])
    len_diff = [(i, np.abs(delay_lens - dl))
                for i, dl in enumerate(constants.del_lens)]
    min_diff = pd.DataFrame.from_items(len_diff).idxmin(1)
    dlens = constants.del_lens[min_diff]
    TrialParams['DelayLen'] = list(util.remove_dim_from_series(dlens))

    # Add target feature to be reported.
    if task == 'com':  # Combined task: target feature varies.
        to_report = trpars.TrialType.replace([0, 1], ['loc', 'dir'])
    else:
        to_report = constants.to_report(task)
    TrialParams['ToReport'] = to_report

    # Init included trials (all trials included initially).
    TrialParams['included'] = np.array(True, dtype=bool)

    # %% Assamble full trial data frame.

    # Flatten MultiIndex columns to plain tuples before concatenation.
    StimParams.columns = StimParams.columns.tolist()
    self.TrData = pd.concat([TrialParams, StimParams, Answer], axis=1)

    # %% Spikes.

    # Trials spikes, aligned to S1 onset.
    spk_trains = [(spk_train - S1_onset[i]) * s  # align to S1 on
                  for i, spk_train in enumerate(TPLCell.TrialSpikes)]
    t_starts = self.ev_times('fixate')  # start of trial
    t_stops = self.ev_times('saccade')  # end of trial
    self._Spikes = Spikes(spk_trains, t_starts, t_stops)

    # %% Rates.

    # Estimate firing rate in each trial, one estimate per kernel setting.
    for name, (kernel, step) in kset.iterrows():
        self.add_rate(name, kernel, step)
def test_task_relatedness(u, p_th=0.05):
    """Test if unit has any task related activity.

    For each stimulus / period / trial-parameter value, rates in a window
    around the peak response are compared against the per-trial baseline
    rate (Mann-Whitney U test). Results are stored on the unit in
    u.PrdParTests.

    Parameters
    ----------
    u : Unit
        Unit to test; u.PrdParTests is set as a side effect.
    p_th : float, optional
        Significance threshold applied to the test p-values.

    Returns
    -------
    (bool, bool) or bool
        (has_min_rate, is_task_related), or bare False when the unit has no
        included trials.
        NOTE(review): the early return is not a 2-tuple -- callers that
        unpack the result must handle this case.
    """
    # Init.
    nrate = u.init_nrate()
    wndw_len, minFR = QC_THs.loc[u.get_region()]
    if not len(u.inc_trials()):
        return False

    # Get baseline rate per trial.
    baseline = util.remove_dim_from_series(u.get_prd_rates('baseline'))

    # Init periods and trials sets to test.
    feats = ('Dir', )  # ('Dir', 'Loc')
    prds_trs = [('S1', [('S1', 'early delay', 'late delay'), feats]),
                ('S2', [('S2', 'post-S2'), feats])]
    prds_trs = pd.DataFrame.from_items(prds_trs, orient='index',
                                       columns=['prds', 'trpars'])

    # Go through each stimulus, period and trial parameter to be tested.
    pval = []
    mean_rate = []
    for stim, (prds, trpars) in prds_trs.iterrows():
        for prd in prds:
            t1s, t2s = u.pr_times(prd, add_latency=False, concat=False)
            for par in trpars:
                ptrs = u.trials_by_param((stim, par))
                for vpar, trs in ptrs.iteritems():

                    # Get rates during period on trials with given param value.
                    rates = u._Rates[nrate].get_rates(trs, t1s, t2s)
                    bs_rates = baseline[trs]

                    # No rates available.
                    if rates.empty:
                        continue

                    # Get sub-period around time with maximal rate.
                    # BUG FIX: the peak time was previously assigned to
                    # 'tmax' and immediately overwritten by the period end
                    # time, so the test window was anchored to the period
                    # end instead of the response peak.
                    tpeak = rates.mean().argmax()
                    tmin, tmax = rates.columns.min(), rates.columns.max()
                    tstart, tend = stats.prd_in_window(tpeak, tmin, tmax,
                                                       wndw_len, ms)
                    tidx = (rates.columns >= tstart) & (rates.columns <= tend)

                    # Test difference from baseline rate.
                    wnd_rates = rates.loc[:, tidx].mean(1)
                    stat, p = stats.mann_whithney_u_test(wnd_rates, bs_rates)
                    pval.append(((stim, prd, par, str(vpar)), p))

                    # Mean rate over the whole period.
                    mrate = rates.mean().mean()
                    mean_rate.append(((stim, prd, par, str(vpar)), mrate))

    # Format results.
    names = ['stim', 'prd', 'par', 'vpar']
    pval, mean_rate = [util.series_from_tuple_list(res, names)
                       for res in (pval, mean_rate)]

    # Save results to unit.
    u.PrdParTests = pd.concat([mean_rate, pval], axis=1,
                              keys=['mean_rate', 'pval'])
    u.PrdParTests['sign'] = u.PrdParTests['pval'] < p_th

    # Save test parameters.
    # NOTE(review): attaching attributes to a DataFrame is fragile -- they
    # are silently dropped by most pandas operations (copy, concat, pickle).
    u.PrdParTests.test = 'mann_whithney_u_test'
    u.PrdParTests.p_th = p_th

    # Is there any task- (stimulus-parameter-) related period?
    has_min_rate = (u.PrdParTests.mean_rate >= minFR).any()
    is_task_related = u.PrdParTests.sign.any()

    return has_min_rate, is_task_related
def run_logreg(X, y, n_perm=0, n_pshfl=0, cv_obj=None, ncv=5, Cs=None,
               multi_class=None, solver=None, class_weight='balanced'):
    """
    Run logistic regression with number of cross-validation folds (ncv) and
    internal regularization over a number of regularisation parameters (Cs).

    Parameters
    ----------
    X : array-like, (samples x features)
        Predictor matrix; rows with any NaN are dropped.
    y : array-like
        Target values; entries that are None are dropped (with their rows).
    n_perm : int
        Number of label permutations for permutation testing (0 = skip).
    n_pshfl : int
        Number of within-unit population shuffles (0 = skip).
    cv_obj : optional
        Cross-validation splitter; defaults to a seeded StratifiedKFold.
    ncv : int
        Number of CV folds when cv_obj is not given.
    Cs : list, optional
        Regularisation strengths; defaults to [1] (no regularisation).
    multi_class, solver, class_weight :
        Forwarded to sklearn's LogisticRegressionCV, with data-dependent
        defaults chosen below.

    Returns
    -------
    pd.Series
        Fields: 'score' (per-fold CV scores), 'class_names', 'coef'
        (class x feature weights), 'C' (selected regularisation), 'perm'
        and 'psdo' (mean/std/pval Series). Returned NaN-filled when there
        are fewer than 2 classes or not enough trials per class for CV.
    """
    # Remove missing values from data (NaN rows in X, None entries in y).
    idx = np.logical_and(np.all(~np.isnan(X), 1),
                         [yi is not None for yi in y])
    X, y = np.array(X[idx]), np.array(y[idx])

    # Init data params.
    classes, vcounts = np.unique(y, return_counts=True)
    ntrials, nfeatures = X.shape
    nclasses = len(classes)
    binary = is_binary(y)

    # Deal with binary case: sklearn reports a single coefficient row for
    # the positive class only.
    class_names = [classes[1]] if binary else classes
    nclasspars = 1 if binary else nclasses

    # Init results (NaN-filled defaults, returned on early exit).
    res = [('score', np.nan * np.zeros(ncv)),
           ('class_names', class_names),
           ('coef', np.nan * np.zeros((nclasspars, nfeatures))),
           ('C', np.nan),
           ('perm', pd.Series(np.nan, index=['mean', 'std', 'pval'])),
           ('psdo', pd.Series(np.nan, index=['mean', 'std', 'pval']))]
    res = util.series_from_tuple_list(res)

    # Check that there's at least two classes.
    # NOTE(review): 'verbose' and 'seed' are not defined in this block --
    # presumably module-level globals; verify they exist at module scope.
    if nclasses < 2:
        if verbose:
            warnings.warn('Number of different values in y is less then 2!')
        return res

    # Check that we have enough trials to split into folds during CV.
    if np.any(vcounts < ncv):
        if verbose:
            warnings.warn('Not enough trials to split into folds during CV')
        return res

    # Init LogRegCV parameters (data-dependent defaults).
    if multi_class is None:
        multi_class = 'ovr' if binary else 'multinomial'
    if solver is None:
        solver = 'lbfgs' if len(y) < 500 else 'sag'
    if cv_obj is None:
        cv_obj = StratifiedKFold(n_splits=ncv, shuffle=True,
                                 random_state=seed)
    if Cs is None:
        Cs = [1]  # no regularisation by default

    # Create LogRegress solver.
    LRCV = LogisticRegressionCV(solver=solver, Cs=Cs, cv=cv_obj,
                                multi_class=multi_class, refit=True,
                                class_weight=class_weight)

    # Fit logistic regression.
    class_names, C, score = fit_LRCV(LRCV, X, y)
    res['C'] = C
    res['score'] = score

    # Coefficients (weights) of features by predictors.
    coef = LRCV.coef_
    res['coef'] = coef

    # Run permutation testing (re-fits with shuffled labels).
    if n_perm > 0:
        r = permutation_test_score(LRCV, X, y, scoring='accuracy',
                                   cv=cv_obj, n_permutations=n_perm,
                                   random_state=seed)
        _, perm_scores, perm_p = r
        res['perm']['mean'] = perm_scores.mean()
        res['perm']['std'] = perm_scores.std()
        res['perm']['pval'] = perm_p

    # Run decoding on rate matrix with trials shuffled within units
    # (population shuffling); mean CV score per shuffle.
    if n_pshfl > 0:
        shfld_scores = np.array([fit_LRCV(LRCV, pop_shfl(X, y), y)[2]
                                 for i in range(n_pshfl)]).mean(1)
        res['psdo']['mean'] = shfld_scores.mean()
        res['psdo']['std'] = shfld_scores.std()
        res['psdo']['pval'] = stats.perm_pval(score.mean(), shfld_scores)

    return res