Code Example #1
File: unit.py Project: mnislamraju/seal
    def trials_by_param(self, param, vals=None, comb_vals=False):
        """Return trials grouped by (selected) values of trial parameter."""

        # Error check.
        if param not in self.TrData.columns:
            warnings.warn('Unknown trial parameter {}'.format(param))
            return pd.Series()

        # Group indices by values of parameter.
        tr_grps = pd.Series(self.TrData.groupby([param]).groups)
        tr_grps = self.filter_trials(tr_grps)

        # Default: all values of parameter.
        if vals is None:
            vals = self.sort_feature_values(param, tr_grps.keys().to_series())
        else:
            # Remove any physical quantity.
            dtype = self.TrData[param].dtype
            vals = np.array(vals, dtype=dtype)

        # Convert to Series of trial list per parameter value.
        v_trs = [(v, np.array(tr_grps[v]) if v in tr_grps else np.empty(0))
                 for v in vals]
        tr_grps = util.series_from_tuple_list(v_trs)

        # Optionally, combine trials across feature values.
        if comb_vals:
            tr_grps = util.union_lists(tr_grps)

        return tr_grps
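The heart of the method is plain pandas: group row indices by a column's values, then look each requested value up with an empty-array fallback. Below is a minimal self-contained sketch of that pattern on a toy trial table; the names (trial_data, 'Dir') are illustrative, and pd.Series(dict(...)) stands in for the project's util.series_from_tuple_list helper.

import numpy as np
import pandas as pd

# Toy trial table; 'Dir' stands in for a real trial parameter.
trial_data = pd.DataFrame({'Dir': [0, 90, 0, 180, 90, 0]})

# Map each parameter value to the indices of the trials having that value.
tr_grps = pd.Series(trial_data.groupby('Dir').groups)

# Look up selected values; values never seen get an empty trial list.
vals = [0, 90, 270]
v_trs = [(v, np.array(tr_grps[v]) if v in tr_grps else np.empty(0))
         for v in vals]
print(pd.Series(dict(v_trs)))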
Code Example #2
def run_logreg_across_time(rates, vfeat, vzscore_by=None, n_perm=0,
                           n_pshfl=0, corr_trs=None, ncv=5, Cs=None):
    """Run logistic regression analysis across trial time."""

    # Correct and error trials and targets.
    if corr_trs is None:
        corr_trs = pd.Series(True, index=vfeat.index)
    err_trs = ~corr_trs
    corr_feat, err_feat = [vfeat[trs] for trs in [corr_trs, err_trs]]

    # Check that we have enough trials to split into folds during CV.
    vcounts = corr_feat.value_counts()
    if (vcounts < ncv).any():
        if verbose:
            warnings.warn('Not enough trials to do decoding with CV')
        return

    # Prepare data for running analysis in pool.
    LRparams = []
    t_uids = []
    for t, rt in rates.items():

        rtmat = rt.unstack().T  # get rates and format to (trial x unit) matrix
        if vzscore_by is not None:  # z-score by condition level
            rtmat = zscore_by_cond(rtmat, vzscore_by)

        corr_rates, err_rates = [rtmat.loc[trs]
                                 for trs in [corr_trs, err_trs]]
        LRparams.append((corr_rates, corr_feat, n_perm,
                         n_pshfl, None, ncv, Cs))
        t_uids.append(rtmat.columns)

    # Run logistic regression at each time point.
    res = zip(*util.run_in_pool(run_logreg, LRparams))
    lScores, lClasses, lCoefs, lC, lPerm, lPsdo = res

    # Put results into series and dataframes.
    tvec = rates.columns
    # Best regularisation parameter value.
    C = pd.Series(list(lC), index=tvec)
    # Prediction scores over time.
    Scores = pd.DataFrame.from_records(lScores, index=tvec).T
    # Coefficients (unit by value) over time.
    coef_ser = {t: pd.DataFrame(lCoefs[i], columns=t_uids[i],
                                index=lClasses[i]).unstack()
                for i, t in enumerate(tvec)}
    Coefs = pd.concat(coef_ser, axis=1)
    # Permutation and population shuffling results.
    Perm = pd.concat(lPerm, axis=1, keys=tvec)
    Psdo = pd.concat(lPsdo, axis=1, keys=tvec)

    # Collect results.
    res = [('Scores', Scores), ('Coefs', Coefs), ('C', C),
           ('Perm', Perm), ('Psdo', Psdo)]
    res = util.series_from_tuple_list(res)

    return res
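Stripped of the pooling and bookkeeping, the loop above reduces to: build a (trial x unit) rate matrix per time bin and cross-validate a classifier on it. A self-contained sketch on simulated data, with plain scikit-learn standing in for run_logreg and util.run_in_pool (all names and values below are illustrative):

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

rng = np.random.default_rng(0)
n_trials, n_units, n_times = 60, 12, 5
y = rng.integers(0, 2, n_trials)               # binary trial feature

scores = {}
for t in range(n_times):
    X = rng.normal(size=(n_trials, n_units))   # (trial x unit) rate matrix
    X[y == 1] += 0.5 * t / n_times             # signal that grows over time
    scores[t] = cross_val_score(LogisticRegression(), X, y, cv=5).mean()

Scores = pd.Series(scores)                     # decoding score per time point
print(Scores)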
Code Example #3
def get_unit_info_title(u, fullname=False):
    """Plot unit info as text labels."""

    # Init dict of info labels to plot.
    upars = u.get_unit_params()

    # Init formatted parameter values.
    fpars = [('isolation', '{}'), ('SNR', 'SNR: {:.2f}'),
             ('ISIvr', 'ISIvr: {:.2f}%'), ('TrueSpikes', 'TrSpRt: {:.0f}%'),
             ('BS/NS', '{}'), ('mWfDur', r'Wf dur: {:.0f} $\mu$s'),
             ('Fac/Sup', '{}'), ('mFR', 'mean rate: {:.1f} sp/s'),
             ('baseline', 'baseline rate: {:.1f} sp/s'),
             ('included', 'included')]
    fvals = [(meas, f.format(upars[meas]) if meas in upars else 'N/A')
             for meas, f in fpars]
    fvals = util.series_from_tuple_list(fvals)

    # Create info lines.

    # Start with unit name and 'excluded' tag if unit is excluded from task.
    header = upars.Name if fullname else upars.task
    header += ' [excluded]' if u.is_excluded() else ''
    info_lines = '\n\n{}\n\n\n\n'.format(header)

    # Add stimulus parameters.
    s1locs, s2locs = [
        ', '.join([
            '({:.1f}, {:.1f})'.format(x, y)
            for (x, y) in u.TrData[(stim, 'Loc')].unique()
        ]) for stim in ('S1', 'S2')
    ]
    info_lines += 'S1 locations: {}  |  S2 locations: {}\n\n'.format(
        s1locs, s2locs)

    # Unit type.
    info_lines += '{} ({}, {}, {})\n\n'.format(fvals['isolation'],
                                               fvals['SNR'], fvals['ISIvr'],
                                               fvals['TrueSpikes'])
    # Waveform duration.
    info_lines += '{}\n\n'.format(fvals['mWfDur'])
    #info_lines += '{} ({})\n\n'.format(fvals['BS/NS'], fvals['mWfDur'])

    # Firing rate.
    # Facilitatory or suppressive?
    info_lines += '{}\n\n'.format(fvals['baseline'])
    #info_lines += '{}, {}, {}\n\n'.format(fvals['Fac/Sup'], fvals['mFR'],
    #                                      fvals['baseline'])

    return info_lines
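The fvals construction is a compact format-with-fallback idiom: each label is rendered only if its parameter exists, otherwise it degrades to 'N/A'. A toy sketch of just that step (parameter names and values invented):

import pandas as pd

upars = pd.Series({'SNR': 3.14159, 'mFR': 12.3})
fpars = [('SNR', 'SNR: {:.2f}'), ('mFR', 'mean rate: {:.1f} sp/s'),
         ('ISIvr', 'ISIvr: {:.2f}%')]
fvals = pd.Series({meas: (f.format(upars[meas]) if meas in upars else 'N/A')
                   for meas, f in fpars})
print(fvals['SNR'])    # 'SNR: 3.14'
print(fvals['ISIvr'])  # 'N/A' -- parameter missing from upars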
Code Example #4
File: unit.py Project: mnislamraju/seal
    def trials_by_params(self, params):
        """Return trials grouped by value combinations of trial parameters."""

        # Error check.
        for param in params:
            if param not in self.TrData.columns:
                warnings.warn('Unknown trial parameter {}'.format(param))
                return pd.Series()

        # Group indices by values of parameter.
        tr_grps = pd.Series(self.TrData.groupby(params).groups)
        tr_grps = self.filter_trials(tr_grps)

        val_combs = tr_grps.keys().to_series()

        # Convert to Series of trial list per parameter value.
        v_trs = [(vc, np.array(tr_grps[vc]) if vc in tr_grps else np.empty(0))
                 for vc in val_combs]
        tr_grps = util.series_from_tuple_list(v_trs)

        return tr_grps
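Grouping by several parameters works identically, except the group labels become tuples of values. A toy sketch (names illustrative):

import pandas as pd

trial_data = pd.DataFrame({'Dir': [0, 0, 90, 90],
                           'Loc': ['L', 'R', 'L', 'L']})
tr_grps = pd.Series(trial_data.groupby(['Dir', 'Loc']).groups)
print(tr_grps[(90, 'L')])  # indices of trials with Dir == 90 and Loc == 'L'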
Code Example #5
File: unit.py Project: mnislamraju/seal
    def __init__(self, TPLCell=None, rec_info=None, kset=None):
        """Create Unit instance from TPLCell data structure."""

        # Create empty instance.
        self.Name = ''
        self.UnitParams = pd.Series()
        self.SessParams = pd.Series()
        self.Waveforms = pd.DataFrame()
        self.SpikeParams = pd.DataFrame()
        self.Events = pd.DataFrame()
        self.TrData = pd.DataFrame()
        self._Spikes = Spikes([])
        self._Rates = pd.Series()
        self.QualityMetrics = pd.Series()
        self.DS = pd.Series()
        self.TaskRelPrds = pd.Series()

        # Default unit params.
        self.UnitParams['empty'] = True
        self.UnitParams['excluded'] = True

        # Return if no TPLCell is passed.
        if TPLCell is None:
            return

        # %% Session parameters.

        # Prepare session params.
        fname_pars = util.params_from_fname(TPLCell.File)
        subj, date, elec = fname_pars[['subj', 'date', 'elec']]
        task, task_idx, sortno = fname_pars[['task', 'idx', 'sortno']]
        [ch, ux] = TPLCell.ChanUnit
        sampl_prd = (1 / (TPLCell.Info.Frequency * Hz)).rescale(us)
        pinfo = [p.tolist() if isinstance(p, np.ndarray)
                 else p for p in TPLCell.PInfo]

        # Assign session params.
        sp_list = [('task', task),
                   ('task #', task_idx),
                   ('subj', subj),
                   ('date', date),
                   ('elec', elec),
                   ('ch', ch),
                   ('ux', ux),
                   ('sort #', sortno),
                   ('filepath', TPLCell.Filename),
                   ('filename', TPLCell.File),
                   ('paraminfo', pinfo),
                   ('sampl_prd', sampl_prd)]
        self.SessParams = util.series_from_tuple_list(sp_list)
        if rec_info is not None:  # guard: rec_info defaults to None
            self.SessParams = pd.concat([self.SessParams, rec_info])

        # Name unit.
        self.set_name()

        # Unit params.
        self.UnitParams['empty'] = False
        self.UnitParams['excluded'] = False

        # %% Waveforms.

        if 'Waves' in TPLCell._fieldnames:
            wfs = TPLCell.Waves
            if wfs.ndim == 1:  # there is only a single spike
                wfs = np.reshape(wfs, (1, len(wfs)))  # extend it to matrix
            wf_sampl_t = float(sampl_prd) * np.arange(wfs.shape[1])
            self.Waveforms = pd.DataFrame(wfs, columns=wf_sampl_t)

        # %% Spike params.

        if 'Spikes' in TPLCell._fieldnames:
            spk_pars = [('time', util.fill_dim(np.array(TPLCell.Spikes))),
                        ('included', True)]
            self.SpikeParams = pd.DataFrame(dict(spk_pars))

        # %% Stimulus parameters.

        stim_params = constants.stim_params

        # Extract all trial parameters.
        trpars = pd.DataFrame(TPLCell.TrialParams, columns=TPLCell.Header)

        # Extract stimulus parameters.
        StimParams = trpars[stim_params.name]
        StimParams.columns = stim_params.index

        # Change type if required.
        stim_pars = StimParams.copy()
        for stim_par in stim_pars:
            stim_type = stim_params.type[stim_par]
            if stim_type is not None:
                stim_pars[stim_par] = stim_pars[stim_par].astype(stim_type)
        StimParams = stim_pars

        # Combine x and y stimulus coordinates into a single location variable.
        stim_pars = StimParams.copy()
        for stim in stim_pars.columns.levels[0]:
            pstim = stim_pars[stim]
            if ('LocX' in pstim.columns) and ('LocY' in pstim.columns):
                lx, ly = pstim.LocX, pstim.LocY
                stim_pars[stim, 'Loc'] = [(x, y) for x, y in zip(lx, ly)]
        StimParams = stim_pars.sort_index(axis=1)

        # Add same-different columns (S/D trials).
        feats = np.unique([f[1] for f in StimParams.columns
                           if util.is_iterable(f) and len(f) == 2])
        for feat in feats:
            s1f, s2f, dsf = ('S1', feat), ('S2', feat), ('S_D', feat)
            if (s1f in StimParams) and (s2f in StimParams):
                StimParams[dsf] = 'diff'
                isame = (StimParams[s1f] == StimParams[s2f])
                StimParams.loc[isame, dsf] = 'same'

        # %% Subject answer parameters.

        Answer = pd.DataFrame()

        # Recode correct/incorrect answer column.
        corr_ans = trpars['subjectAnswer']
        if len(corr_ans.unique()) > 2:
            corr_ans_vals = ', '.join([str(v) for v in corr_ans.unique()])
            warnings.warn(('More than 2 unique values for correct answer: ' +
                           corr_ans_vals))
        corr_ans = corr_ans == corr_ans.max()  # higher value is correct!
        Answer['correct'] = corr_ans

        # Add column for subject response (saccade direction).
        same_dir = StimParams['S1', 'Dir'] == StimParams['S2', 'Dir']
        # This is not actually correct for passive task!
        Answer['saccade'] = ((same_dir & corr_ans) | (~same_dir & ~corr_ans))

        # %% Trial events.

        # Timestamps of events. Only S1 offset and S2 onset are reliable!
        # S1 onset and S2 offset are fixed to these two.
        # Altogether these four are called anchor events.

        # Watch out: indexing starting with 1 in TPLCell (Matlab)!
        # Everything is in seconds below!

        if 'rel_times' in TPLCell._fieldnames:
            # Use relative times aligned to trial start (single-unit data).
            rel_times = TPLCell.rel_times
            anchor_evts = [('S1 on', rel_times.S1_on),
                           ('S1 off', rel_times.S1_off),
                           ('S2 on', rel_times.S2_on),
                           ('S2 off', rel_times.S2_off)]

        else:
            # Use absolute times (multi-unit data).
            S1dur = float(constants.stim_dur['S1'].rescale(s))
            S2dur = float(constants.stim_dur['S2'].rescale(s))
            iS1off = TPLCell.Patterns.matchedPatterns[:, 2]-1
            iS2on = TPLCell.Patterns.matchedPatterns[:, 3]-1
            ts = TPLCell.Timestamps
            anchor_evts = [('S1 on', ts[iS1off]-S1dur),
                           ('S1 off', ts[iS1off]),
                           ('S2 on', ts[iS2on]),
                           ('S2 off', ts[iS2on]+S2dur)]

        anchor_evts = pd.DataFrame(dict(anchor_evts))

        # Align trial events to S1 onset.
        S1_onset = anchor_evts['S1 on']  # this is also used below!
        anchor_evts = anchor_evts.subtract(S1_onset, axis=0)

        # Add additional trial events, relative to anchor events.
        evts = [(evt, anchor_evts[rel]+float(offset.rescale(s)))
                for evt, (rel, offset) in constants.tr_evts.iterrows()]
        evts = pd.DataFrame(dict(evts))

        # Update saccade (end of recording) if info available.
        if ('rel_times' in TPLCell._fieldnames and
            'saccade' in TPLCell.rel_times._fieldnames):
            evts['saccade'] = TPLCell.rel_times.saccade - S1_onset

        # Add dimension to timestamps (ms).
        for evt in evts:
            evts[evt] = util.add_dim_to_series(1000*evts[evt], ms)  # s --> ms
        self.Events = evts

        # %% Trial parameters

        TrialParams = pd.DataFrame()

        # Add start time, end time and length of each trial.
        if 'Timestamps' in TPLCell._fieldnames:
            tstamps = TPLCell.Timestamps
            tr_times = np.array([(tstamps[i1-1], tstamps[i2-1]) for i1, i2
                                 in TPLCell.Info.successfull_trials_indices])
            tr_times = tr_times * s
            for name, col in [('TrialStart', tr_times[:, 0]),
                              ('TrialStop', tr_times[:, 1]),
                              ('TrialLength', tr_times[:, 1]-tr_times[:, 0])]:
                util.add_quant_col(TrialParams, col, name)

        # Add trial period lengths to trial params.
        TrialParams['S1Len'] = evts['S1 off'] - evts['S1 on']
        TrialParams['S2Len'] = evts['S2 off'] - evts['S2 on']
        TrialParams['DelayLenPrec'] = evts['S2 on'] - evts['S1 off']

        # "Categorical" (rounded) delay length variable.
        delay_lens = util.dim_series_to_array(TrialParams['DelayLenPrec'])
        len_diff = [(i, np.abs(delay_lens - dl))
                    for i, dl in enumerate(constants.del_lens)]
        min_diff = pd.DataFrame(dict(len_diff)).idxmin(axis=1)
        dlens = constants.del_lens[min_diff]
        TrialParams['DelayLen'] = list(util.remove_dim_from_series(dlens))

        # Add target feature to be reported.
        if task == 'com':  # Combined task: target feature varies.
            to_report = trpars.TrialType.replace([0, 1], ['loc', 'dir'])
        else:
            to_report = constants.to_report(task)
        TrialParams['ToReport'] = to_report

        # Init included trials (all trials included initially).
        TrialParams['included'] = np.array(True, dtype=bool)

        # %% Assemble full trial data frame.

        StimParams.columns = StimParams.columns.tolist()
        self.TrData = pd.concat([TrialParams, StimParams, Answer], axis=1)

        # %% Spikes.

        # Trials spikes, aligned to S1 onset.
        spk_trains = [(spk_train - S1_onset[i]) * s  # align to S1 on
                      for i, spk_train in enumerate(TPLCell.TrialSpikes)]
        t_starts = self.ev_times('fixate')  # start of trial
        t_stops = self.ev_times('saccade')  # end of trial
        self._Spikes = Spikes(spk_trains, t_starts, t_stops)

        # %% Rates.

        # Estimate firing rate in each trial.
        for name, (kernel, step) in kset.iterrows():
            self.add_rate(name, kernel, step)
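One step in this constructor that rewards isolating is the "categorical" delay length: each measured delay is snapped to the nearest canonical length by building one column of absolute differences per candidate and taking idxmin across columns. A self-contained sketch, with toy values standing in for constants.del_lens:

import numpy as np
import pandas as pd

del_lens = np.array([1000, 1500, 2000])          # canonical lengths (ms)
delay_lens = np.array([998, 1490, 2011, 1502])   # measured per trial (ms)

len_diff = pd.DataFrame({i: np.abs(delay_lens - dl)
                         for i, dl in enumerate(del_lens)})
min_diff = len_diff.idxmin(axis=1)               # column of nearest candidate
print(del_lens[min_diff])                        # [1000 1500 2000 1500]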
Code Example #6
def test_task_relatedness(u, p_th=0.05):
    """Test if unit has any task related activity."""

    # Init.
    nrate = u.init_nrate()
    wndw_len, minFR = QC_THs.loc[u.get_region()]
    if not len(u.inc_trials()):
        return False, False  # match the (has_min_rate, is_task_related) return

    # Get baseline rate per trial.
    baseline = util.remove_dim_from_series(u.get_prd_rates('baseline'))

    # Init periods and trials sets to test.
    feats = ('Dir', )  # ('Dir', 'Loc')
    prds_trs = [('S1', [('S1', 'early delay', 'late delay'), feats]),
                ('S2', [('S2', 'post-S2'), feats])]
    prds_trs = pd.DataFrame.from_dict(dict(prds_trs), orient='index',
                                      columns=['prds', 'trpars'])

    # Go through each stimulus, period and trial parameter to be tested.
    pval = []
    mean_rate = []
    for stim, (prds, trpars) in prds_trs.iterrows():

        for prd in prds:
            t1s, t2s = u.pr_times(prd, add_latency=False, concat=False)

            for par in trpars:
                ptrs = u.trials_by_param((stim, par))

                for vpar, trs in ptrs.items():

                    # Get rates during period on trials with given param value.
                    rates = u._Rates[nrate].get_rates(trs, t1s, t2s)
                    bs_rates = baseline[trs]

                    # No rates available.
                    if rates.empty:
                        continue

                    # Get sub-period around time with maximal rate.
                    t_peak = rates.mean().idxmax()  # time of peak mean rate
                    tmin, tmax = rates.columns.min(), rates.columns.max()
                    tstart, tend = stats.prd_in_window(t_peak, tmin, tmax,
                                                       wndw_len, ms)
                    tidx = (rates.columns >= tstart) & (rates.columns <= tend)

                    # Test difference from baseline rate.
                    wnd_rates = rates.loc[:, tidx].mean(1)
                    stat, p = stats.mann_whithney_u_test(wnd_rates, bs_rates)
                    pval.append(((stim, prd, par, str(vpar)), p))

                    # Mean rate.
                    mrate = rates.mean().mean()
                    mean_rate.append(((stim, prd, par, str(vpar)), mrate))

    # Format results.
    names = ['stim', 'prd', 'par', 'vpar']
    pval, mean_rate = [
        util.series_from_tuple_list(res, names) for res in (pval, mean_rate)
    ]

    # Save results to unit.
    u.PrdParTests = pd.concat([mean_rate, pval],
                              axis=1,
                              keys=['mean_rate', 'pval'])
    u.PrdParTests['sign'] = u.PrdParTests['pval'] < p_th

    # Save test parameters.
    u.PrdParTests.test = 'mann_whithney_u_test'
    u.PrdParTests.p_th = p_th

    # Is there any task- (stimulus-parameter-) related period?
    has_min_rate = (u.PrdParTests.mean_rate >= minFR).any()
    is_task_related = u.PrdParTests.sign.any()

    return has_min_rate, is_task_related
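The inner test above reduces to: find the time of peak mean rate, take a window around it, average rates per trial within the window, and compare against baseline. A self-contained sketch on simulated data, with scipy.stats.mannwhitneyu standing in for the project's stats.mann_whithney_u_test wrapper and a simple symmetric window in place of stats.prd_in_window:

import numpy as np
import pandas as pd
from scipy.stats import mannwhitneyu

rng = np.random.default_rng(1)
times = np.arange(0, 500, 10)                        # time bins (ms)
rates = pd.DataFrame(rng.poisson(5, (30, len(times))), columns=times)
rates.loc[:, 200:300] += 4                           # evoked response
baseline = rng.poisson(5, 30)                        # baseline rate per trial

t_peak = rates.mean().idxmax()                       # time of max mean rate
wndw_len = 100
tstart, tend = t_peak - wndw_len / 2, t_peak + wndw_len / 2
tidx = (rates.columns >= tstart) & (rates.columns <= tend)

wnd_rates = rates.loc[:, tidx].mean(axis=1)          # per-trial window mean
stat, p = mannwhitneyu(wnd_rates, baseline)
print(p < 0.05)                                      # significant vs baseline?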
Code Example #7
def run_logreg(X, y, n_perm=0, n_pshfl=0, cv_obj=None, ncv=5, Cs=None,
               multi_class=None, solver=None, class_weight='balanced'):
    """
    Run logistic regression with number of cross-validation folds (ncv) and
    internal regularization over a number of regularisation parameters (Cs).
    """

    # Remove missing values from data.
    idx = np.logical_and(np.all(~np.isnan(X), 1),
                         [yi is not None for yi in y])
    X, y = np.array(X[idx]), np.array(y[idx])

    # Init data params.
    classes, vcounts = np.unique(y, return_counts=True)
    ntrials, nfeatures = X.shape
    nclasses = len(classes)
    binary = is_binary(y)

    # Deal with binary case.
    class_names = [classes[1]] if binary else classes
    nclasspars = 1 if binary else nclasses

    # Init results.
    res = [('score', np.nan * np.zeros(ncv)), ('class_names', class_names),
           ('coef', np.nan * np.zeros((nclasspars, nfeatures))), ('C', np.nan),
           ('perm', pd.Series(np.nan, index=['mean', 'std', 'pval'])),
           ('psdo', pd.Series(np.nan, index=['mean', 'std', 'pval']))]
    res = util.series_from_tuple_list(res)

    # Check that there's at least two classes.
    if nclasses < 2:
        if verbose:
            warnings.warn('Number of different values in y is less than 2!')
        return res

    # Check that we have enough trials to split into folds during CV.
    if np.any(vcounts < ncv):
        if verbose:
            warnings.warn('Not enough trials to split into folds during CV')
        return res

    # Init LogRegCV parameters.
    if multi_class is None:
        multi_class = 'ovr' if binary else 'multinomial'

    if solver is None:
        solver = 'lbfgs' if len(y) < 500 else 'sag'

    if cv_obj is None:
        cv_obj = StratifiedKFold(n_splits=ncv, shuffle=True,
                                 random_state=seed)

    if Cs is None:
        Cs = [1]   # fixed C=1: no regularisation search by default

    # Create LogRegress solver.
    LRCV = LogisticRegressionCV(solver=solver, Cs=Cs, cv=cv_obj,
                                multi_class=multi_class, refit=True,
                                class_weight=class_weight)

    # Fit logistic regression.
    class_names, C, score = fit_LRCV(LRCV, X, y)
    res['C'] = C
    res['score'] = score

    # Coefficients (weights) of features by predictors.
    coef = LRCV.coef_
    res['coef'] = coef

    # Run permutation testing.
    if n_perm > 0:
        r = permutation_test_score(LRCV, X, y, scoring='accuracy', cv=cv_obj,
                                   n_permutations=n_perm, random_state=seed)
        _, perm_scores, perm_p = r
        res['perm']['mean'] = perm_scores.mean()
        res['perm']['std'] = perm_scores.std()
        res['perm']['pval'] = perm_p

    # Run decoding on rate matrix with trials shuffled within units.
    if n_pshfl > 0:
        shfld_scores = np.array([fit_LRCV(LRCV, pop_shfl(X, y), y)[2]
                                 for i in range(n_pshfl)]).mean(1)
        res['psdo']['mean'] = shfld_scores.mean()
        res['psdo']['std'] = shfld_scores.std()
        res['psdo']['pval'] = stats.perm_pval(score.mean(), shfld_scores)

    return res
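For reference, the same fit-plus-permutation-test flow in self-contained form on simulated data; parameter values below are illustrative, not the project's defaults:

import numpy as np
from sklearn.linear_model import LogisticRegressionCV
from sklearn.model_selection import StratifiedKFold, permutation_test_score

rng = np.random.default_rng(0)
X = rng.normal(size=(80, 10))
y = rng.integers(0, 2, 80)
X[y == 1, 0] += 1.0                  # one informative feature

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
lrcv = LogisticRegressionCV(Cs=[1], cv=cv, solver='lbfgs',
                            class_weight='balanced', refit=True)

score, perm_scores, pval = permutation_test_score(
    lrcv, X, y, scoring='accuracy', cv=cv, n_permutations=50, random_state=0)
print(score, perm_scores.mean(), pval)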