Python get_data_index 예제들, cmlreaders.get_data_index Python 예제들

예제 #1

0

파일 보기

    def test_get_data_index(self, protocol):
        """Check that ``CMLReader.get_data_index`` matches ``get_data_index``.

        ``PathFinder.find`` is patched to return a bundled JSON file from
        ``cmlreaders.test.data``: ``r1.json`` when ``protocol`` is ``"all"``,
        otherwise ``<protocol>.json``.
        """
        # Compare by value: ``is`` tests object identity, which is not
        # guaranteed for equal strings and raises a SyntaxWarning on literals.
        if protocol == "all":
            path = resource_filename("cmlreaders.test.data", "r1.json")
        else:
            path = resource_filename("cmlreaders.test.data", protocol + ".json")

        with patch.object(PathFinder, "find", return_value=path):
            ix = CMLReader.get_data_index(protocol)
            assert all(ix == get_data_index(protocol))

예제 #2

0

파일 보기

def get_monts_and_sess_pairs(subj, exp='TH1'):
    """Return the montage/session pairs recorded for one subject and experiment.

    The "r1" data index is filtered to rows whose ``subject`` equals *subj*
    and whose ``experiment`` equals *exp*; only the ``montage`` and
    ``session`` columns are kept and the rows are renumbered from 0.

    Typical iteration over the result:
        "for (index, (mont, sess)) in get_monts_and_sess_pairs(subj).iterrows():"
    """
    index = get_data_index("r1")
    is_subject = index['subject'] == subj
    is_experiment = index['experiment'] == exp
    pairs = index.loc[is_subject & is_experiment, ['montage', 'session']]
    # Renumber rows 0..n-1 so positions no longer refer to the full index.
    return pairs.reset_index(drop=True)

예제 #3

0

파일 보기

def exp_df(exp='TH1'):
    """Return the rows of the "r1" data index that belong to one experiment.

    Parameters
    ----------
    exp : str
        Value matched against the ``experiment`` column.

    Returns
    -------
    DataFrame with the columns ``['subj', 'montage', 'session', 'exp']``,
    where ``subj`` and ``exp`` are the renamed ``subject`` and
    ``experiment`` columns. Use this for iterations.
    """
    # Silence warnings only while the index loads, and restore the caller's
    # filters afterwards. The previous filterwarnings()/resetwarnings() pair
    # wiped every warning filter configured elsewhere in the process.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        df = get_data_index("r1")

    wanted = ['subject', 'montage', 'session', 'experiment']
    selected = df.loc[df['experiment'] == exp, wanted]
    # rename() builds a new frame, so nothing is assigned into a slice and no
    # SettingWithCopyWarning needs to be suppressed.
    selected = selected.rename(columns={'subject': 'subj',
                                        'experiment': 'exp'})
    return selected[['subj', 'montage', 'session', 'exp']]

예제 #4

0

파일 보기

파일: test_connectivity.py 프로젝트: pennmem/thetamod

def test_resting_state_connectivity(rhino_root):
    """Compare resting-state connectivity for R1354E with stored references.

    Every FR1 session listed for the subject in the "r1" data index is
    loaded; resting events are derived from the countdown events, the EEG
    for those events is read, and both the resting-event offsets and the
    resulting connectivity values are checked against reference ``.npy``
    files.

    Parameters
    ----------
    rhino_root
        Root directory handed to ``get_data_index`` and ``CMLReader``; the
        reference connectivity file is also read from beneath it.
        Presumably supplied as a pytest fixture -- confirm in the test
        configuration.
    """
    subject = "R1354E"

    # All distinct FR1 session numbers recorded for this subject.
    index = get_data_index("r1", rhino_root)
    sessions = index[(index.subject == subject) &
                     (index.experiment == 'FR1')].session.unique()

    all_events = []
    all_resting = []
    data = []

    for session in sessions:
        reader = CMLReader(subject, 'FR1', session, rootdir=rhino_root)
        events = get_countdown_events(reader)
        resting = countdown_to_resting(events, reader.load('sources')['sample_rate'])

        all_events.append(events)
        all_resting.append(resting)

        # EEG is read without rereferencing here.
        eeg = read_eeg_data(reader, resting, reref=False)
        data.append(eeg)

    # Verify that events match Ethan's analysis; his events are ordered in an
    # odd way, so we have to sort them to make sure they match
    ethan = np.load(resource_filename("thetamod.test.data",
                                      "R1354E_events_ethan.npy"))
    assert_equal(sorted(ethan["eegoffset"]),
                 sorted(pd.concat(all_resting).eegoffset.values))

    # Join the per-session recordings, filter them, and compute connectivity.
    eegs = TimeSeries.concatenate(data)
    eegs.data = ButterworthFilter(time_series=eegs.to_ptsa(),
                                  ).filter().values
    conn = get_resting_state_connectivity(eegs.to_mne(), eegs.samplerate)

    # Reference connectivity stored under <rhino_root>/scratch/esolo/tmi_analysis.
    basename = ('{subject}_baseline3trials_network_theta-alpha.npy'
                .format(subject=subject))
    filename = Path(rhino_root).joinpath('scratch', 'esolo', 'tmi_analysis',
                                         subject, basename)

    data = np.load(filename)

    # Dump every intermediate result to the working directory before the
    # final assertion so it can be inspected afterwards.
    np.savez("test_output.npz",
             eeg=eegs.data,
             my_conn=conn,
             ethans_conn=data,
             events=pd.concat(all_events, ignore_index=True).to_records(),
             resting=pd.concat(all_resting, ignore_index=True).to_records())

    # Compare on the logit scale, to 3 decimal places.
    assert_almost_equal(scipy.special.logit(conn),
                        scipy.special.logit(data), 3)

예제 #5

0

파일 보기

def _resolve_original_session(raw, fallback):
    """Normalize a raw ``original_session`` value; use *fallback* if missing."""
    if raw is None:
        return fallback
    from_text = isinstance(raw, str)
    if from_text:
        # Parse as float first so that the text "nan" is accepted.
        raw = np.float64(raw)
    # The NaN check must come before any int() conversion: int(nan) raises.
    if np.isnan(raw):
        return fallback
    if from_text and raw == int(raw):
        return int(raw)
    return raw


def get_cmlevents(subj, montage=None, session=None, exp='TH1'):
    """Return the events DataFrame for one subject, session and montage.

    This events struct does not include pathInfo, since that isn't
    recorded in the system used by cmlreaders. To get pathInfo you
    need to use `read_path_log`.

    Parameters
    ----------
    subj : str
        Value matched against the ``subject`` column of the "r1" index.
    montage : optional
        Montage to load; defaults to the first one listed for the subject
        and experiment.
    session : optional
        Session to load; defaults to the first one listed for the subject
        and experiment.
    exp : str
        Value matched against the ``experiment`` column.

    Returns
    -------
    The events loaded through ``CMLReader`` with two added columns,
    ``original_session_ID`` and ``subject_alias``, and with the
    ``SESS_START`` rows removed.

    Raises
    ------
    IndexError
        If the index holds no row for the requested combination.
    """
    # ------Load data index for RAM
    df = get_data_index("r1")
    # ------Specify the df for this subject and exp
    this_df = df[(df['subject'] == subj) & (df['experiment'] == exp)]
    if this_df.empty:
        raise IndexError(
            "No r1 index entry for subject {!r} in experiment {!r}".format(
                subj, exp))
    # ------Find out the sessions, localization, and montage for this subject
    if session is None:  # default to first sess
        session = this_df['session'].iloc[0]
    if montage is None:  # default to first mont
        montage = this_df['montage'].iloc[0]
    # ------Get more specific df
    this_specific_df = (this_df[(this_df['session'] == session)
                                & (this_df['montage'] == montage)])
    if this_specific_df.empty:
        raise IndexError(
            "No r1 index entry for subject {!r}, experiment {!r}, "
            "session {!r}, montage {!r}".format(subj, exp, session, montage))
    # Only the first matching row is consulted.
    row = this_specific_df.iloc[0]
    loc = int(row['localization'])
    # -------Subjs with a montage above 0 have aliases used in log files
    subject_alias = row['subject_alias']
    # ------For some subjs the sess ID system changed over time,
    #       and we need to know the original sess ID for certain log
    #       files access
    orig_sess_ID = _resolve_original_session(row['original_session'], session)
    # ------Use CMLReader to read the events structure
    reader = CMLReader(subj,
                       exp,
                       session=session,
                       montage=montage,
                       localization=loc)
    events = reader.load('events')
    events['original_session_ID'] = orig_sess_ID
    events['subject_alias'] = subject_alias

    # remove the unhelpful and inconsistent SESS_START event
    events = events[events['type'] != 'SESS_START']

    return events

예제 #6

0

파일 보기

def get_eeg_ptsa(which,
                 reader,
                 stim_events,
                 buffer=50,
                 window=900,
                 stim_duration=500):
    """Read EEG around stimulation events through the PTSA readers.

    Parameters
    ----------
    which : str
        ``"pre"`` reads the window ending ``buffer`` before each event;
        ``"post"`` reads the window starting ``buffer + stim_duration``
        after it.
    reader
        Object providing ``rootdir``, ``subject``, ``experiment`` and
        ``session``; these select the pairs file from the data index.
    stim_events
        Events passed straight to ``EEGReader``.
    buffer, window, stim_duration
        Offsets used to build the relative start and end times
        (presumably milliseconds -- confirm against PTSA's ``EEGReader``).

    Returns
    -------
    The EEG after conversion to bipolar pairs (when it is not already
    bipolar) and a 58-62 band-stop Butterworth filter.

    Raises
    ------
    ValueError
        If ``which`` is neither ``"pre"`` nor ``"post"``.
    """
    from ptsa.data import readers, filters

    if which == "pre":
        rel_start, rel_stop = -(buffer + window), -buffer
    elif which == "post":
        rel_start = buffer + stim_duration
        rel_stop = rel_start + window
    else:
        raise ValueError("Specify 'pre' or 'post'")

    # Find the pairs file registered for this reader's session.
    index = cmlreaders.get_data_index(rootdir=reader.rootdir)
    session_rows = index.loc[(index.subject == reader.subject)
                             & (index.experiment == reader.experiment)
                             & (index.session == reader.session)]
    pair_file = session_rows.pairs.unique()[0]

    tal = readers.TalReader(filename=pair_file)
    monopolar = tal.get_monopolar_channels()
    eeg_reader = readers.EEGReader(events=stim_events,
                                   channels=monopolar,
                                   start_time=rel_start,
                                   end_time=rel_stop)
    eeg = eeg_reader.read()

    # Only map to bipolar pairs when the data did not arrive that way.
    if 'bipolar_pairs' not in eeg.dims:
        mapper = filters.MonopolarToBipolarMapper(
            time_series=eeg,
            bipolar_pairs=tal.get_bipolar_pairs())
        eeg = mapper.filter()

    notch = filters.ButterworthFilter(time_series=eeg,
                                      freqs=[58., 62.],
                                      filt_type='stop')
    return notch.filter()

예제 #7

0

파일 보기

파일: pipeline.py 프로젝트: pennmem/thetamod

    def get_reader(self,
                   subject: Optional[str] = None,
                   experiment: Optional[str] = None,
                   session: Optional[int] = None) -> CMLReader:
        """Build a ``CMLReader`` for the given subject, experiment and session.

        Any argument left as ``None`` falls back to the matching attribute
        on the instance. The montage is taken from the first matching entry
        of the "r1" data index.

        """
        index = get_data_index('r1', self.rootdir)

        if subject is None:
            subject = self.subject
        if experiment is None:
            experiment = self.experiment
        if session is None:
            session = self.session
        session = int(session)

        matches = index.loc[(index.subject == subject)
                            & (index.experiment == experiment)
                            & (index.session == session)]
        montage = matches.montage.unique()[0]

        return CMLReader(subject,
                         experiment,
                         session,
                         montage=montage,
                         rootdir=self.rootdir)

예제 #8

0

파일 보기

파일: pipeline.py 프로젝트: pennmem/thetamod

    def get_resting_connectivity(self) -> np.ndarray:
        """Compute resting state connectivity.

        Reads the EEG for the "resting" events of every FR1 session listed
        for ``self.subject`` in the data index, concatenates the sessions,
        and passes the result to
        ``connectivity.get_resting_state_connectivity``.

        Raises
        ------
        RuntimeError
            If the data index lists no FR1 session for the subject.
        """
        df = get_data_index(rootdir=self.rootdir)
        sessions = df[(df.subject == self.subject)
                      & (df.experiment == "FR1")].session.unique()

        if len(sessions) == 0:
            raise RuntimeError("No FR1 sessions exist for %s" % self.subject)
        # Read EEG data for "resting" events
        eeg_data = []
        for session in sessions:
            reader = self.get_reader(experiment="FR1", session=session)
            # Load the sources record once; it supplies both the sample rate
            # and the file name.
            sources = reader.load('sources')
            rate = sources['sample_rate']
            # Rereference unless the recording file name ends in ".h5".
            reref = not sources['name'].endswith('.h5')
            events = connectivity.get_countdown_events(reader)
            resting = connectivity.countdown_to_resting(events, rate)
            eeg = connectivity.read_eeg_data(reader, resting, reref=reref)
            eeg_data.append(eeg)

        eegs = TimeSeries.concatenate(eeg_data)
        conn = connectivity.get_resting_state_connectivity(
            eegs.to_mne(), eegs.samplerate)
        return conn

예제 #9

0

파일 보기

import xarray as xr

from ptsa.data.filters import ButterworthFilter
from ptsa.data.filters import MorletWaveletFilter
from ptsa.data.filters import ResampleFilter
from ptsa.data.timeseries import TimeSeries

from cmlreaders import CMLReader, get_data_index
from scipy.stats.mstats import zscore
from scipy.io import loadmat
from tqdm import tqdm
from glob import glob

# Load the r1 data index once at import time so it is not reloaded on
# every call.
try:
    r1_data = get_data_index("r1")
except KeyError:
    # Keep the name bound so later code can test for None instead of
    # failing with a NameError far from the real cause.
    r1_data = None
    print('r1 protocol file not found')


def get_subjs_and_montages(task):
    """Returns a DataFrame with columns 'subject' and 'montage' listing participants in a given experiment.

    Parameters
    ----------
    task: str
        The experiment name (ex: TH1, FR1, ...).

    Returns
    -------
    pandas.DataFrame

예제 #10

0

파일 보기

    '/Users/tungphan/PycharmProjects/autoencoder_superEEG/autoencoder_single/cmlreaders/'
)
# The script refers to the package as ``cml`` throughout, so bind that alias
# here; ``import cmlreader`` never defined it.
import cmlreaders as cml
from cmlreaders import get_data_index

rhino_root = "/Volumes/RHINO/"

# Instantiate the finder object
finder = cml.PathFinder(subject="R1389J",
                        experiment="catFR5",
                        session=1,
                        localization=0,
                        montage=0,
                        rootdir=rhino_root)

# Print the location found for a few example data types.
example_data_types = ['pairs', 'task_events', 'voxel_coordinates']
for data_type in example_data_types:
    print(finder.find(data_type))

# Load the r1 index from the same root used above.
r1_data = get_data_index(kind='r1', rootdir=rhino_root)
# Bare expressions: these only display something in an interactive session.
r1_data.head()
fr1_subjects = r1_data[r1_data['experiment'] == 'FR1']['subject'].unique()
fr1_subjects

reader = cml.CMLReader(subject="R1389J",
                       experiment="catFR5",
                       session=1,
                       localization=0,
                       montage=0,
                       rootdir=rhino_root)

예제 #11

0

파일 보기

파일: P_episode.py 프로젝트: jrudoler/cmlcode

def calc_subj_pep(subj,
                  elecs=None,
                  method='bip',
                  relstart=300,
                  relstop=1301,
                  freq_specs=(2, 120, 30),
                  percentthresh=.95,
                  numcyclesthresh=3,
                  load_eeg=False,
                  save_eeg=False,
                  save_result=False,
                  plot=False,
                  kind='r1',
                  experiment='FR1',
                  eeg_path='~/',
                  result_path='~/'):
    """
    Compute average Pepisode (BOSC) values for one subject.

    Inputs:
    subj - subject string
    elecs - list of electrode pairs (strings of the form 'A-B'); when None
        the subject's pairs are read from a fixed pickle file
    method - bip or avg depending on referencing scheme
    relstart, relstop, percentthresh, numcyclesthresh - accepted but not
        used inside this function
    freq_specs - tuple of (low_freq, high_freq, num_freqs)
        for background fitting in BOSC.
    load_eeg - read previously saved EEG from eeg_path instead of loading
        it through the reader
    save_eeg - write the filtered EEG for each session/pair to eeg_path
    save_result - write the four returned arrays to result_path
    plot - create a figure and plot each session's background fit
    kind - protocol passed to the data index
    experiment - experiment whose sessions are analysed
    eeg_path, result_path - directories for saved EEG and results.
        NOTE(review): '~' is not expanded anywhere in this function, so
        the defaults may resolve to a literal '~' directory -- confirm.

    Returns:
    pep_all - average Pepisode for all words at each frequency
    pep_rec - average Pepisode for recalled words at each frequency
    pep_nrec - average Pepisode for non-recalled words at each frequency
    subj_tscores - t-score at each frequency, comparing rec and nrec
        across events
    ** Note that tscore is not itself meaningful because events are not
        independent. Comparing these tscores across subjects, however,
        is valid.

    Raises:
    ValueError - save_eeg and load_eeg are both set, or method is neither
        'bip' nor 'avg' while EEG has to be loaded through the reader
    Exception - no electrode pair was processed, no session could be
        processed for a pair, or every t-score is NaN
    """

    if save_eeg and load_eeg:
        # Raising a bare string is itself a TypeError; use a real exception.
        raise ValueError('Cannot save and load eeg simultaneously.')
    if not load_eeg and method not in ('bip', 'avg'):
        # Without this check an unknown method left `bosc` unbound (or
        # stale from a previous session) further down.
        raise ValueError(
            "method must be 'bip' or 'avg', got {!r}".format(method))

    print('Subject: ', subj)
    if elecs is None:
        # NOTE(review): the file has a .csv suffix but is read as a pickle.
        good_subj = pd.read_pickle(
            '/home1/jrudoler/Theta_Project/hippo_subject_pairs.csv')
        elecs = good_subj[good_subj['Subject'] == subj]['hippo_pairs'].iloc[0]
    subj_pepisode = None
    subj_recalled = None
    subj_tscores = None
    if plot:
        plt.figure(figsize=(12, 6))
    lowfreq, highfreq, numfreqs = freq_specs
    print(elecs)

    # The session list does not depend on the electrode pair, so the data
    # index is loaded once rather than once per pair.
    data = cml.get_data_index(kind=kind)
    data = data[data['experiment'] == experiment]
    sessions = data[data['subject'] == subj]['session'].unique()

    for pair_str in elecs:
        chans = pair_str.split('-')
        pepisodes = None  # events, freqs
        recalled = None  # events, freqs
        tscore = None
        for sess in sessions:
            try:
                print('Loading session {} EEG'.format(sess))
                reader = cml.CMLReader(subject=subj,
                                       experiment=experiment,
                                       session=sess)
                all_events = reader.load('task_events')
                if not os.path.exists(eeg_path):
                    os.makedirs(eeg_path)
                # One location for both saving and loading, whether or not
                # eeg_path ends with a separator.
                eeg_file = os.path.join(
                    eeg_path, 'session_' + str(sess) + '_' + pair_str)
                if load_eeg:
                    signal = TimeSeries.from_hdf(eeg_file)
                elif method == 'bip':
                    pairs = reader.load("pairs")
                    # bipolar eeg
                    bip = reader.load_eeg(
                        scheme=pairs[pairs.label == pair_str])\
                        .to_ptsa().mean(['event', 'channel'])
                    signal = ButterworthFilter(bip,
                                               freq_range=[58., 62.],
                                               filt_type='stop',
                                               order=4).filter()
                    print("Applying BOSC method!")
                    if save_eeg:
                        signal.to_hdf(eeg_file)
                else:  # method == 'avg'
                    contacts = reader.load("contacts")
                    # average eeg
                    eeg = reader.load_eeg(
                        scheme=contacts).to_ptsa().mean('event')
                    # all zeros from a broken lead leads to -inf power,
                    # which results in a LinAlg error for log-log fit
                    # TODO: verify this channel exclusion doesn't cause any
                    # problems. Maybe print a message or raise an error?
                    live_chan_mask = ~np.all(eeg.values == 0, axis=1)
                    contacts = contacts[live_chan_mask]
                    eeg = eeg[live_chan_mask, :]
                    in_pair = (contacts.label.str.contains(chans[0])
                               | contacts.label.str.contains(chans[1]))
                    # Subtract the mean over channels from the pair's
                    # contacts, then average those contacts.
                    avg = (eeg[in_pair, :] - eeg.mean('channel')
                           ).mean('channel')
                    signal = ButterworthFilter(avg,
                                               freq_range=[58., 62.],
                                               filt_type='stop',
                                               order=4).filter()
                    if save_eeg:
                        signal.to_hdf(eeg_file)

                bosc = P_episode(all_events,
                                 signal,
                                 sr=signal.samplerate.values,
                                 lowfreq=lowfreq,
                                 highfreq=highfreq,
                                 numfreqs=numfreqs)

                if plot:
                    bosc.background_fit(plot_type='session')

                if pepisodes is None:
                    pepisodes = bosc.Pepisode
                    # be careful to only use events from lists that have eeg data.
                    # [np.isin(bosc.interest_events.list, self.lists)]
                    recalled = bosc.interest_events.recalled.values
                    tscore, _ = scp.ttest_ind(pepisodes[recalled],
                                              pepisodes[~recalled],
                                              axis=0)
                else:
                    # Always add the new session. The old code skipped it
                    # whenever the running t-score was all NaN and just
                    # recomputed the same NaN from the old data.
                    pepisodes = np.vstack([pepisodes, bosc.Pepisode])
                    recalled = np.hstack(
                        [recalled, bosc.interest_events.recalled.values])
                    t, _ = scp.ttest_ind(pepisodes[recalled],
                                         pepisodes[~recalled],
                                         axis=0)
                    if np.isnan(tscore).all():
                        # Nothing usable so far: replace instead of stacking.
                        tscore = t
                    else:
                        tscore = np.vstack([tscore, t])
                print("Proportion recalled:", recalled.mean())
            except IndexError:
                print('IndexError for subject {} session {}'.format(
                    subj, sess))
            except FileNotFoundError:
                print('FileNotFoundError for {} session {}'.format(subj, sess))
                continue
        if pepisodes is None:
            raise Exception('No working sessions')
        subj_pepisode = pepisodes if subj_pepisode is None else np.dstack(
            [subj_pepisode, pepisodes])
        subj_recalled = recalled if subj_recalled is None else np.vstack(
            [subj_recalled, recalled])
        subj_tscores = tscore if subj_tscores is None else np.vstack(
            [subj_tscores, tscore])
        if np.isnan(subj_tscores).all():
            raise Exception('Too many nan in T-scores. This problem can arise '
                            'when there are no recalled events.')
    if subj_pepisode is None:
        # Empty `elecs`: nothing was processed.
        raise Exception('No electrode pairs to analyze')
    if subj_pepisode.ndim > 2:  # if multiple electrode pairs, average over pairs
        print("Averaging over {} electrodes for subject {}".format(
            subj_pepisode.shape[2], subj))
        subj_pepisode = subj_pepisode.mean(2)
        subj_recalled = subj_recalled.mean(0)
    subj_recalled = subj_recalled.astype(bool)
    if subj_tscores.ndim > 1:
        print(len(sessions), 'sessions')
        subj_tscores = np.nanmean(subj_tscores, axis=0)

    print('{} total events: {} recalled and {} non-recalled'.format(
        len(subj_recalled), sum(subj_recalled), sum(~subj_recalled)))

    pep_rec = subj_pepisode[subj_recalled, :].mean(0)
    pep_nrec = subj_pepisode[~subj_recalled, :].mean(0)
    pep_all = subj_pepisode.mean(0)

    if save_result:
        if not os.path.exists(result_path):
            os.makedirs(result_path)
        for label, values in (('all', pep_all),
                              ('rec', pep_rec),
                              ('nrec', pep_nrec),
                              ('tscore', subj_tscores)):
            np.save(os.path.join(result_path,
                                 '{}_{}_{}'.format(subj, label, method)),
                    values)

    return pep_all, pep_rec, pep_nrec, subj_tscores