def test_load_from_rhino(self, subject, experiment, session, localization,
                         file_type, rhino_root):
    """Load ``file_type`` with a ``CMLReader`` rooted at ``rhino_root``.

    For subjects whose ID starts with ``"LTP"``, a file type that has no
    ``"ltp"`` entry in ``reader.reader_protocols`` must raise
    ``exc.UnsupportedProtocolError`` on load; nothing else is checked in
    that case.  Several file types are always loaded from a fixed
    subject/experiment/session instead of the parametrized one.
    """
    if subject.startswith("LTP"):
        ltp_reader = CMLReader(
            subject=subject,
            experiment=experiment,
            session=session,
            localization=localization,
            rootdir=rhino_root,
        )
        ltp_supported = "ltp" in ltp_reader.reader_protocols[file_type]
        if not ltp_supported:
            with pytest.raises(exc.UnsupportedProtocolError):
                ltp_reader.load(file_type)
            return

    # File types that are always loaded from R1111M / FR2 / session 0.
    r1111m_file_types = [
        "electrode_categories",
        "classifier_summary",
        "math_summary",
        "session_summary",
        "baseline_classifier",
    ]
    if file_type in r1111m_file_types:
        subject, experiment, session = "R1111M", "FR2", 0

    # The used classifier is always loaded from R1409D / FR6.
    if file_type in ["used_classifier"]:
        subject, experiment = 'R1409D', 'FR6'
        session, localization = 0, 0

    # Re-check the prefix here: ``subject`` may have been overridden above.
    wants_montage = file_type in ["contacts", "pairs"]
    if subject.startswith("LTP") and wants_montage:
        pytest.xfail("unsure if montage data exists for LTP")

    reader = CMLReader(
        subject=subject,
        localization=localization,
        experiment=experiment,
        session=session,
        rootdir=rhino_root,
    )
    reader.load(file_type)
def test_rereference(self, subject, reref_possible, index, channel,
                     rhino_root):
    """Check EEG loading with and without a rereferencing scheme.

    When ``reref_possible`` is true, the channel count is expected to change
    from 100 (no scheme) to 141 (pairs scheme).  Otherwise the data loaded
    with and without the scheme must be equal.  In both cases the channel
    at position ``index`` of the scheme-loaded data must equal ``channel``.
    """
    reader = CMLReader(subject=subject, experiment='FR1', session=0,
                       rootdir=rhino_root)
    rate = reader.load("sources")["sample_rate"]

    all_events = reader.load("events")
    events = all_events[all_events.type == "WORD"].iloc[:1]

    rel_start, rel_stop = 0, 100
    # rel_stop is divided by 1000 before scaling by the sample rate.
    expected_samples = int(rate * rel_stop / 1000)

    scheme = reader.load('pairs')

    def load_eeg(**extra):
        # Same fixed event window for every call; only the scheme varies.
        return reader.load_eeg(events=events, rel_start=rel_start,
                               rel_stop=rel_stop, **extra)

    if not reref_possible:
        data_noreref = load_eeg()
        data_reref = load_eeg(scheme=scheme)
        assert_equal(data_noreref.data, data_reref.data)
        assert data_reref.channels[index] == channel
        return

    data = load_eeg()
    assert data.shape == (1, 100, expected_samples)

    data = load_eeg(scheme=scheme)
    assert data.shape == (1, 141, expected_samples)
    assert data.channels[index] == channel
def test_load(self, file_type):
    """Load a data type through a patched ``CMLReader``.

    ``file_type`` is a file name; its extension is stripped to obtain the
    data type that is passed to ``reader.load``.
    """
    sample_file = datafile(file_type)
    with patched_cmlreader(sample_file):
        data_type, _extension = os.path.splitext(file_type)
        reader = CMLReader(subject="R1405E", localization=0,
                           experiment="FR5", session=1)
        reader.load(data_type=data_type)
def test_load_unimplemented(self):
    """Loading an unknown data type must raise ``NotImplementedError``."""
    reader_kwargs = dict(subject='R1405E', localization=0, experiment='FR1',
                         session=0, montage=0)
    with patched_cmlreader():
        reader = CMLReader(**reader_kwargs)
        with pytest.raises(NotImplementedError):
            reader.load("fake_data")
def test_read_categories_rhino(self, kind, read_categories, rhino_root):
    """Load ``kind`` for R1111M and check its electrode category info.

    The electrode categories are loaded separately only when
    ``read_categories`` is true; otherwise ``None`` is passed to the
    category check.
    """
    reader = CMLReader("R1111M", "FR1", 0, 0, 0, rootdir=rhino_root)
    df = reader.load(kind, read_categories=read_categories)

    categories = (
        reader.load("electrode_categories") if read_categories else None
    )

    self.assert_categories_correct(df, categories, read_categories)
def test_read_categories_missing(self, kind, rhino_root):
    """Reading with category info that can't be found raises an error.

    Loading ``kind`` with ``read_categories=True`` for R1132C / TH1 /
    session 0 is expected to raise ``exc.MissingDataError``.
    """
    reader = CMLReader("R1132C", "TH1", 0, 0, 0, rootdir=rhino_root)

    with pytest.raises(exc.MissingDataError):
        reader.load(kind, read_categories=True)
def test_event_discrepancies(self, subject, experiment, session, rhino_root):
    """Load EEG for subjects with known session-number discrepancies.

    These are subjects where the session number in events.json differs from
    the session number used everywhere else.  The test passes if pairs,
    events and a short EEG epoch for one sampled event all load without
    raising.
    """
    reader = CMLReader(subject, experiment, session, rootdir=rhino_root)
    pairs = reader.load("pairs")
    events = reader.load("events")

    one_event = events.sample(n=1)
    reader.load_eeg(one_event, rel_start=0, rel_stop=10, scheme=pairs)
def test_get_eeg(which, subject, experiment, session, shape, rhino_root):
    """``tmi.get_eeg`` on ``STIM_ON`` events returns data of ``shape``."""
    reader = CMLReader(subject, experiment, session, rootdir=rhino_root)

    all_events = reader.load("events")
    is_stim_on = all_events.type == "STIM_ON"
    stim_on_events = all_events[is_stim_on]

    eeg = tmi.get_eeg(which, reader, stim_on_events)
    assert eeg.shape == shape
def test_get_stim_channels(rhino_root):
    """R1111M FR2 session 0 yields exactly one stim channel: 140."""
    reader = CMLReader("R1111M", "FR2", 0, rootdir=rhino_root)
    pairs = reader.load("pairs")
    stim_events = tmi.get_stim_events(reader)

    channels = tmi.get_stim_channels(pairs, stim_events)

    assert len(channels) == 1
    assert channels == [140]
def test_channel_discrepancies(self, subject, experiment, session,
                               eeg_channels, pairs_channels, rhino_root):
    """Load EEG for subjects whose pairs.json and recordings disagree.

    These are subjects with differences between the channels listed in
    pairs.json and the channels actually recorded.  Loading must emit a
    ``MissingChannelsWarning``; the loaded EEG must have ``eeg_channels``
    channels and the pairs table ``pairs_channels`` rows.
    """
    reader = CMLReader(subject, experiment, session, rootdir=rhino_root)
    pairs = reader.load("pairs")
    events = reader.load("events")
    one_event = events.sample(n=1)

    with pytest.warns(MissingChannelsWarning):
        eeg = reader.load_eeg(one_event, rel_start=0, rel_stop=10,
                              scheme=pairs)

    assert len(eeg.channels) == eeg_channels
    assert len(pairs) == pairs_channels
def test_negative_offsets(self, rhino_root):
    """EEG can be loaded when both relative offsets are negative.

    A window of ``rel_start=-100`` to ``rel_stop=-20`` around the first two
    ``WORD`` events of R1298E FR1 session 0 is expected to have 80 samples
    on its last axis.
    """
    reader = CMLReader(subject="R1298E", experiment="FR1", session=0,
                       rootdir=rhino_root)

    all_events = reader.load("events")
    word_events = all_events[all_events["type"] == "WORD"]
    first_two = word_events.iloc[:2]

    eeg = reader.load_eeg(events=first_two, rel_start=-100, rel_stop=-20)
    n_samples = eeg.shape[-1]
    assert n_samples == 80
def test_eeg_reader(self, subject, index, channel, rhino_root):
    """Load a 0-100 window around the first two ``WORD`` events of FR1.

    Checks the length of the time axis, the number of epochs, and that the
    channel at position ``index`` equals ``channel``.
    """
    reader = CMLReader(subject=subject, experiment='FR1', session=0,
                       rootdir=rhino_root)

    all_events = reader.load("events")
    word_mask = all_events["type"] == "WORD"
    first_two = all_events[word_mask].iloc[:2]

    eeg = reader.load_eeg(events=first_two, rel_start=0, rel_stop=100)

    assert len(eeg.time) == 100
    assert eeg.data.shape[0] == 2
    assert eeg.channels[index] == channel
def test_read_eeg(subject, rhino_root):
    """Read resting-state EEG derived from countdown events of FR1 session 0.

    The expected shape is ``(39, 121, 1000)``.
    """
    reader = CMLReader(subject, 'FR1', session=0, rootdir=rhino_root)
    samplerate = reader.load('sources')['sample_rate']

    countdown_events = get_countdown_events(reader)
    resting = countdown_to_resting(countdown_events, samplerate)
    eeg = read_eeg_data(reader, resting, reref=False)

    # R1387E FR1 session 0 had 13 countdown start events and we get 3 epochs
    # per countdown
    n_countdowns, epochs_per_countdown = 13, 3
    expected_events = n_countdowns * epochs_per_countdown

    assert eeg.shape == (expected_events, 121, 1000)
def test_get_distances():
    """``tmi.get_distances`` matches the stored R1260D distance matrix.

    Pairs are loaded from the packaged ``R1260D_pairs.json`` and sorted by
    ``contact_1`` then ``contact_2`` before computing distances.
    """
    pkg = "thetamod.test.data"

    pairs_path = resource_filename(pkg, "R1260D_pairs.json")
    reader = CMLReader("R1260D")
    pairs = reader.load("pairs", file_path=pairs_path)
    pairs = pairs.sort_values(by=['contact_1', 'contact_2'])

    reference_path = resource_filename(pkg, "R1260D_distmat.npy")
    ref_result = np.load(reference_path)

    distmat = tmi.get_distances(pairs)

    assert_almost_equal(distmat, ref_result)
def test_invalidate_eeg(rhino_root):
    """Run ``thetamod.artifact.invalidate_eeg`` on R1286J catFR3 session 0.

    Passes as long as loading the pre/post stim EEG and the invalidation
    call do not raise.
    """
    reader = CMLReader(subject='R1286J', experiment='catFR3', session=0,
                       rootdir=rhino_root)
    # The pairs table is loaded but not used by the rest of the test.
    pairs = reader.load("pairs")

    stim_events = get_stim_events(reader)
    pre_eeg = get_eeg("pre", reader, stim_events)
    post_eeg = get_eeg("post", reader, stim_events)

    thetamod.artifact.invalidate_eeg(reader, pre_eeg, post_eeg, rhino_root)
def test_filter_channels(self, subject, region_key, region_name,
                         expected_channels, tlen, rhino_root):
    """Test that we can actually filter channels.

    Filtering happens via rereferencing, so this is really a special case
    of that: the scheme is the subset of pairs whose ``region_key`` column
    equals ``region_name``.
    """
    reader = CMLReader(subject, "FR1", 0, rootdir=rhino_root)

    pairs = reader.load("pairs")
    in_region = pairs[region_key] == region_name
    scheme = pairs[in_region]

    all_events = reader.load("events")
    word_events = all_events[all_events["type"] == "WORD"]

    eeg = reader.load_eeg(word_events, rel_start=-100, rel_stop=100,
                          scheme=scheme)

    n_events, n_channels, n_samples = eeg.shape[0], eeg.shape[1], eeg.shape[2]
    assert n_events == len(word_events)
    assert n_channels == expected_channels
    assert n_samples == tlen

    # R1384J is the one subject expected to report that rereferencing is
    # not possible.
    expected_rerefable = subject != "R1384J"
    assert eeg.attrs["rereferencing_possible"] is expected_rerefable
def test_resting_state_connectivity(rhino_root):
    """Compare resting-state connectivity for R1354E with stored results.

    Resting epochs from every FR1 session of the subject are concatenated,
    filtered and passed to ``get_resting_state_connectivity``.  The result
    is compared (after a logit transform, to 3 decimals) with a reference
    array loaded from under ``rhino_root``.  Intermediate arrays are also
    written to ``test_output.npz`` in the current working directory.
    """
    subject = "R1354E"

    index = get_data_index("r1", rhino_root)
    subject_fr1 = index[(index.subject == subject) &
                        (index.experiment == 'FR1')]
    sessions = subject_fr1.session.unique()

    all_events, all_resting, eeg_per_session = [], [], []

    for session in sessions:
        reader = CMLReader(subject, 'FR1', session, rootdir=rhino_root)

        events = get_countdown_events(reader)
        samplerate = reader.load('sources')['sample_rate']
        resting = countdown_to_resting(events, samplerate)

        all_events.append(events)
        all_resting.append(resting)
        eeg_per_session.append(read_eeg_data(reader, resting, reref=False))

    # Verify that events match Ethan's analysis; his events are ordered in an
    # odd way, so we have to sort them to make sure they match
    ethan_path = resource_filename("thetamod.test.data",
                                   "R1354E_events_ethan.npy")
    ethan = np.load(ethan_path)
    our_offsets = pd.concat(all_resting).eegoffset.values
    assert_equal(sorted(ethan["eegoffset"]), sorted(our_offsets))

    eegs = TimeSeries.concatenate(eeg_per_session)
    filtered = ButterworthFilter(time_series=eegs.to_ptsa(), ).filter()
    eegs.data = filtered.values

    conn = get_resting_state_connectivity(eegs.to_mne(), eegs.samplerate)

    basename = ('{subject}_baseline3trials_network_theta-alpha.npy'
                .format(subject=subject))
    filename = Path(rhino_root).joinpath('scratch', 'esolo', 'tmi_analysis',
                                         subject, basename)
    ethans_conn = np.load(filename)

    # Dump everything needed to inspect a mismatch after the run.
    events_records = pd.concat(all_events, ignore_index=True).to_records()
    resting_records = pd.concat(all_resting, ignore_index=True).to_records()
    np.savez("test_output.npz",
             eeg=eegs.data,
             my_conn=conn,
             ethans_conn=ethans_conn,
             events=events_records,
             resting=resting_records)

    assert_almost_equal(scipy.special.logit(conn),
                        scipy.special.logit(ethans_conn), 3)
def get_cmlevents(subj, montage=None, session=None, exp='TH1'):
    """Return the reformatted events DataFrame for a subject.

    This events struct does not include pathInfo, since that isn't recorded
    in the system used by cmlreaders. To get pathInfo you need to use
    `read_path_log`.

    Parameters
    ----------
    subj
        Subject ID to select from the "r1" data index.
    montage
        Montage number. Defaults to the montage of the first index row for
        this subject and experiment.
    session
        Session number. Defaults to the session of the first index row for
        this subject and experiment.
    exp
        Experiment name (default: ``'TH1'``).

    Returns
    -------
    The events loaded by ``CMLReader`` with ``SESS_START`` rows removed and
    two added columns: ``original_session_ID`` and ``subject_alias``.
    """
    # ------Load data index for RAM
    df = get_data_index("r1")

    # ------Specify the df for this subject and exp
    this_df = df[(df['subject'] == subj) & (df['experiment'] == exp)]

    # ------Find out the sessions, localization, and montage for this subject
    if session is None:  # default to first sess
        session = this_df['session'].iloc[0]
    if montage is None:  # default to first mont
        montage = this_df['montage'].iloc[0]

    # ------Get more specific df
    this_specific_df = this_df[(this_df['session'] == session) &
                               (this_df['montage'] == montage)]
    loc = int(this_specific_df.iloc[0]['localization'])

    # -------Subjs with a montage above 0 have aliases used in log files
    # Use .iloc[0] because only the first matching row is wanted.
    subject_alias = this_specific_df['subject_alias'].iloc[0]

    # ------For some subjs the sess ID system changed over time, and we need
    # to know the original sess ID for certain log files access
    orig_sess_ID = this_specific_df['original_session'].iloc[0]
    if isinstance(orig_sess_ID, str):
        # Go through float first so that a "nan" string parses.
        orig_sess_ID = np.float64(orig_sess_ID)
    # The NaN check has to come before int(): int(nan) raises ValueError,
    # which previously made the fallback to ``session`` unreachable.
    if np.isnan(orig_sess_ID):
        orig_sess_ID = session
    elif orig_sess_ID == int(orig_sess_ID):
        orig_sess_ID = int(orig_sess_ID)

    # ------Use CMLReader to read the events structure
    reader = CMLReader(subj, exp, session=session, montage=montage,
                       localization=loc)
    events = reader.load('events')
    events['original_session_ID'] = orig_sess_ID
    events['subject_alias'] = subject_alias

    # remove the unhelpful and inconsistent SESS_START event
    events = events[events['type'] != 'SESS_START']

    return events
def is_rerefable(subject: str, experiment: str, session: int,
                 localization: int = 0, montage: int = 0,
                 rootdir: Optional[str] = None) -> bool:
    """Checks if a subject's EEG data can be arbitrarily rereferenced.

    Parameters
    ----------
    subject
        Subject ID.
    experiment
        Experiment.
    session
        Session number.
    localization
        Localization number (default: 0).
    montage
        Montage number (default: 0).
    rootdir
        Root data directory.

    Returns
    -------
    ``False`` when the source file is ``eeg_timeseries.h5`` and the sibling
    ``noreref`` directory holds exactly one ``*.h5`` file; ``True`` in every
    other case.

    """
    from cmlreaders import CMLReader

    reader = CMLReader(subject, experiment, session, localization, montage,
                       rootdir=rootdir)
    sources = reader.load("sources")

    if sources["source_file"] != "eeg_timeseries.h5":
        return True

    noreref_dir = Path(sources["path"]).parent.joinpath("noreref")
    hdf5_count = sum(1 for _ in noreref_dir.glob("*.h5"))

    # only one HDF5 is present which indicates we recorded in hardware
    # bipolar mode
    return hdf5_count != 1
def test_read_categories(self, kind, read_categories):
    """Load ``kind`` for R1111M from packaged data files and check categories.

    ``PathFinder.find``, ``ElectrodeCategoriesReader.load`` and
    ``MontageReader._file_path`` are patched so that the reader uses the
    local ``R1111M_electrode_categories.txt`` and ``R1111M_<kind>.json``
    data files.
    """
    from cmlreaders.path_finder import PathFinder
    from cmlreaders.readers.electrodes import (
        ElectrodeCategoriesReader, MontageReader
    )

    cpath = datafile("R1111M_electrode_categories.txt")
    categories = ElectrodeCategoriesReader.fromfile(cpath)
    mpath = datafile("R1111M_{}.json".format(kind))

    # Entered in this order, exited in reverse by the ExitStack.
    patches = (
        patched_cmlreader(),
        patch.object(PathFinder, "find", return_value=""),
        patch.object(ElectrodeCategoriesReader, "load",
                     return_value=categories),
        patch.object(MontageReader, "_file_path", mpath),
    )

    with ExitStack() as stack:
        for ctx in patches:
            stack.enter_context(ctx)

        reader = CMLReader("R1111M", "FR1", 0, 0, 0)
        df = reader.load(kind, read_categories=read_categories)

    self.assert_categories_correct(df, categories, read_categories)
def test_eeg_reader_with_events(self, subject, rhino_root):
    """Load EEG around ``WORD`` events and check required parameters.

    Note: R1161E is split over two separate sets of files.

    Passing events without both ``rel_start`` and ``rel_stop`` must raise
    ``exc.IncompatibleParametersError``.
    """
    reader = CMLReader(subject=subject, experiment='FR1', session=0,
                       rootdir=rhino_root)

    events = reader.load('events')
    word_events = events[events.type == 'WORD']

    eeg = reader.load_eeg(events=word_events, rel_start=-75, rel_stop=75)
    assert eeg.shape[0] == len(word_events)
    assert eeg.shape[-1] == 150

    ErrorType = exc.IncompatibleParametersError

    # Only rel_start, only rel_stop, then neither - in that order.
    incomplete_windows = ({"rel_start": 0}, {"rel_stop": 0}, {})
    for window in incomplete_windows:
        with pytest.raises(ErrorType):
            reader.load_eeg(events=word_events, **window)
class EEGConverter(object):
    """Convert a session's per-channel "noreref" EEG files to a single file.

    On construction the session's sources file is located through the
    reader's path finder and parsed as JSON.  For every entry in it, the
    files in the sibling ``noreref`` directory whose names start with the
    entry's ``"name"`` are collected (sorted) into ``eeg_files``.

    Parameters
    ----------
    subject, experiment, session
        Passed through to ``CMLReader``.
    outdir
        Output directory, joined onto the resolved root directory.
    rootdir
        Root data directory; resolved with ``get_root_dir``.

    """

    def __init__(self, subject, experiment, session, outdir, rootdir=None):
        self.rootdir = get_root_dir(rootdir)
        self.outdir = Path(self.rootdir).joinpath(outdir)
        self.reader = CMLReader(subject, experiment, session, rootdir=rootdir)

        sources_filename = self.reader.path_finder.find("sources")

        with open(sources_filename, "r") as infile:
            self.sources = json.load(infile)

        noreref_dir = Path(sources_filename).parent.joinpath("noreref")
        # One sorted list of files per sources entry, in sources order.
        self.eeg_files = [
            sorted(noreref_dir.glob(info["name"] + "*"))
            for info in self.sources.values()
        ]

    @property
    def num_channels(self):
        """Number of files found for the first sources entry."""
        return len(self.eeg_files[0])

    @property
    def dtype(self):
        """``"data_format"`` of the first sources entry (``None`` if empty)."""
        for sources in self.sources.values():
            return sources["data_format"]

    def labels_as_array(self) -> np.ndarray:
        """Return the contact labels as a fixed-width byte-string array."""
        contacts = self.reader.load("contacts")
        # Width of the longest label, so that no label is truncated.
        strlen = contacts.label.str.len().max()
        labels = contacts.label.values.astype(f"|S{strlen}")
        return labels

    def to_hdf5(self, filename="eeg_timeseries.h5"):
        """Convert to HDF5.

        Writes the datasets ``labels``, ``eeg`` (indexed by sources entry,
        then channel file, then sample), ``start_time`` and ``sample_rate``
        to ``outdir / filename``.  ``sample_rate`` is taken from the last
        sources entry.
        """
        outpath = self.outdir.joinpath(filename)

        with h5py.File(outpath, "w") as hfile:
            labels = self.labels_as_array()
            hfile.create_dataset("labels", data=labels, chunks=True,
                                 compression="gzip")

            start_timestamps = []

            # Created once, on the first file read.  This used to be reset
            # for every sources entry, so a second entry tried to create
            # the "eeg" dataset again under the same name.
            dset = None

            for dset_num, info in tqdm(enumerate(self.sources.values())):
                dtype = info["data_format"]
                sample_rate = info["sample_rate"]
                start_timestamps.append(info["start_time_ms"] / 1000.)

                files = self.eeg_files[dset_num]
                num_channels = len(files)

                for ch, path in tqdm(enumerate(files)):
                    data = np.fromfile(str(path), dtype=dtype)

                    if dset is None:
                        # Sized from the first file of the first entry.
                        shape = (len(self.sources), num_channels, len(data))
                        dset = hfile.create_dataset(
                            "eeg", shape, dtype=info["data_format"],
                        )

                    dset[dset_num, ch] = data

            start_dset = hfile.create_dataset("start_time",
                                              data=start_timestamps)
            start_dset.attrs["desc"] = b"unix timestamp of session start"
            hfile.create_dataset("sample_rate", data=sample_rate)

    def to_npz(self, filename="eeg_timeseries.npy"):
        """Convert to numpy's format.

        Saves the EEG array (sources entry x channel file x sample) to
        ``outdir / filename`` with ``np.save``.
        """
        outpath = self.outdir.joinpath(filename)

        # NOTE(review): ``arrays`` (labels + eeg) is assembled but only the
        # EEG array is saved below; the method name suggests ``np.savez``
        # of both may have been intended.  Left unchanged so existing
        # output files keep their format - confirm before changing.
        arrays = {"labels": self.labels_as_array()}
        eeg = None

        for dset_num, info in tqdm(enumerate(self.sources.values())):
            files = self.eeg_files[dset_num]

            for ch, path in tqdm(enumerate(files)):
                # Raw sample data: open in binary mode.
                with path.open("rb") as f:
                    data = np.fromfile(f, dtype=self.dtype)

                if eeg is None:
                    shape = (len(self.sources), self.num_channels,
                             data.shape[0])
                    eeg = np.empty(shape, dtype=self.dtype)

                eeg[dset_num, ch] = data

        arrays["eeg"] = eeg

        np.save(outpath, eeg, allow_pickle=False)
def test_ps_events(self, subject, experiment, session, rhino_root):
    """``"events"`` and ``"task_events"`` must load to equal DataFrames."""
    reader = CMLReader(subject, experiment, session, rootdir=rhino_root)
    events = reader.load("events")
    task_events = reader.load("task_events")

    # ``all(events == task_events)`` iterated over the column labels of the
    # comparison frame rather than its values, so it passed regardless of
    # content.  ``equals`` compares the values themselves and treats NaNs
    # in matching positions as equal.
    assert events.equals(task_events)