def test_re_reference():
    """Check ``epf.re_reference`` output for each reference type.

    Runs three reference configurations against one small single-epoch
    frame and compares the re-referenced ``b`` column with fixed values,
    then verifies that a non-list ``ref`` is rejected with ``ValueError``.
    """
    # fake data: one epoch (id 0), three time stamps, three data columns
    samples = np.array([[0, -3, 1, 2, 3], [0, -2, 4, 5, 6], [0, -1, 7, 8, 9]])
    epochs_df = pd.DataFrame(samples, columns=[EPOCH_ID, TIME, "a", "b", "c"])
    eeg_streams = ["b", "c"]

    # (ref, ref_type, expected values of column "b"), run in this order
    cases = [
        (["a"], "linked_pair", [1.5, 3.0, 4.5]),
        (["a"], "new_common", [1, 1, 1]),
        (["a", "b"], "common_average", [0.5, 0.5, 0.5]),
    ]
    for ref, ref_type, expected_b in cases:
        br_epochs_df = epf.re_reference(epochs_df, eeg_streams, ref, ref_type)
        assert list(br_epochs_df.b) == expected_b

    # a set is not an acceptable ``ref``; ref/ref_type are those of the
    # last case above
    with pytest.raises(ValueError) as excinfo:
        ref1 = set(ref)
        br_epochs_df = epf.re_reference(epochs_df, eeg_streams, ref1, ref_type)
    assert "ref should be a list of strings" in str(excinfo.value)

    # the failed call never rebinds br_epochs_df, so this checks the
    # result of the last successful re-reference
    epf._epochs_QC(br_epochs_df, eeg_streams, epoch_id=EPOCH_ID, time=TIME)
def test_center_eeg():
    """Check that ``epf.center_eeg`` zeroes the per-epoch mean in the window.

    The rows inside the ``start``..``stop`` window of every epoch are
    selected by position and their per-epoch column means must be close
    to zero after centering.
    """
    epochs_df = fake_data._get_df()

    eeg_streams = ["x", "z"]
    epoch_id = "epoch_id"
    time = "time"
    # window bounds; epf._find_subscript translates them to subscripts
    start = 0
    stop = 2

    times = epochs_df[time].unique()
    n_times = len(times)
    n_epochs = len(epochs_df[epoch_id].unique())
    istart, istop = epf._find_subscript(times, start, stop)

    # positional row indexes of the window in each epoch; this layout
    # assumes every epoch occupies n_times consecutive rows
    epoch_offsets = np.arange(n_epochs)[:, np.newaxis] * n_times
    center_idxs = (epoch_offsets + np.arange(istart, istop)).flatten()

    centered_epochs_df = epf.center_eeg(
        epochs_df, eeg_streams, start, stop, epoch_id=EPOCH_ID, time="time"
    )

    # verify the windowed mean of each centered stream is 0 in every epoch
    window_rows = centered_epochs_df.iloc[center_idxs, :]
    zero_mns = window_rows.groupby(epoch_id)[eeg_streams].mean()
    assert np.allclose(0, zero_mns)

    epf._epochs_QC(centered_epochs_df, eeg_streams, epoch_id=epoch_id, time=time)
def test_re_reference_2(eeg_streams, ref, ref_type, expected):
    """Check the re-referenced ``b`` column for one supplied configuration.

    The arguments are provided from outside this function (presumably a
    parametrize decorator or fixtures not visible here -- TODO confirm).
    """
    # fake data: one epoch (id 0), three time stamps, three data columns
    samples = np.array([[0, -3, 1, 2, 3], [0, -2, 4, 5, 6], [0, -1, 7, 8, 9]])
    column_names = [EPOCH_ID, TIME, "a", "b", "c"]
    epochs_df = pd.DataFrame(samples, columns=column_names)

    br_epochs_df = epf.re_reference(epochs_df, eeg_streams, ref, ref_type)
    observed_b = list(br_epochs_df.b)
    assert observed_b == expected

    # the re-referenced frame must still pass the epochs format check
    epf._epochs_QC(br_epochs_df, eeg_streams, epoch_id=EPOCH_ID, time=TIME)
def test_duplicate_values_of_epoch_id():
    """``_epochs_QC`` must raise after epoch id 16 is relabeled as 18."""
    fake_spec = {
        "n_epochs": 10,
        "n_samples": 100,
        "n_categories": 2,
        "n_channels": 32,
        "time": TIME,
        "epoch_id": EPOCH_ID,
    }
    epochs_table, channels = fake_data._generate(**fake_spec)

    # overwrite the id of every row in epoch 16 with 18
    is_epoch_16 = epochs_table[EPOCH_ID] == 16
    epochs_table.loc[is_epoch_16, [EPOCH_ID]] = 18

    with pytest.raises(ValueError) as excinfo:
        epf._epochs_QC(epochs_table, channels)
    message = str(excinfo.value)
    assert "Duplicate values of epoch_id" in message
def test_epochs_unequal_snapshots():
    """``_epochs_QC`` must raise after one row is removed from the table."""
    fake_spec = {
        "n_epochs": 10,
        "n_samples": 100,
        "n_categories": 2,
        "n_channels": 32,
        "time": TIME,
        "epoch_id": EPOCH_ID,
    }
    epochs_table, channels = fake_data._generate(**fake_spec)

    # drop, in place, the row whose index label sits at position 42
    dropped_label = epochs_table.index[42]
    epochs_table.drop(dropped_label, inplace=True)

    with pytest.raises(ValueError) as excinfo:
        epf._epochs_QC(epochs_table, channels)
    message = str(excinfo.value)
    assert "differs from previous snapshot" in message
def test_raises_error_on_duplicate_channels():
    """``_epochs_QC`` must raise when a channel column appears twice."""
    fake_spec = {
        "n_epochs": 10,
        "n_samples": 100,
        "n_categories": 2,
        "n_channels": 32,
        "time": TIME,
        "epoch_id": EPOCH_ID,
    }
    epochs_table, channels = fake_data._generate(**fake_spec)

    # append a second copy of the first channel column to the table
    first_channel = channels[0]
    bad_epochs_table = pd.concat(
        [epochs_table, epochs_table[first_channel]], axis=1
    )

    with pytest.raises(ValueError) as excinfo:
        epf._epochs_QC(bad_epochs_table, channels)
    message = str(excinfo.value)
    assert "Duplicate column names" in message
def test_drop_bad_epochs():
    """Check that ``epf.drop_bad_epochs`` keeps exactly the good epochs.

    An epoch counts as good here when ``bads_column`` is 0 on its row at
    time stamp 0. The rows kept by ``drop_bad_epochs`` plus the rows of
    the remaining epochs must add up to the full table.
    """
    epoch_id = "epoch_id"
    time = "time_ms"
    bads_column = "eeg_artifact"
    epochs_df = WR_100_FEATHER_DF.copy()

    epochs_df_good = epf.drop_bad_epochs(
        epochs_df, bads_column, epoch_id=epoch_id, time=time
    )
    # NOTE(review): presumably verifies the returned frame can be assigned
    # to as an independent object -- confirm the intent
    epochs_df_good["new_col"] = 0

    # rows at time == 0. Group by the scalar column name: grouping by the
    # length-1 list ``[time]`` with a scalar ``get_group`` key is
    # deprecated in recent pandas, which expects the tuple ``(0,)`` there.
    group = epochs_df.groupby(time).get_group(0)
    good_idx = list(group[epoch_id][group[bads_column] == 0])
    epochs_df_bad = epochs_df[~epochs_df[epoch_id].isin(good_idx)]
    assert epochs_df_good.shape[0] + epochs_df_bad.shape[0] == epochs_df.shape[0]

    # same call with positional epoch_id and time; the result must pass
    # the epochs format check
    epochs_df_good = epf.drop_bad_epochs(epochs_df, bads_column, epoch_id, time)
    epf._epochs_QC(
        epochs_df_good,
        epochs_df_good.columns.tolist(),
        epoch_id=epoch_id,
        time=time,
    )
def test_epochs_QC_fails():
    """``_epochs_QC`` must reject malformed arguments with ``ValueError``.

    Three bad calls are tried in turn: a non-DataFrame table, data
    streams given as a set, and a data stream name that is not a column.
    """
    epochs_df = WR_100_FEATHER_DF.copy()
    data_streams = ["MiPf", "MiCe", "MiPa", "MiOc"]

    # (epochs argument, data_streams argument, expected message fragment)
    bad_calls = [
        ([1, 2], data_streams, "epochs_df must be a Pandas DataFrame."),
        (
            epochs_df,
            set(data_streams),
            "data_streams should be a list of strings.",
        ),
        (
            epochs_df,
            ["A"],
            "data_streams should all be present in the epochs dataframe,",
        ),
    ]
    for bad_epochs, bad_streams, fragment in bad_calls:
        with pytest.raises(ValueError) as excinfo:
            epf._epochs_QC(bad_epochs, bad_streams)
        assert fragment in str(excinfo.value)
def test_epochs_QC():
    """``_epochs_QC`` must accept the sample epochs with four data streams."""
    index_names = {"epoch_id": "epoch_id", "time": "time_ms"}
    data_streams = ["MiPf", "MiCe", "MiPa", "MiOc"]
    epochs_df = WR_100_FEATHER_DF.copy()

    # passes when no exception is raised
    epf._epochs_QC(epochs_df, data_streams, **index_names)
def categories2eventid(epochs_df, categories, epoch_id, time, time_stamp):
    """Build an MNE events array and event_id dict from categorical variables.

    This uses patsy formulas and dummy coded (full rank) design matrixes
    to construct the MNE format event_id dictionary and corresponding
    events array (events x 3) for tagging and binning single-trial epochs
    for time-domain aggregation into ``mne.Evoked``, e.g., average
    event-related potentials (ERPs).

    A single category is split into the category levels, a.k.a.
    conditions, bins, like so: ``~ 0 + a``. Multiple categories are fully
    crossed like so: ``~ 0 + a:b`` and ``~ 0 + a:b:c``

    Parameters
    ----------
    epochs_df : pandas.DataFrame
        A spudtr format epochs data with ``epoch_id``, ``time`` columns.
    categories : str or iterable of str
        The column name(s) of the categorical variables.
    epoch_id : str
        The name of the column with the unique epoch ids, e.g.,
        ``epoch_id``, ``Epoch_idx``.
    time : str
        The name of the column with the regular epoch time stamps, e.g.,
        ``time``, ``time_ms``, ``time_s``.
    time_stamp : int
        The time stamp in the epoch to look up the categorical variable
        values, e.g., ``0``

    Returns
    -------
    mne_event_id : dict
        An MNE Python event_id dictionary where each item is
        ``label: event_code``. The ``label`` is the column name from the
        patsy full-rank design matrix (incidence matrix) for the
        categories (thank you NJS). The ``event_code`` is the 1-based
        column index in the design matrix.
    mne_events : np.array, shape=(number_of_epochs, 3)
        There is one row for each epoch in ``epochs_df``. Each row is
        ``[epoch_id, 0, mne_event_code]`` where ``mne_event_code`` is the
        newly constructed event code derived from the ``patsy`` design
        matrix column.

    Raises
    ------
    ValueError
        If ``time_stamp`` does not occur in ``epochs_df[time]`` or the
        design matrix does not have one row per epoch.

    Examples
    --------
    Suppose at the specified time stamp the epochs_df categorical columns
    ``a`` and ``b`` have the following levels: ``a: a1, a2``,
    ``b: b1, b2, b3``

    >>> event_id, events = categories2eventid(
    ...     epochs_df, "a", epoch_id, time, time_stamp
    ... )
    >>> event_id
    {'a[a1]': 1, 'a[a2]': 2}

    >>> event_id, events = categories2eventid(
    ...     epochs_df, "b", epoch_id, time, time_stamp
    ... )
    >>> event_id
    {'b[b1]': 1, 'b[b2]': 2, 'b[b3]': 3}

    >>> event_id, events = categories2eventid(
    ...     epochs_df, ["a", "b"], epoch_id, time, time_stamp
    ... )
    >>> event_id
    {'a[a1]:b[b1]': 1, 'a[a2]:b[b1]': 2, 'a[a1]:b[b2]': 3,
     'a[a2]:b[b2]': 4, 'a[a1]:b[b3]': 5, 'a[a2]:b[b3]': 6}

    """
    # modicum of guarding: a single column name is shorthand for a list
    if isinstance(categories, str):
        categories = [categories]

    # check spudtr epochs format
    _epochs_QC(epochs_df, categories, epoch_id=epoch_id, time=time)

    if time_stamp not in epochs_df[time].unique():
        raise ValueError(
            f"time_stamp {time_stamp} not found in epochs_df['{time}']")

    # slice the epoch row at the specified time_stamp, e.g., time==0
    # the category columns at this row are used to build the new
    # event_id dictionary
    events_df = epochs_df[epochs_df[time] == time_stamp].copy()
    for cat in categories:
        events_df[cat] = pd.Categorical(events_df[cat])

    formula = "~ 0 + " + ":".join(categories)
    dm = patsy.dmatrix(formula, events_df)
    dm_cols = dm.design_info.column_names

    # boolean (n_events, n_columns) map of where the indicator 1's are
    is_one = np.asarray(dm) == 1

    # NOTE(review): patsy presumably drops rows with missing category
    # values by default -- confirm. Either way the events array below
    # needs exactly one design matrix row per epoch.
    if len(is_one) != len(events_df):
        raise ValueError(
            f"design matrix has {len(is_one)} rows but there are "
            f"{len(events_df)} epochs at {time} == {time_stamp}, "
            f"check {categories} for missing values"
        )

    # ensure dm is a full rank indicator matrix: n columns = product of
    # factor levels w/ exactly one 1 in each row
    assert (is_one.sum(axis=1) == 1).all()

    # convert indicator design matrix to a 1-base vector that indexes
    # which column of dm has the indicator 1
    # e.g., dm = [[1, 0, 0], [0, 1, 0], [0, 0, 1]] -> [1, 2, 3]
    # argmax along the rows always yields a 1-D vector, so a single
    # epoch works too (squeezing a 1 x 1 array left a 0-d scalar that
    # could not be stacked below)
    dm_col_code = is_one.argmax(axis=1) + 1
    assert dm_col_code.min() == 1
    assert dm_col_code.max() == dm.shape[1]

    # 1-base mne event code dict with column labels from patsy
    mne_event_id = {dm_col: i + 1 for i, dm_col in enumerate(dm_cols)}

    # mne array: n-events x 3. The epoch ids are read from the column
    # named by the ``epoch_id`` argument, not a fixed "epoch_id" column.
    mne_events = np.stack(
        [
            events_df[epoch_id].to_numpy(),
            np.zeros(len(events_df)),
            dm_col_code,
        ],
        axis=1,
    ).astype("int")

    return mne_event_id, mne_events
def spudtr_to_mne_epochs(
    epochs_df,
    eeg_streams,
    epoch_id=None,
    time=None,
    time_unit=None,
    mne_events=None,
    mne_event_ids=None,
):
    """Construct mne.Epochs from a spudtr format epochs pandas.DataFrame

    Parameters
    ----------
    epochs_df : pandas.DataFrame
        spudtr format epochs in rows (epoch x time stamp) and columns
        (categories ... data streams). Epoch indices must be unique, time
        stamps are integers, the same in each epoch. Categories are
        experimental variables, string labels are allowed. Data stream
        columns hold the EEG (or other) data.
    eeg_streams : list of str
        column names of the data streams
    epoch_id : str
        name of the epoch index column
    time : str
        name of the time stamp column, e.g., "time_ms"
    time_unit : float
        time stamp unit in seconds, e.g., 0.001 for milliseconds, 1.0 for
        seconds
    mne_events : np.array of int, shape=(n, 3), optional
        standard MNE event array: first column is the 0-base row index of
        the event in epochs_df, second column is all 0's (legacy, not
        used), third column is the integer event code at that row.
        Negative event codes are unsafe.
    mne_event_ids : dict, optional
        keys and values are 1-1, keys string labels of the integer event
        codes

    Returns
    -------
    epochs : mne.Epochs

    Raises
    ------
    ValueError
        If ``mne_event_ids`` is given without ``mne_events``.
    TypeError
        If ``time_unit`` is not given.
    """
    # check dataframe format
    _epochs_QC(epochs_df, eeg_streams, epoch_id=epoch_id, time=time)

    # no point to an event ids dict without the actual events
    if mne_event_ids is not None and mne_events is None:
        raise ValueError(
            "mne_events must also be specified to use mne_event_ids")

    # the None default would otherwise surface as an opaque TypeError
    # from the arithmetic below; same exception type, clearer message
    if time_unit is None:
        raise TypeError(
            "time_unit must be a number, the time stamp unit in seconds, "
            "e.g., 0.001 for milliseconds"
        )

    # compute sfreq samples / second from the time-stamps. _epochs_QC should
    # ensure regular sampling interval but check anyway ...
    timestamps = epochs_df[time].unique()
    sampling_interval = np.unique(np.diff(timestamps))
    assert len(sampling_interval) == 1  # should be guaranteed by _epochs_QC
    sfreq = 1.0 / (sampling_interval[0] * time_unit)  # samples per second

    montage = _streams2mne_digmont(eeg_streams)
    info = mne.create_info(montage.ch_names, sfreq=sfreq, ch_types="eeg")
    info.set_montage(montage)  # for mne >0.19

    tmin = epochs_df[time].min() * time_unit

    # one (channels x times) array per epoch, in order of first appearance.
    # The epoch ids are looked up by the ``epoch_id`` argument so index
    # columns with other names, e.g., "Epoch_idx", work as well.
    epoch_ids = epochs_df[epoch_id]
    eeg_data = epochs_df[montage.ch_names]
    epochs_data = [
        eeg_data[epoch_ids == epoch_i].to_numpy().T
        for epoch_i in epoch_ids.unique()
    ]

    epochs = mne.EpochsArray(
        epochs_data,
        info=info,
        tmin=tmin,
        events=mne_events,
        event_id=mne_event_ids,
    )
    return epochs