Example 1
def test_re_reference():

    # create fake data
    epochs_df = pd.DataFrame(
        np.array([[0, -3, 1, 2, 3], [0, -2, 4, 5, 6], [0, -1, 7, 8, 9]]),
        columns=[EPOCH_ID, TIME, "a", "b", "c"],
    )

    eeg_streams = ["b", "c"]

    ref = ["a"]
    ref_type = "linked_pair"
    br_epochs_df = epf.re_reference(epochs_df, eeg_streams, ref, ref_type)
    assert list(br_epochs_df.b) == [1.5, 3.0, 4.5]
    ref = ["a"]
    ref_type = "new_common"
    br_epochs_df = epf.re_reference(epochs_df, eeg_streams, ref, ref_type)
    assert list(br_epochs_df.b) == [1, 1, 1]
    ref = ["a", "b"]
    ref_type = "common_average"
    br_epochs_df = epf.re_reference(epochs_df, eeg_streams, ref, ref_type)
    assert list(br_epochs_df.b) == [0.5, 0.5, 0.5]

    with pytest.raises(ValueError) as excinfo:
        ref1 = set(ref)
        br_epochs_df = epf.re_reference(epochs_df, eeg_streams, ref1, ref_type)
    assert "ref should be a list of strings" in str(excinfo.value)

    epf._epochs_QC(br_epochs_df, eeg_streams, epoch_id=EPOCH_ID, time=TIME)
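
The expected values follow directly from the fake data (``a = [1, 4, 7]``, ``b = [2, 5, 8]``); a worked check of the arithmetic, with the reference rules inferred from the asserted values rather than from spudtr's documentation:

import numpy as np

a, b = np.array([1.0, 4.0, 7.0]), np.array([2.0, 5.0, 8.0])

# "linked_pair" with ref=["a"]: subtract half the reference
assert np.allclose(b - a / 2, [1.5, 3.0, 4.5])

# "new_common" with ref=["a"]: subtract the reference itself
assert np.allclose(b - a, [1.0, 1.0, 1.0])

# "common_average" with ref=["a", "b"]: subtract the mean of the refs
assert np.allclose(b - (a + b) / 2, [0.5, 0.5, 0.5])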
Example 2
def test_center_eeg():
    epochs_df = fake_data._get_df()


    # center two columns (for demonstration, start and stop are epoch row indexes, not times)
    eeg_streams = ["x", "z"]
    epoch_id = "epoch_id"
    time = "time"
    start = 0
    stop = 2

    # locate the rows in the centering window, center, then verify the window means are ~0
    n_times = len(epochs_df[time].unique())
    n_epochs = len(epochs_df[epoch_id].unique())
    times = epochs_df[time].unique()
    istart, istop = epf._find_subscript(times, start, stop)
    center_idxs = np.array(
        [
            np.arange(istart + (i * n_times), istop + (i * n_times))
            for i in range(n_epochs)
        ]
    ).flatten()
    centered_epochs_df = epf.center_eeg(
        epochs_df, eeg_streams, start, stop, epoch_id=epoch_id, time=time
    )
    zero_mns = (
        centered_epochs_df.iloc[center_idxs, :].groupby(epoch_id)[eeg_streams].mean()
    )
    assert np.allclose(0, zero_mns)
    epf._epochs_QC(centered_epochs_df, eeg_streams, epoch_id=epoch_id, time=time)
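
The index bookkeeping above just locates the window rows; the semantics being verified are simple. A minimal sketch, assuming ``center_eeg`` subtracts each epoch's per-stream mean over the start/stop window (``center_sketch`` is a hypothetical illustration, not the spudtr implementation):

import pandas as pd

def center_sketch(df, streams, istart, istop, epoch_id="epoch_id"):
    # subtract each epoch's mean over rows [istart, istop) from all of
    # that epoch's stream values, so the window means become ~0
    out = df.copy()
    for _, idx in out.groupby(epoch_id).groups.items():
        window = idx[istart:istop]
        out.loc[idx, streams] -= df.loc[window, streams].mean()
    return out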
Example 3
def test_re_reference_2(eeg_streams, ref, ref_type, expected):

    # create fake data
    epochs_df = pd.DataFrame(
        np.array([[0, -3, 1, 2, 3], [0, -2, 4, 5, 6], [0, -1, 7, 8, 9]]),
        columns=[EPOCH_ID, TIME, "a", "b", "c"],
    )

    br_epochs_df = epf.re_reference(epochs_df, eeg_streams, ref, ref_type)

    assert list(br_epochs_df.b) == expected
    epf._epochs_QC(br_epochs_df, eeg_streams, epoch_id=EPOCH_ID, time=TIME)
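
The bare argument names suggest this test is driven by a ``pytest.mark.parametrize`` decorator omitted from the excerpt; a plausible reconstruction, using the cases asserted in Example 1:

@pytest.mark.parametrize(
    "eeg_streams,ref,ref_type,expected",
    [
        (["b", "c"], ["a"], "linked_pair", [1.5, 3.0, 4.5]),
        (["b", "c"], ["a"], "new_common", [1, 1, 1]),
        (["b", "c"], ["a", "b"], "common_average", [0.5, 0.5, 0.5]),
    ],
)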
Example 4
def test_duplicate_values_of_epoch_id():
    epochs_table, channels = fake_data._generate(
        n_epochs=10,
        n_samples=100,
        n_categories=2,
        n_channels=32,
        time=TIME,
        epoch_id=EPOCH_ID,
    )
    epochs_table.loc[epochs_table[EPOCH_ID] == 16, [EPOCH_ID]] = 18

    with pytest.raises(ValueError) as excinfo:
        epf._epochs_QC(epochs_table, channels)
    assert "Duplicate values of epoch_id" in str(excinfo.value)
Example 5
def test_epochs_unequal_snapshots():

    epochs_table, channels = fake_data._generate(
        n_epochs=10,
        n_samples=100,
        n_categories=2,
        n_channels=32,
        time=TIME,
        epoch_id=EPOCH_ID,
    )

    epochs_table.drop(epochs_table.index[42], inplace=True)
    with pytest.raises(ValueError) as excinfo:
        epf._epochs_QC(epochs_table, channels)
    assert "differs from previous snapshot" in str(excinfo.value)
Example 6
def test_raises_error_on_duplicate_channels():

    epochs_table, channels = fake_data._generate(
        n_epochs=10,
        n_samples=100,
        n_categories=2,
        n_channels=32,
        time=TIME,
        epoch_id=EPOCH_ID,
    )
    dupe_channel = channels[0]
    dupe_column = epochs_table[dupe_channel]
    bad_epochs_table = pd.concat([epochs_table, dupe_column], axis=1)

    with pytest.raises(ValueError) as excinfo:
        epf._epochs_QC(bad_epochs_table, channels)
    assert "Duplicate column names" in str(excinfo.value)
Example 7
def test_drop_bad_epochs():
    epoch_id = "epoch_id"
    time = "time_ms"

    epochs_df = WR_100_FEATHER_DF.copy()
    bads_column = "eeg_artifact"

    epochs_df_good = epf.drop_bad_epochs(
        epochs_df, bads_column, epoch_id=epoch_id, time=time
    )
    epochs_df_good["new_col"] = 0

    # good epochs are those with artifact flag == 0 at time == 0
    group = epochs_df.groupby([time]).get_group(0)
    good_idx = list(group[epoch_id][group[bads_column] == 0])
    epochs_df_bad = epochs_df[~epochs_df[epoch_id].isin(good_idx)]
    assert epochs_df_good.shape[0] + epochs_df_bad.shape[0] == epochs_df.shape[0]
    epochs_df_good = epf.drop_bad_epochs(epochs_df, bads_column, epoch_id, time)
    epf._epochs_QC(
        epochs_df_good, epochs_df_good.columns.tolist(), epoch_id=epoch_id, time=time,
    )
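
The partition the assertion checks can be written directly in pandas; a sketch of the equivalent filtering, assuming an epoch counts as "good" when its ``eeg_artifact`` flag is 0 at ``time == 0`` (as the test's bookkeeping implies):

at_t0 = epochs_df[epochs_df[time] == 0]
good_ids = at_t0.loc[at_t0[bads_column] == 0, epoch_id]
good = epochs_df[epochs_df[epoch_id].isin(good_ids)]
bad = epochs_df[~epochs_df[epoch_id].isin(good_ids)]
assert len(good) + len(bad) == len(epochs_df)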
Example 8
def test_epochs_QC_fails():
    epochs_df = WR_100_FEATHER_DF.copy()
    data_streams = ["MiPf", "MiCe", "MiPa", "MiOc"]

    with pytest.raises(ValueError) as excinfo:
        epochs_df1 = [1, 2]
        epf._epochs_QC(epochs_df1, data_streams)
    assert "epochs_df must be a Pandas DataFrame." in str(excinfo.value)

    with pytest.raises(ValueError) as excinfo:
        data_streams1 = set(data_streams)
        epf._epochs_QC(epochs_df, data_streams1)
    assert "data_streams should be a list of strings." in str(excinfo.value)

    with pytest.raises(ValueError) as excinfo:
        data_streams1 = ["A"]
        epf._epochs_QC(epochs_df, data_streams1)
    assert "data_streams should all be present in the epochs dataframe," in str(
        excinfo.value
    )
Example 9
def test_epochs_QC():
    epochs_df = WR_100_FEATHER_DF.copy()
    data_streams = ["MiPf", "MiCe", "MiPa", "MiOc"]
    epf._epochs_QC(epochs_df, data_streams, epoch_id="epoch_id", time="time_ms")
Example 10
def categories2eventid(epochs_df, categories, epoch_id, time, time_stamp):
    """Build an MNE events array and event_id dict from one or more categorical variables.

    This uses patsy formulas and dummy coded (full rank) design
    matrices to construct the MNE format event_id dictionary and
    corresponding events array (events x 3) for tagging and binning
    single-trial epochs for time-domain aggregation into
    ``mne.Evoked``, e.g., average event-related potentials (ERPs).

    A single category is split into its levels, a.k.a. conditions or
    bins, like so: ``~ 0 + a``.

    Multiple categories are fully crossed, like so: ``~ 0 + a:b`` and
    ``~ 0 + a:b:c``.
    
    Parameters
    ----------
    epochs_df : pandas.DataFrame
        Spudtr format epochs data with ``epoch_id`` and ``time`` columns.

    categories : str or iterable of str
        The column name(s) of the categorical variables.

    epoch_id : str
        The name of the column with the unique epoch ids, e.g.,
        ``epoch_id``, ``Epoch_idx``.

    time : str
        The name of the column with the regular epoch time stamps, e.g., ``time``,
        ``time_ms``, ``time_s``.

    time_stamp : int
        The time stamp in the epoch at which to look up the
        categorical variable values, e.g., ``0``.

    Returns
    -------
    mne_event_id : dict
        An MNE Python event_id dictionary where each item is
        ``label: event_code``. The ``label`` is the column name from
        the patsy full-rank design matrix (incidence matrix) for the
        categories (thank you NJS). The ``event_code`` is the 1-based
        column index in the design matrix.

    mne_events : np.array, shape=(number_of_epochs, 3)
        There is one row for each epoch in ``epochs_df``. Each row is
        ``[epoch_id, 0, mne_event_code]``, where ``mne_event_code`` is
        the newly constructed event code derived from the ``patsy``
        design matrix column.

    Examples
    --------
    Suppose at the specified time stamp the epochs_df categorical
    columns ``a`` and ``b`` have the following levels: ``a: a1, a2``,
    ``b: b1, b2, b3``.

    >>> categories2eventid(epochs_df, "a", epoch_id, time, time_stamp)
    event_ids = {
        "a[a1]": 1,
        "a[a2]": 2
    }


    >>> categories2eventid(epochs_df, "b", epoch_id, time, time_stamp)
    event_ids = {
        "b[b1]": 1,
        "b[b2]": 2,
        "b[b3]": 3
    }


    >>> categories2eventid(epochs_df, ["a", "b"], epoch_id, time, time_stamp)
    event_ids = {
        'a[a1]:b[b1]': 1,
        'a[a2]:b[b1]': 2,
        'a[a1]:b[b2]': 3,
        'a[a2]:b[b2]': 4,
        'a[a1]:b[b3]': 5,
        'a[a2]:b[b3]': 6
    }



    """

    # modicum of guarding
    if isinstance(categories, str):
        categories = [categories]

    # check spudtr epochs format
    _ = _epochs_QC(epochs_df, categories, epoch_id=epoch_id, time=time)

    if time_stamp not in epochs_df[time].unique():
        raise ValueError(
            f"time_stamp {time_stamp} not found in epochs_df['{time}']")

    # slice the epoch row at the specified time_stamp, e.g., time==0
    # the category columns at this row are used to build the new
    # event_id dictionary
    events_df = epochs_df[epochs_df[time] == time_stamp].copy()
    for cat in categories:
        events_df[cat] = pd.Categorical(events_df[cat])

    # ensure dm is a full-rank indicator matrix: n columns = product of
    # factor levels, with exactly one 1 in each row
    formula = "~ 0 + " + ":".join(categories)
    dm = patsy.dmatrix(formula, events_df)
    assert all(len(np.where(row == 1)[0]) == 1 for row in np.asarray(dm))
    dm_cols = dm.design_info.column_names

    # convert the indicator design matrix to a 1-based vector that
    # indexes which column of dm holds the 1,
    # e.g., dm = [[1, 0, 0], [0, 1, 0], [0, 0, 1]] -> [1, 2, 3]

    dm_col_code = np.array(
        [np.where(dm[i, :] == 1)[0] + 1 for i in range(len(dm))]).squeeze()
    assert dm_col_code.min() == 1
    assert dm_col_code.max() == dm.shape[1]

    # 1-based MNE event code dict with column labels from patsy
    mne_event_id = {dm_col: i + 1 for i, dm_col in enumerate(dm_cols)}

    # mne array: n-events x 3
    mne_events = np.stack(
        [
            events_df[epoch_id].to_numpy(),
            np.zeros(len(events_df)),
            dm_col_code,
        ],
        axis=1,
    ).astype("int")
    return mne_event_id, mne_events
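
The core trick is easy to see in isolation: with no intercept, the ``~ 0 + a:b`` formula yields a full-rank indicator matrix with exactly one 1 per row, and that 1's 1-based column position becomes the event code. A minimal standalone sketch with toy data (not from the package's test suite):

import numpy as np
import pandas as pd
import patsy

toy = pd.DataFrame({"a": ["a1", "a2", "a1"], "b": ["b1", "b1", "b2"]})
dm = patsy.dmatrix("~ 0 + a:b", toy)
print(dm.design_info.column_names)
# ['a[a1]:b[b1]', 'a[a2]:b[b1]', 'a[a1]:b[b2]', 'a[a2]:b[b2]']

# each row holds exactly one 1; its 1-based column index is the event code
codes = np.asarray(dm).argmax(axis=1) + 1
print(codes)  # [1 2 3]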
Example 11
def spudtr_to_mne_epochs(
    epochs_df,
    eeg_streams,
    epoch_id=None,
    time=None,
    time_unit=None,
    mne_events=None,
    mne_event_ids=None,
):
    """construct mne.Epochs from a spudtr format epochs pandas.Dataframe
    
    Parameters
    ----------
    epochs_df : pandas.DataFrame
        spudtr format epochs in rows (epoch x time stamp) and columns
        (categories ... data streams). Epoch indices must be unique;
        time stamps are integers and the same in each epoch. Categories
        are experimental variables, string labels are allowed. Data
        stream columns hold the EEG (or other) data.

    eeg_streams : list of str
        column names of the data streams

    epoch_id : str
        name of the epoch index

    time : str
        name of the time stamp index, e.g., "time_ms" 

    time_unit : float
        time stamp unit in seconds, e.g., 0.001 for milliseconds, 1.0
        for seconds

    mne_events : np.array of int, shape=(n, 3), optional
        standard MNE event array: first column is the 0-based row index
        of the event in epochs_df, second column is all 0's (legacy,
        not used), third column is the integer event code at that
        row. Negative event codes are unsafe.

    mne_event_ids : dict, optional
        keys and values are 1-1; the keys are string labels for the
        integer event codes

    
    Returns
    -------
    epochs : mne.Epochs

    """

    # check dataframe format
    _epochs_QC(epochs_df, eeg_streams, epoch_id=epoch_id, time=time)

    # no point to an event ids dict without the actual events
    if mne_event_ids is not None and mne_events is None:
        raise ValueError(
            "mne_events must also be specified to use mne_event_ids")

    # compute sfreq samples / second from the time-stamps. _epochs_QC should
    # ensure regular sampling interval but check anyway ...
    timestamps = epochs_df[time].unique()
    sampling_intervals = np.unique(np.diff(timestamps))
    assert len(sampling_intervals) == 1  # should be guaranteed by _epochs_QC
    sfreq = 1.0 / (sampling_intervals[0] * time_unit)  # samples per second

    montage = _streams2mne_digmont(eeg_streams)
    info = mne.create_info(montage.ch_names, sfreq=sfreq, ch_types="eeg")
    info.set_montage(montage)  # for mne >0.19

    tmin = epochs_df[time].min() * time_unit
    epochs_data = []
    for epoch_i in epochs_df[epoch_id].unique():
        # select this epoch's rows and channel columns; transpose to
        # channels x samples for mne.EpochsArray
        epoch1 = epochs_df.loc[
            epochs_df[epoch_id] == epoch_i, montage.ch_names
        ].to_numpy()
        epochs_data.append(epoch1.T)
    epochs = mne.EpochsArray(
        epochs_data,
        info=info,
        tmin=tmin,
        events=mne_events,
        event_id=mne_event_ids,
    )

    return epochs
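
Taken together, the two functions support the workflow the docstrings describe; a hypothetical end-to-end sketch, where the ``stim`` category column is an assumption and the stream and column names are borrowed from the tests above:

# hypothetical usage; "stim" and the column names are assumptions
event_ids, events = categories2eventid(
    epochs_df, categories="stim", epoch_id="epoch_id", time="time_ms", time_stamp=0
)
mne_epochs = spudtr_to_mne_epochs(
    epochs_df,
    eeg_streams=["MiPf", "MiCe", "MiPa", "MiOc"],
    epoch_id="epoch_id",
    time="time_ms",
    time_unit=0.001,  # time_ms stamps are milliseconds
    mne_events=events,
    mne_event_ids=event_ids,
)
evoked = mne_epochs.average()  # time-domain aggregation into mne.Evoked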