Example #1
def test_windows_fixed_length_cropped(lazy_loadable_dataset):
    """Test fixed length windowing on cropped data.

    Cropping raw data changes the `first_samp` attribute of the Raw object,
    so it is important to test that the windowers take this into account.
    """
    tmin, tmax = 100, 120

    ds = copy.deepcopy(lazy_loadable_dataset)
    ds.datasets[0].raw.annotations.crop(tmin, tmax)

    crop_ds = copy.deepcopy(lazy_loadable_dataset)
    crop_transform = Preprocessor('crop', tmin=tmin, tmax=tmax)
    preprocess(crop_ds, [crop_transform])

    # Extract windows
    sfreq = ds.datasets[0].raw.info['sfreq']
    tmin_samples, tmax_samples = int(tmin * sfreq), int(tmax * sfreq)

    windows1 = create_fixed_length_windows(concat_ds=ds,
                                           start_offset_samples=tmin_samples,
                                           stop_offset_samples=tmax_samples,
                                           window_size_samples=100,
                                           window_stride_samples=100,
                                           drop_last_window=True)
    windows2 = create_fixed_length_windows(concat_ds=crop_ds,
                                           start_offset_samples=0,
                                           stop_offset_samples=None,
                                           window_size_samples=100,
                                           window_stride_samples=100,
                                           drop_last_window=True)
    assert (windows1[0][0] == windows2[0][0]).all()
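
The `first_samp` behaviour this test guards against is easy to reproduce in
isolation. A minimal sketch on synthetic data (not part of the test suite),
showing how cropping shifts `first_samp` in MNE:

import numpy as np
import mne

info = mne.create_info(ch_names=['ch0'], sfreq=100., ch_types='eeg')
raw = mne.io.RawArray(np.zeros((1, 1000)), info)
assert raw.first_samp == 0
# cropping away the first 2 s shifts first_samp by tmin * sfreq
cropped = raw.copy().crop(tmin=2.)
assert cropped.first_samp == 200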
Example #2
def test_window_sizes_too_large(concat_ds_targets):
    concat_ds, targets = concat_ds_targets
    # Window size larger than all trials
    window_size = len(concat_ds.datasets[0]) + 1
    with pytest.raises(
            ValueError, match=f'Window size {window_size} exceeds trial durat'):
        create_windows_from_events(
            concat_ds=concat_ds,
            window_size_samples=window_size,
            window_stride_samples=window_size,
            trial_start_offset_samples=0,
            trial_stop_offset_samples=0,
            drop_last_window=False,
        )

    with pytest.raises(
            ValueError, match=f'Window size {window_size} exceeds trial durat'):
        create_fixed_length_windows(
            concat_ds=concat_ds,
            window_size_samples=window_size,
            window_stride_samples=window_size,
            drop_last_window=False,
        )

    # Window size larger than a single trial
    annots = concat_ds.datasets[0].raw.annotations
    annot_0 = annots[0]
    # Window size equal to the original trial size
    window_size = int(
        annot_0["duration"] * concat_ds.datasets[0].raw.info['sfreq'])

    # Make first trial 1 second shorter
    annot_0["duration"] -= 1

    # Replace first trial by a new shorter one
    annots.delete(0)
    del annot_0["orig_time"]
    annots.append(**annot_0)
    concat_ds.datasets[0].raw.set_annotations(annots)
    with pytest.warns(
            UserWarning,
            match=".* are being dropped as the window size .*"
    ):
        create_windows_from_events(
            concat_ds=concat_ds,
            window_size_samples=window_size,
            window_stride_samples=window_size,
            trial_start_offset_samples=0,
            trial_stop_offset_samples=0,
            drop_last_window=False,
            accepted_bads_ratio=0.5,
            on_missing='ignore'
        )
Example #3
def test_drop_bad_windows(concat_ds_targets, drop_bad_windows, preload):
    concat_ds, _ = concat_ds_targets
    windows_from_events = create_windows_from_events(
        concat_ds=concat_ds,
        trial_start_offset_samples=0,
        trial_stop_offset_samples=0,
        window_size_samples=100,
        window_stride_samples=100,
        drop_last_window=False,
        preload=preload,
        drop_bad_windows=drop_bad_windows)

    windows_fixed_length = create_fixed_length_windows(
        concat_ds=concat_ds,
        start_offset_samples=0,
        stop_offset_samples=1000,
        window_size_samples=1000,
        window_stride_samples=1000,
        drop_last_window=False,
        preload=preload,
        drop_bad_windows=drop_bad_windows)

    assert (windows_from_events.datasets[0].windows._bad_dropped ==
            drop_bad_windows)
    assert (windows_fixed_length.datasets[0].windows._bad_dropped ==
            drop_bad_windows)
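
When windows are created with drop_bad_windows=False, bad epochs can still be
dropped afterwards, per recording. A minimal sketch (reusing
windows_fixed_length from above; drop_bad is the MNE Epochs method, and
_bad_dropped the private flag it sets):

for ds in windows_fixed_length.datasets:
    ds.windows.drop_bad()  # loads the data if it is not preloaded
    assert ds.windows._bad_dropped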
Example #4
def test_fixed_length_windows_preload_false(lazy_loadable_dataset):
    windows = create_fixed_length_windows(
        concat_ds=lazy_loadable_dataset, start_offset_samples=0,
        stop_offset_samples=100, window_size_samples=100,
        window_stride_samples=100, drop_last_window=False, preload=False)

    assert all(not ds.windows.preload for ds in windows.datasets)
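
For contrast, a sketch of the eager counterpart, mirroring the assertion
above (assumed, not taken from the test suite): with preload=True every
recording's windows should report preload as set.

windows = create_fixed_length_windows(
    concat_ds=lazy_loadable_dataset, start_offset_samples=0,
    stop_offset_samples=100, window_size_samples=100,
    window_stride_samples=100, drop_last_window=False, preload=True)

assert all(ds.windows.preload for ds in windows.datasets)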
Example #5
def test_epochs_kwargs(lazy_loadable_dataset):
    picks = ['ch0']
    on_missing = 'warning'
    flat = {'eeg': 3e-6}
    reject = {'eeg': 43e-6}

    windows = create_windows_from_events(
        concat_ds=lazy_loadable_dataset, trial_start_offset_samples=0,
        trial_stop_offset_samples=0, window_size_samples=100,
        window_stride_samples=100, drop_last_window=False, picks=picks,
        on_missing=on_missing, flat=flat, reject=reject)

    epochs = windows.datasets[0].windows
    assert epochs.ch_names == picks
    assert epochs.reject == reject
    assert epochs.flat == flat
    for ds in windows.datasets:
        assert ds.window_kwargs == [
            ('create_windows_from_events', {
                'infer_mapping': True, 'infer_window_size_stride': False,
                'trial_start_offset_samples': 0, 'trial_stop_offset_samples': 0,
                'window_size_samples': 100, 'window_stride_samples': 100,
                'drop_last_window': False, 'mapping': {'test': 0}, 'preload': False,
                'drop_bad_windows': True, 'picks': picks, 'reject': reject,
                'flat': flat, 'on_missing': on_missing,
                'accepted_bads_ratio': 0.0})
        ]

    windows = create_fixed_length_windows(
        concat_ds=lazy_loadable_dataset, start_offset_samples=0,
        stop_offset_samples=None, window_size_samples=100,
        window_stride_samples=100, drop_last_window=False, picks=picks,
        on_missing=on_missing, flat=flat, reject=reject)

    epochs = windows.datasets[0].windows
    assert epochs.ch_names == picks
    assert epochs.reject == reject
    assert epochs.flat == flat
    for ds in windows.datasets:
        assert ds.window_kwargs == [
            ('create_fixed_length_windows', {
                'start_offset_samples': 0, 'stop_offset_samples': None,
                'window_size_samples': 100, 'window_stride_samples': 100,
                'drop_last_window': False, 'mapping': None, 'preload': False,
                'drop_bad_windows': True, 'picks': picks, 'reject': reject,
                'flat': flat, 'targets_from': 'metadata', 'last_target_only': True,
                'on_missing': on_missing}),
            ('WindowsDataset', {
                'targets_from': 'metadata',
                'last_target_only': True,
            })
        ]
Example #6
def load_example_data(preload, window_len_s, n_subjects=10):
    """Create windowed dataset from subjects of the TUH Abnormal dataset.

    Parameters
    ----------
    preload: bool
        If True, use eager loading, otherwise use lazy loading.
    window_len_s: float
        Window length in seconds.
    n_subjects: int
        Number of subjects to load.

    Returns
    -------
    windows_ds: BaseConcatDataset
        Windowed data.

    .. warning::
        The recordings from the TUH Abnormal corpus do not all share the same
        sampling rate. The following assumes that the files have already been
        resampled to a common sampling rate.
    """
    subject_ids = list(range(n_subjects))
    ds = TUHAbnormal(TUH_PATH,
                     subject_ids=subject_ids,
                     target_name='pathological',
                     preload=preload)

    fs = ds.datasets[0].raw.info['sfreq']
    window_len_samples = int(fs * window_len_s)
    window_stride_samples = int(fs * 4)
    # window_stride_samples = int(fs * window_len_s)
    windows_ds = create_fixed_length_windows(
        ds,
        start_offset_samples=0,
        stop_offset_samples=None,
        window_size_samples=window_len_samples,
        window_stride_samples=window_stride_samples,
        drop_last_window=True,
        preload=preload,
        drop_bad_windows=True)

    # Drop bad epochs
    # XXX: This could be parallelized.
    # XXX: Also, this could be implemented in the Dataset object itself.
    for ds in windows_ds.datasets:
        ds.windows.drop_bad()
        assert ds.windows.preload == preload

    return windows_ds
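
A hypothetical call, assuming TUH_PATH points at a local copy of the corpus
that has already been resampled as the warning above requires:

windows_ds = load_example_data(preload=False, window_len_s=30, n_subjects=2)
print(len(windows_ds.datasets), 'recordings windowed')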
Example #7
def test_fixed_length_windower_n_jobs(lazy_loadable_dataset):
    longer_dataset = BaseConcatDataset([lazy_loadable_dataset.datasets[0]] * 8)
    windows = [create_fixed_length_windows(
        concat_ds=longer_dataset, start_offset_samples=0,
        stop_offset_samples=None, window_size_samples=100,
        window_stride_samples=100, drop_last_window=True, preload=True,
        n_jobs=n_jobs) for n_jobs in [1, 2]]

    assert windows[0].description.equals(windows[1].description)
    for ds1, ds2 in zip(windows[0].datasets, windows[1].datasets):
        # assert ds1.windows == ds2.windows  # Runs locally, fails in CI
        assert np.allclose(ds1.windows.get_data(), ds2.windows.get_data())
        assert pd.Series(ds1.windows.info).to_json() == \
               pd.Series(ds2.windows.info).to_json()
        assert ds1.description.equals(ds2.description)
        assert np.array_equal(ds1.y, ds2.y)
        assert np.array_equal(ds1.crop_inds, ds2.crop_inds)
Example #8
def test_fixed_length_windower(start_offset_samples, window_size_samples,
                               window_stride_samples, drop_last_window,
                               mapping):
    rng = np.random.RandomState(42)
    info = mne.create_info(ch_names=['0', '1'], sfreq=50, ch_types='eeg')
    data = rng.randn(2, 1000)
    raw = mne.io.RawArray(data=data, info=info)
    desc = pd.Series({'pathological': True, 'gender': 'M', 'age': 48})
    base_ds = BaseDataset(raw, desc, target_name="age")
    concat_ds = BaseConcatDataset([base_ds])

    if window_size_samples is None:
        window_size_samples = base_ds.raw.n_times
    stop_offset_samples = data.shape[1] - start_offset_samples
    epochs_ds = create_fixed_length_windows(
        concat_ds,
        start_offset_samples=start_offset_samples,
        stop_offset_samples=stop_offset_samples,
        window_size_samples=window_size_samples,
        window_stride_samples=window_stride_samples,
        drop_last_window=drop_last_window,
        mapping=mapping)

    if mapping is not None:
        assert base_ds.target == 48
        assert all(epochs_ds.datasets[0].windows.metadata['target'] == 0)

    epochs_data = epochs_ds.datasets[0].windows.get_data()

    idxs = np.arange(start_offset_samples,
                     stop_offset_samples - window_size_samples + 1,
                     window_stride_samples)
    if (not drop_last_window
            and idxs[-1] != stop_offset_samples - window_size_samples):
        idxs = np.append(idxs, stop_offset_samples - window_size_samples)

    assert len(idxs) == epochs_data.shape[0], (
        'Number of epochs different than expected')
    assert window_size_samples == epochs_data.shape[2], (
        'Window size different than expected')
    for j, idx in enumerate(idxs):
        np.testing.assert_allclose(
            base_ds.raw.get_data()[:, idx:idx + window_size_samples],
            epochs_data[j, :],
            err_msg=f'Epochs different for epoch {j}')
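
The index arithmetic the test re-derives can be captured in a small helper. A
sketch (hypothetical helper, mirroring the np.arange logic above):

import numpy as np

def expected_window_starts(start, stop, size, stride, drop_last):
    # evenly spaced window starts; without drop_last, append a final
    # window flush with the end if the stride did not land on it
    idxs = np.arange(start, stop - size + 1, stride)
    if not drop_last and idxs[-1] != stop - size:
        idxs = np.append(idxs, stop - size)
    return idxs

# e.g. 1000 samples, size 200, stride 300 -> starts at 0, 300, 600, 800
print(expected_window_starts(0, 1000, 200, 300, drop_last=False))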
Example #9
def prepare_data(n_recs, save, preload, n_jobs):
    if save:
        tmp_dir = tempfile.TemporaryDirectory()
        save_dir = tmp_dir.name
    else:
        save_dir = None

    # (1) Load the data
    concat_ds = SleepPhysionet(subject_ids=range(n_recs),
                               recording_ids=[1],
                               crop_wake_mins=30,
                               preload=preload)
    sfreq = concat_ds.datasets[0].raw.info['sfreq']

    # (2) Preprocess the continuous data
    preprocessors = [
        Preprocessor('crop', tmin=10),
        Preprocessor('filter', l_freq=None, h_freq=30)
    ]
    preprocess(concat_ds,
               preprocessors,
               save_dir=save_dir,
               overwrite=True,
               n_jobs=n_jobs)

    # (3) Window the data
    windows_ds = create_fixed_length_windows(concat_ds,
                                             start_offset_samples=0,
                                             stop_offset_samples=None,
                                             window_size_samples=int(30 * sfreq),
                                             window_stride_samples=int(30 * sfreq),
                                             drop_last_window=True,
                                             preload=preload,
                                             n_jobs=n_jobs)

    # (4) Preprocess the windowed data
    preprocessors = [Preprocessor(scale, channel_wise=True)]
    preprocess(windows_ds,
               preprocessors,
               save_dir=save_dir,
               overwrite=True,
               n_jobs=n_jobs)
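
prepare_data reads like a benchmarking helper; a hypothetical timing driver
(names and parameters assumed, not from the source):

from timeit import default_timer as timer

start = timer()
prepare_data(n_recs=2, save=False, preload=False, n_jobs=1)
print(f'prepare_data took {timer() - start:.1f} s')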
Example #11
    # create one directory for every recording
    rec_path = os.path.join(OUT_PATH, str(rec_i))
    if not os.path.exists(rec_path):
        os.makedirs(rec_path)
    tuh_subset.save(rec_path)
    # save memory by deleting raw recording
    del tuh_subset.datasets[0].raw

###############################################################################
# We reload the preprocessed data again in a lazy fashion (`preload=False`).

tuh_loaded = load_concat_dataset(OUT_PATH, preload=False)

###############################################################################
# We generate compute windows. The resulting dataset is now ready to be used
# for model training.

window_size_samples = 1000
window_stride_samples = 1000
# generate compute windows here and store them to disk
tuh_windows = create_fixed_length_windows(
    tuh_loaded,
    start_offset_samples=0,
    stop_offset_samples=None,
    window_size_samples=window_size_samples,
    window_stride_samples=window_stride_samples,
    drop_last_window=False)
# store the number of windows required for loading later on
tuh_windows.set_description(
    {"n_windows": [len(d) for d in tuh_windows.datasets]})
Example #12
for i, (x, y, window_ind) in enumerate(windows_dataset):
    ax_arr[i].plot(x.T)
    ax_arr[i].set_ylim(-4e-5, 4e-5)
    ax_arr[i].set_title(f"label={y}")
    if i == max_i:
        break

fig.tight_layout()

###############################################################################
# Alternatively, we can create evenly spaced ("sliding") windows using a
# different windower.
sliding_windows_dataset = create_fixed_length_windows(
    dataset,
    start_offset_samples=0,
    stop_offset_samples=0,
    window_size_samples=1200,
    window_stride_samples=1000,
    drop_last_window=False)

print(len(sliding_windows_dataset))
for x, y, window_ind in sliding_windows_dataset:
    print(x.shape, y, window_ind)
    break

sliding_windows_dataset.description

###############################################################################
# Transforms can also be applied on windows in the same way as shown
# above on continuous data:
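
The excerpt ends here; a minimal sketch of what such a window-level transform
could look like, following the same Preprocessor pattern used on the
continuous data in earlier examples (sklearn's channel-wise scale is our
assumption, not part of this excerpt):

from sklearn.preprocessing import scale
from braindecode.preprocessing import Preprocessor, preprocess

preprocess(sliding_windows_dataset, [Preprocessor(scale, channel_wise=True)])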
Example #13
# the preprocessed data is automatically reloaded with ``preload=False``.
#
# .. note::
#    Here we use ``n_jobs=2`` as the machines the documentation is built on
#    only have two cores. This number should be modified based on the machine
#    that is available for preprocessing.

OUT_PATH = tempfile.mkdtemp()  # please insert actual output directory here
tuh_preproc = preprocess(concat_ds=tuh,
                         preprocessors=preprocessors,
                         n_jobs=N_JOBS,
                         save_dir=OUT_PATH)

###############################################################################
# We can finally generate compute windows. The resulting dataset is now ready
# to be used for model training.

window_size_samples = 1000
window_stride_samples = 1000
# generate compute windows here and store them to disk
tuh_windows = create_fixed_length_windows(
    tuh_preproc,
    window_size_samples=window_size_samples,
    window_stride_samples=window_stride_samples,
    drop_last_window=False,
    n_jobs=N_JOBS,
)

for x, y, ind in tuh_windows:
    break
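
The loop above fetches a single window to sanity-check shapes; a sketch of
inspecting it (names from the loop above):

print('x shape:', x.shape, '| y:', y, '| ind:', ind)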
Example #14
######################################################################
# Cut Compute Windows
# ~~~~~~~~~~~~~~~~~~~
#

from braindecode.preprocessing import create_fixed_length_windows

# Create windows using the braindecode windowing function. Its parameters
# define how trials should be cut into windows.

train_set = create_fixed_length_windows(
    train_set,
    start_offset_samples=0,
    stop_offset_samples=None,
    window_size_samples=input_window_samples,
    window_stride_samples=n_preds_per_input,
    drop_last_window=False,
    targets_from='channels',
    last_target_only=False,
    preload=False)

valid_set = create_fixed_length_windows(
    valid_set,
    start_offset_samples=0,
    stop_offset_samples=None,
    window_size_samples=input_window_samples,
    window_stride_samples=n_preds_per_input,
    drop_last_window=False,
    targets_from='channels',
    last_target_only=False,
    preload=False)
Example #15
def test_variable_length_trials_cropped_decoding():
    cuda = False
    set_random_seeds(seed=20210726, cuda=cuda)

    # create fake tuh abnormal dataset
    tuh = _TUHAbnormalMock(path='')
    # fake variable length trials by cropping first recording
    splits = tuh.split([[i] for i in range(len(tuh.datasets))])
    preprocess(
        concat_ds=splits['0'],
        preprocessors=[
            Preprocessor('crop', tmax=300),
        ],
    )
    variable_tuh = BaseConcatDataset(
        [splits[str(i)] for i in range(len(tuh.datasets))])
    # make sure we actually have different length trials
    assert any(np.diff([ds.raw.n_times for ds in variable_tuh.datasets]) != 0)

    # create windows
    variable_tuh_windows = create_fixed_length_windows(
        concat_ds=variable_tuh,
        window_size_samples=1000,
        window_stride_samples=1000,
        drop_last_window=False,
        mapping={
            True: 1,
            False: 0
        },
    )

    # create train and valid set
    splits = variable_tuh_windows.split(
        [[i] for i in range(len(variable_tuh_windows.datasets))])
    variable_tuh_windows_train = BaseConcatDataset(
        [splits[str(i)] for i in range(len(tuh.datasets) - 1)])
    variable_tuh_windows_valid = BaseConcatDataset(
        [splits[str(len(tuh.datasets) - 1)]])
    for x, y, ind in variable_tuh_windows_train:
        break
    train_split = predefined_split(variable_tuh_windows_valid)

    # initialize a model
    model = ShallowFBCSPNet(
        in_chans=x.shape[0],
        n_classes=len(tuh.description.pathological.unique()),
    )
    to_dense_prediction_model(model)
    if cuda:
        model.cuda()

    # create and train a classifier
    clf = EEGClassifier(
        model,
        cropped=True,
        criterion=CroppedLoss,
        criterion__loss_function=torch.nn.functional.nll_loss,
        optimizer=torch.optim.Adam,
        batch_size=32,
        callbacks=['accuracy'],
        train_split=train_split,
    )
    clf.fit(variable_tuh_windows_train, y=None, epochs=3)

    # make sure it does what we expect
    np.testing.assert_allclose(
        clf.history[:, 'train_loss'],
        np.array([
            0.689495325088501,
            0.1353449523448944,
            0.006638816092163324,
        ]),
        rtol=1e-1,
        atol=1e-1,
    )

    np.testing.assert_allclose(
        clf.history[:, 'valid_loss'],
        np.array([
            2.925871,
            3.611423,
            4.23494,
        ]),
        rtol=1e-1,
        atol=1e-1,
    )
Example #16
x, y = tuh[-1]
print('x:', x)
print('y:', y)

###############################################################################
# We will skip preprocessing steps for now, since preprocessing is not the
# aim of this example. Instead, we will directly create compute windows. We
# specify a mapping from genders 'M' and 'F' to integers, since this is
# required for decoding.

tuh_windows = create_fixed_length_windows(
    tuh,
    start_offset_samples=0,
    stop_offset_samples=None,
    window_size_samples=1000,
    window_stride_samples=1000,
    drop_last_window=False,
    mapping={
        'M': 0,
        'F': 1
    },  # map non-digit targets
)
# store the number of windows required for loading later on
tuh_windows.set_description(
    {"n_windows": [len(d) for d in tuh_windows.datasets]})

###############################################################################
# Iterating through the dataset gives x as ndarray(n_channels x 1000), y as
# [age, gender], and ind. Let's look at the last example again.
x, y, ind = tuh_windows[-1]
print('x:', x)
print('y:', y)
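
Since y is the [age, gender] pair here, a short usage sketch (assuming the
window fetched above):

age, gender = y
print('age:', age, '| gender:', gender)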
Example #17
        base_ds = BaseDataset(raw, fake_description, target_name="target")
        datasets.append(base_ds)
    dataset = BaseConcatDataset(datasets)
    return dataset


dataset = fake_regression_dataset(n_fake_recs=5,
                                  n_fake_chs=21,
                                  fake_sfreq=100,
                                  fake_duration_s=60)

windows_dataset = create_fixed_length_windows(
    dataset,
    start_offset_samples=0,
    stop_offset_samples=0,
    window_size_samples=input_window_samples,
    window_stride_samples=n_preds_per_input,
    drop_last_window=False,
    drop_bad_windows=True,
)

splits = windows_dataset.split("session")
train_set = splits["train"]
valid_set = splits["eval"]

regressor = EEGRegressor(
    model,
    cropped=True,
    criterion=CroppedLoss,
    criterion__loss_function=torch.nn.functional.mse_loss,
    optimizer=torch.optim.AdamW,