Example #1
# Imports assumed for this snippet (braindecode 0.5-era module layout).
import logging

import numpy as np

from braindecode.datasets import BaseConcatDataset
from braindecode.datautil import load_concat_dataset
from braindecode.datautil.preprocess import (
    preprocess, MNEPreproc, NumpyPreproc, exponential_moving_demean)
from braindecode.datautil.windowers import create_fixed_length_windows

log = logging.getLogger(__name__)


def load_train_valid_tuh(n_subjects, n_seconds, ids_to_load):
    path = '/home/schirrmr/data/preproced-tuh/all-sensors-32-hz/'
    log.info("Load concat dataset...")
    dataset = load_concat_dataset(path, preload=False, ids_to_load=ids_to_load)
    whole_train_set = dataset.split('session')['train']
    n_max_minutes = int(np.ceil(n_seconds / 60) + 2)
    sfreq = whole_train_set.datasets[0].raw.info['sfreq']
    log.info("Preprocess concat dataset...")
    preprocess(whole_train_set, [
        # keep only the first n_max_minutes of each recording
        MNEPreproc('crop', tmin=0, tmax=n_max_minutes * 60, include_tmax=True),
        # clip amplitudes to [-80, 80] and rescale
        NumpyPreproc(fn=lambda x: np.clip(x, -80, 80)),
        NumpyPreproc(fn=lambda x: x / 3),
        # demean with an exponentially moving average
        NumpyPreproc(fn=exponential_moving_demean,
                     init_block_size=int(sfreq * 10),
                     factor_new=1 / (sfreq * 5)),
    ])
    subject_datasets = whole_train_set.split('subject')

    n_split = int(np.round(n_subjects * 0.75))
    keys = list(subject_datasets.keys())
    train_sets = [
        d for i in range(n_split) for d in subject_datasets[keys[i]].datasets
    ]
    train_set = BaseConcatDataset(train_sets)
    valid_sets = [
        d for i in range(n_split, n_subjects)
        for d in subject_datasets[keys[i]].datasets
    ]
    valid_set = BaseConcatDataset(valid_sets)

    train_set = create_fixed_length_windows(
        train_set,
        start_offset_samples=60 * 32,
        stop_offset_samples=60 * 32 + 32 * n_seconds,
        preload=True,
        window_size_samples=128,
        window_stride_samples=64,
        drop_last_window=True,
    )

    valid_set = create_fixed_length_windows(
        valid_set,
        start_offset_samples=60 * 32,
        stop_offset_samples=60 * 32 + 32 * n_seconds,
        preload=True,
        window_size_samples=128,
        window_stride_samples=64,
        drop_last_window=True,
    )
    return train_set, valid_set
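# Hedged usage sketch: the argument values below are made up, and the
# hard-coded TUH path above must exist for this to run.
train_set, valid_set = load_train_valid_tuh(
    n_subjects=10, n_seconds=600, ids_to_load=list(range(20)))
print(len(train_set), len(valid_set))  # number of windows per split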
Example #2
from braindecode.datautil.windowers import create_fixed_length_windows


def windows_concat_ds(base_concat_ds):
    return create_fixed_length_windows(base_concat_ds,
                                       start_offset_samples=100,
                                       stop_offset_samples=0,
                                       window_size_samples=1000,
                                       window_stride_samples=1000,
                                       drop_last_window=True,
                                       mapping=None,
                                       preload=True)
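# Hedged sketch of driving this helper, mirroring Example #5's raw_ds setup.
from braindecode.datasets import MOABBDataset

raw_ds = MOABBDataset(dataset_name='BNCI2014001', subject_ids=[1])
windows = windows_concat_ds(raw_ds)
x, y, ind = windows[0]  # each item is (signal, target, window indices)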
Example #3
# Imports assumed for this snippet; fetch_data_with_moabb lived in
# braindecode.datasets.moabb in the braindecode versions this code targets.
from braindecode.datasets import BaseConcatDataset, BaseDataset
from braindecode.datasets.moabb import fetch_data_with_moabb
from braindecode.datautil.windowers import create_fixed_length_windows


def windows_ds():
    raws, description = fetch_data_with_moabb(dataset_name='BNCI2014001',
                                              subject_ids=4)
    ds = [BaseDataset(raws[i], description.iloc[i]) for i in range(3)]
    concat_ds = BaseConcatDataset(ds)

    windows_ds = create_fixed_length_windows(concat_ds=concat_ds,
                                             start_offset_samples=0,
                                             stop_offset_samples=None,
                                             window_size_samples=500,
                                             window_stride_samples=500,
                                             drop_last_window=False,
                                             preload=False)

    return windows_ds
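# Outside of a pytest run the function can be exercised directly:
ds = windows_ds()
x, y, ind = ds[0]  # (n_channels, 500) signal, target, window indices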
Example #4
# TUH_PATH (the corpus location) is defined elsewhere in the original script.
from braindecode.datasets import TUHAbnormal
from braindecode.datautil.windowers import create_fixed_length_windows


def load_example_data(preload, window_len_s, n_subjects=10):
    """Create windowed dataset from subjects of the TUH Abnormal dataset.

    Parameters
    ----------
    preload: bool
        If True, use eager loading, otherwise use lazy loading.
    window_len_s: int
        Window length in seconds.
    n_subjects: int
        Number of subjects to load.

    Returns
    -------
    windows_ds: BaseConcatDataset
        Windowed data.

    .. warning::
        The recordings from the TUH Abnormal corpus do not all share the same
        sampling rate. The following assumes that the files have already been
        resampled to a common sampling rate.
    """
    subject_ids = list(range(n_subjects))
    ds = TUHAbnormal(TUH_PATH,
                     subject_ids=subject_ids,
                     target_name='pathological',
                     preload=preload)

    fs = ds.datasets[0].raw.info['sfreq']
    window_len_samples = int(fs * window_len_s)
    window_stride_samples = int(fs * 4)
    # window_stride_samples = int(fs * window_len_s)
    windows_ds = create_fixed_length_windows(
        ds,
        start_offset_samples=0,
        stop_offset_samples=None,
        window_size_samples=window_len_samples,
        window_stride_samples=window_stride_samples,
        drop_last_window=True,
        preload=preload,
        drop_bad_windows=True)

    # Drop bad epochs
    # XXX: This could be parallelized.
    # XXX: Also, this could be implemented in the Dataset object itself.
    for win_ds in windows_ds.datasets:
        win_ds.windows.drop_bad()
        assert win_ds.windows.preload == preload

    return windows_ds
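# Hedged usage sketch; TUH_PATH must point at a resampled copy of the corpus
# (see the warning in the docstring above).
windows_ds = load_example_data(preload=False, window_len_s=30, n_subjects=10)
x, y, ind = windows_ds[0]
print(x.shape, y)  # (n_channels, int(fs * 30)), pathological label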
Example #5
import copy

import pytest

from braindecode.datasets import MOABBDataset
from braindecode.datautil.preprocess import preprocess, zscore, scale, \
    Preprocessor, filterbank, exponential_moving_demean, \
    exponential_moving_standardize, MNEPreproc, NumpyPreproc
from braindecode.datautil.windowers import create_fixed_length_windows

# We can't use fixtures with scope='module' as the dataset objects are modified
# in place during preprocessing. To avoid the long setup time caused by calling
# the dataset/windowing functions multiple times, we instantiate the dataset
# objects once and deep-copy them in the fixtures.
raw_ds = MOABBDataset(dataset_name='BNCI2014001', subject_ids=[1, 2])
windows_ds = create_fixed_length_windows(raw_ds,
                                         start_offset_samples=100,
                                         stop_offset_samples=None,
                                         window_size_samples=1000,
                                         window_stride_samples=1000,
                                         drop_last_window=True,
                                         mapping=None,
                                         preload=True)


@pytest.fixture
def base_concat_ds():
    return copy.deepcopy(raw_ds)


@pytest.fixture
def windows_concat_ds():
    return copy.deepcopy(windows_ds)
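# A minimal sketch of a test consuming these fixtures (hypothetical, not part
# of the original file):
def test_window_length(windows_concat_ds):
    x, y, ind = windows_concat_ds[0]
    assert x.shape[-1] == 1000  # matches window_size_samples above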

Example #6
for rec_i, tuh_subset in tuh_splits.items():
    # implement preprocess for BaseDatasets? Would remove necessity
    # to split above
    preprocess(tuh_subset, preprocessors)

    # update description of the recording(s)
    tuh_subset.description["sfreq"] = len(tuh_subset.datasets) * [sfreq]
    tuh_subset.description["reference"] = len(tuh_subset.datasets) * ['ar']
    tuh_subset.description["n_samples"] = [len(d) for d in tuh_subset.datasets]

    if create_compute_windows:
        # generate compute windows here and store them to disk
        tuh_windows = create_fixed_length_windows(
            tuh_subset,
            start_offset_samples=0,
            stop_offset_samples=None,
            window_size_samples=window_size_samples,
            window_stride_samples=window_stride_samples,
            drop_last_window=False
        )
        # save memory by deleting raw recording
        del tuh_subset
        # store the number of windows required for loading later on
        tuh_windows.description["n_windows"] = [len(d) for d in
                                                tuh_windows.datasets]

        # create one directory for every recording
        rec_path = os.path.join(OUT_PATH, str(rec_i))
        if not os.path.exists(rec_path):
            os.makedirs(rec_path)
        save_concat_dataset(rec_path, tuh_windows)
        out_i += 1
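# Sketch: a stored recording can be restored later with braindecode's
# load_concat_dataset, the counterpart of save_concat_dataset used above.
# (After the loop, rec_path points at the last recording written.)
from braindecode.datautil import load_concat_dataset
windows_back = load_concat_dataset(rec_path, preload=False)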
Example #7
import matplotlib.pyplot as plt

# The original snippet starts mid-statement; max_i and the subplots call are
# assumed here so that the plotting loop below runs.
max_i = 2
fig, ax_arr = plt.subplots(1, max_i + 1,
                           figsize=((max_i + 1) * 7, 5),
                           sharex=True,
                           sharey=True)
for i, (x, y, window_ind) in enumerate(windows_ds):
    ax_arr[i].plot(x.T)
    ax_arr[i].set_ylim(-0.0002, 0.0002)
    ax_arr[i].set_title(f"label={y}")
    if i == max_i:
        break

###############################################################################
# Alternatively, we can create evenly spaced ("sliding") windows using a
# different windower.
sliding_windows_ds = create_fixed_length_windows(ds,
                                                 start_offset_samples=0,
                                                 stop_offset_samples=0,
                                                 window_size_samples=1200,
                                                 window_stride_samples=1000,
                                                 drop_last_window=False)

print(len(sliding_windows_ds))
for x, y, window_ind in sliding_windows_ds:
    print(x.shape, y, window_ind)
    break

###############################################################################
# Transforms can also be applied on windows in the same way as shown
# above on continuous data:


def crop_windows(windows, start_offset_samples, stop_offset_samples):
    fs = windows.info["sfreq"]
    # crop the mne.Epochs object in place, converting samples to seconds
    windows.crop(tmin=start_offset_samples / fs,
                 tmax=stop_offset_samples / fs, include_tmax=False)
Example #8
import matplotlib.pyplot as plt

# As in Example #7, the snippet starts mid-statement; max_i and the subplots
# call are assumed.
max_i = 2
fig, ax_arr = plt.subplots(1, max_i + 1,
                           figsize=((max_i + 1) * 7, 5),
                           sharex=True,
                           sharey=True)
for i, (x, y, supercrop_ind) in enumerate(windows_ds):
    ax_arr[i].plot(x.T)
    ax_arr[i].set_ylim(-0.0002, 0.0002)
    ax_arr[i].set_title(f"label={y}")
    if i == max_i:
        break

###############################################################################
# Alternatively, we can create evenly spaced ("sliding") windows using a
# different windower.
sliding_windows_ds = create_fixed_length_windows(ds,
                                                 start_offset_samples=0,
                                                 stop_offset_samples=None,
                                                 supercrop_size_samples=1200,
                                                 supercrop_stride_samples=1000,
                                                 drop_samples=False)

print(len(sliding_windows_ds))
for x, y, supercrop_ind in sliding_windows_ds:
    print(x.shape, y, supercrop_ind)
    break

###############################################################################
# Transforms can also be applied on supercrops/windows in the same way as shown
# above on continuous data:


def crop_windows(windows, start_offset_samples, stop_offset_samples):
    fs = windows.info["sfreq"]
    # crop the mne.Epochs object in place, converting samples to seconds
    windows.crop(tmin=start_offset_samples / fs,
                 tmax=stop_offset_samples / fs, include_tmax=False)
Example #9
# Imports assumed for this snippet (braindecode 0.5-era module layout).
import logging

import mne
import numpy as np
import pandas as pd

from braindecode.datasets import BaseConcatDataset, BaseDataset
from braindecode.datautil.windowers import create_fixed_length_windows

log = logging.getLogger(__name__)


def create_from_X_y(X,
                    y,
                    drop_last_window,
                    sfreq=None,
                    ch_names=None,
                    window_size_samples=None,
                    window_stride_samples=None):
    """Create a BaseConcatDataset of WindowsDatasets from X and y to be used for
    decoding with skorch and braindecode, where X is a list of pre-cut trials
    and y are corresponding targets.

    Parameters
    ----------
    X: array-like
        list of pre-cut trials as n_trials x n_channels x n_times
    y: array-like
        targets corresponding to the trials
    sfreq: float
        common sampling frequency of all trials
    ch_names: array-like
        channel names of the trials
    drop_last_window: bool
        whether to drop the last window when the windows do not evenly divide
        the continuous signal (if False, the last window overlaps the
        preceding one)
    window_size_samples: int
        window size
    window_stride_samples: int
        stride between windows

    Returns
    -------
    windows_datasets: BaseConcatDataset
        X and y transformed to a dataset format that is compatible with skorch
        and braindecode
    """
    n_samples_per_x = []
    base_datasets = []
    if sfreq is None:
        sfreq = 100
        log.info("No sampling frequency given, set to 100 Hz.")
    if ch_names is None:
        ch_names = [str(i) for i in range(X.shape[1])]
        log.info(f"No channel names given, set to 0-{X.shape[1]}).")

    for x, target in zip(X, y):
        n_samples_per_x.append(x.shape[1])
        info = mne.create_info(ch_names=ch_names, sfreq=sfreq)
        raw = mne.io.RawArray(x, info)
        base_dataset = BaseDataset(raw,
                                   pd.Series({"target": target}),
                                   target_name="target")
        base_datasets.append(base_dataset)
    base_datasets = BaseConcatDataset(base_datasets)

    if window_size_samples is None and window_stride_samples is None:
        if not len(np.unique(n_samples_per_x)) == 1:
            raise ValueError(
                "If 'window_size_samples' and 'window_stride_samples' are "
                "None, all trials have to have the same length.")
        window_size_samples = n_samples_per_x[0]
        window_stride_samples = n_samples_per_x[0]
    windows_datasets = create_fixed_length_windows(
        base_datasets,
        start_offset_samples=0,
        stop_offset_samples=0,
        window_size_samples=window_size_samples,
        window_stride_samples=window_stride_samples,
        drop_last_window=drop_last_window)
    return windows_datasets
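# Hedged usage sketch with synthetic data (shapes only; values are arbitrary):
X = np.random.randn(5, 3, 200)  # 5 trials, 3 channels, 200 samples each
y = np.array([0, 1, 0, 1, 0])
windows_ds = create_from_X_y(X, y, drop_last_window=False, sfreq=100.)
x, target, ind = windows_ds[0]
print(x.shape, target)  # (3, 200), 0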