Example 1
def test_load_concat_windows_dataset(setup_concat_windows_dataset, tmpdir):
    concat_windows_dataset = setup_concat_windows_dataset
    n_windows_datasets = len(concat_windows_dataset.datasets)
    with pytest.warns(UserWarning,
                      match='This function only exists for '
                      'backwards compatibility purposes. DO NOT USE!'):
        concat_windows_dataset._outdated_save(path=tmpdir, overwrite=False)
    with pytest.warns(UserWarning,
                      match="The way your dataset was saved is deprecated by"
                      " now. Please save it again using dataset.save()"
                      "."):
        loaded_concat_windows_dataset = load_concat_dataset(path=tmpdir,
                                                            preload=False)
    assert len(concat_windows_dataset) == len(loaded_concat_windows_dataset)
    assert (len(concat_windows_dataset.datasets) == len(
        loaded_concat_windows_dataset.datasets))
    assert (len(concat_windows_dataset.description) == len(
        loaded_concat_windows_dataset.description))
    for windows_i in range(n_windows_datasets):
        actual_x, actual_y, actual_crop_inds = concat_windows_dataset[
            windows_i]
        x, y, crop_inds = loaded_concat_windows_dataset[windows_i]
        np.testing.assert_allclose(x, actual_x, rtol=1e-4, atol=1e-5)
        np.testing.assert_allclose(y, actual_y, rtol=1e-4, atol=1e-5)
        np.testing.assert_array_equal(crop_inds, actual_crop_inds)
    pd.testing.assert_frame_equal(concat_windows_dataset.description,
                                  loaded_concat_windows_dataset.description)
Example 2
def preprocess(concat_ds,
               preprocessors,
               save_dir=None,
               overwrite=False,
               n_jobs=None):
    """Apply preprocessors to a concat dataset.

    Parameters
    ----------
    concat_ds : BaseConcatDataset
        A concat of BaseDataset or WindowsDataset datasets to be preprocessed.
    preprocessors : list of Preprocessor
        List of Preprocessor objects to apply to the dataset.
    save_dir : str | None
        If a string, the preprocessed data will be saved under the specified
        directory and the datasets in ``concat_ds`` will be reloaded with
        `preload=False`.
    overwrite : bool
        When `save_dir` is provided, controls whether to delete the old
        subdirectories that will be written to under `save_dir`. If False and
        the corresponding subdirectories already exist, a ``FileExistsError``
        will be raised.
    n_jobs : int | None
        Number of jobs for parallel execution.

    Returns
    -------
    BaseConcatDataset
        Preprocessed dataset.
    """
    # In case of serialization, make sure directory is available before
    # preprocessing
    if save_dir is not None and not overwrite:
        _check_save_dir_empty(save_dir)

    if not isinstance(preprocessors, Iterable):
        raise ValueError(
            'preprocessors must be a list of Preprocessor objects.')
    for elem in preprocessors:
        assert hasattr(
            elem, 'apply'), ('Preprocessor object needs an `apply` method.')

    list_of_ds = Parallel(n_jobs=n_jobs)(
        delayed(_preprocess)(ds, i, preprocessors, save_dir, overwrite)
        for i, ds in enumerate(concat_ds.datasets))

    if save_dir is not None:  # Reload datasets and replace in concat_ds
        concat_ds_reloaded = load_concat_dataset(save_dir,
                                                 preload=False,
                                                 target_name=None)
        _replace_inplace(concat_ds, concat_ds_reloaded)
    else:
        if n_jobs is None or n_jobs == 1:  # joblib did not make copies, the
            # preprocessing happened in-place
            # Recompute cumulative sizes as transforms might have changed them
            concat_ds.cumulative_sizes = concat_ds.cumsum(concat_ds.datasets)
        else:  # joblib made copies
            _replace_inplace(concat_ds, BaseConcatDataset(list_of_ds))

    return concat_ds
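
A minimal usage sketch of the function above; `my_concat_ds` is a placeholder BaseConcatDataset, the channel and sampling-rate choices are illustrative, and the import path is an assumption that may differ across braindecode versions.

from braindecode.preprocessing import Preprocessor  # import path is an assumption

preprocessors = [
    Preprocessor('pick_channels', ch_names=['C3']),  # apply MNE method by name
    Preprocessor('resample', sfreq=100),             # downsample to 100 Hz
]
# Runs in place when save_dir is None; pass save_dir=... to serialize the
# result and reload it with preload=False, and n_jobs=... to parallelize
# over recordings.
preprocess(my_concat_ds, preprocessors, save_dir=None, overwrite=False, n_jobs=1)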
Example 3
def test_load_concat_windows_dataset_parallel(setup_concat_windows_dataset,
                                              tmpdir):
    concat_windows_dataset = setup_concat_windows_dataset
    n_windows_datasets = len(concat_windows_dataset.datasets)
    # assert no warning raised with 'new' saving function
    with pytest.warns(None) as raised_warnings:
        concat_windows_dataset.save(path=tmpdir, overwrite=False)
        assert len(raised_warnings) == 0
    # assert warning raised because n_jobs is not supported with mne.Epochs
    with pytest.warns(UserWarning,
                      match='Parallelized reading with '
                      '`preload=False` is not supported for '
                      'windowed data. Will use `n_jobs=1`.'):
        loaded_concat_windows_dataset = load_concat_dataset(path=tmpdir,
                                                            preload=False,
                                                            n_jobs=2)
    assert len(concat_windows_dataset) == len(loaded_concat_windows_dataset)
    assert (len(concat_windows_dataset.datasets) == len(
        loaded_concat_windows_dataset.datasets))
    assert (len(concat_windows_dataset.description) == len(
        loaded_concat_windows_dataset.description))
    for windows_i in range(n_windows_datasets):
        actual_x, actual_y, actual_crop_inds = concat_windows_dataset[
            windows_i]
        x, y, crop_inds = loaded_concat_windows_dataset[windows_i]
        np.testing.assert_allclose(x, actual_x, rtol=1e-4, atol=1e-5)
        np.testing.assert_allclose(y, actual_y, rtol=1e-4, atol=1e-5)
        np.testing.assert_array_equal(crop_inds, actual_crop_inds)
    pd.testing.assert_frame_equal(concat_windows_dataset.description,
                                  loaded_concat_windows_dataset.description)
Example 4
def test_load_multiple_concat_raw_dataset(setup_concat_raw_dataset, tmpdir):
    concat_raw_dataset = setup_concat_raw_dataset
    for i in range(2):
        path = os.path.join(tmpdir, str(i))
        os.makedirs(path)
        save_concat_dataset(path=path,
                            concat_dataset=concat_raw_dataset,
                            overwrite=False)
    loaded_concat_raw_datasets = load_concat_dataset(path=tmpdir,
                                                     preload=False)
    assert 2 * len(concat_raw_dataset) == len(loaded_concat_raw_datasets)
    assert (2 * len(concat_raw_dataset.datasets) == len(
        loaded_concat_raw_datasets.datasets))
    assert (2 * len(concat_raw_dataset.description) == len(
        loaded_concat_raw_datasets.description))
Example 5
def test_load_save_raw_preproc_kwargs(setup_concat_raw_dataset, tmpdir):
    concat_raw_dataset = setup_concat_raw_dataset
    preprocess(concat_raw_dataset, [
        Preprocessor('pick_channels', ch_names=['C3']),
    ])
    concat_raw_dataset.save(tmpdir, overwrite=False)
    for i in range(len(concat_raw_dataset.datasets)):
        assert os.path.exists(
            os.path.join(tmpdir, str(i), 'raw_preproc_kwargs.json'))
    loaded_concat_raw_dataset = load_concat_dataset(tmpdir, preload=False)
    for ds in loaded_concat_raw_dataset.datasets:
        assert ds.raw_preproc_kwargs == [
            ('pick_channels', {
                'ch_names': ['C3']
            }),
        ]
Example 6
def test_load_save_window_preproc_kwargs(setup_concat_windows_dataset, tmpdir):
    concat_windows_dataset = setup_concat_windows_dataset
    concat_windows_dataset.save(tmpdir, overwrite=False)
    for i in range(len(concat_windows_dataset.datasets)):
        subdir = os.path.join(tmpdir, str(i))
        assert os.path.exists(os.path.join(subdir, 'window_kwargs.json'))

    preprocess(concat_windows_dataset, [
        Preprocessor('pick_channels', ch_names=['Cz']),
    ])
    concat_windows_dataset.save(tmpdir, overwrite=True)
    for i in range(len(concat_windows_dataset.datasets)):
        subdir = os.path.join(tmpdir, str(i))
        assert os.path.exists(os.path.join(subdir, 'window_kwargs.json'))
        assert os.path.exists(
            os.path.join(subdir, 'window_preproc_kwargs.json'))
    loaded_concat_windows_dataset = load_concat_dataset(tmpdir, preload=False)

    for ds in loaded_concat_windows_dataset.datasets:
        assert ds.window_kwargs == [('create_windows_from_events', {
            'infer_mapping': True,
            'infer_window_size_stride': True,
            'trial_start_offset_samples': 0,
            'trial_stop_offset_samples': 0,
            'window_size_samples': None,
            'window_stride_samples': None,
            'drop_last_window': False,
            'mapping': {
                'feet': 0,
                'left_hand': 1,
                'right_hand': 2,
                'tongue': 3
            },
            'preload': False,
            'drop_bad_windows': True,
            'picks': None,
            'reject': None,
            'flat': None,
            'on_missing': 'error',
            'accepted_bads_ratio': 0.0
        })]
        assert ds.window_preproc_kwargs == [
            ('pick_channels', {
                'ch_names': ['Cz']
            }),
        ]
Example 7
def test_load_concat_raw_dataset(setup_concat_raw_dataset, tmpdir):
    concat_raw_dataset = setup_concat_raw_dataset
    n_raw_datasets = len(concat_raw_dataset.datasets)
    save_concat_dataset(path=tmpdir,
                        concat_dataset=concat_raw_dataset,
                        overwrite=False)
    loaded_concat_raw_dataset = load_concat_dataset(path=tmpdir, preload=False)
    assert len(concat_raw_dataset) == len(loaded_concat_raw_dataset)
    assert (len(concat_raw_dataset.datasets) == len(
        loaded_concat_raw_dataset.datasets))
    assert (len(concat_raw_dataset.description) == len(
        loaded_concat_raw_dataset.description))
    for raw_i in range(n_raw_datasets):
        actual_x, actual_y = concat_raw_dataset[raw_i]
        x, y = loaded_concat_raw_dataset[raw_i]
        np.testing.assert_allclose(x, actual_x, rtol=1e-4, atol=1e-5)
    pd.testing.assert_frame_equal(concat_raw_dataset.description,
                                  loaded_concat_raw_dataset.description)
Example 8
def test_preprocess_overwrite(base_concat_ds, tmp_path, overwrite):
    preprocessors = [Preprocessor('crop', tmax=10, include_tmax=False)]

    # Create temporary directory with preexisting files
    save_dir = str(tmp_path)
    for i, ds in enumerate(base_concat_ds.datasets):
        concat_ds = BaseConcatDataset([ds])
        save_subdir = os.path.join(save_dir, str(i))
        os.makedirs(save_subdir)
        concat_ds.save(save_subdir, overwrite=True)

    if overwrite:
        preprocess(base_concat_ds, preprocessors, save_dir, overwrite=True)
        # Make sure the serialized data is preprocessed
        preproc_concat_ds = load_concat_dataset(save_dir, True)
        assert all([len(ds.raw.times) == 2500
                    for ds in preproc_concat_ds.datasets])
    else:
        with pytest.raises(FileExistsError):
            preprocess(base_concat_ds, preprocessors, save_dir,
                       overwrite=False)
Example 9
def test_load_multiple_concat_raw_dataset(setup_concat_raw_dataset, tmpdir):
    concat_raw_dataset = setup_concat_raw_dataset
    for i in range(2):
        path = os.path.join(tmpdir, str(i))
        os.makedirs(path)
        with pytest.warns(UserWarning,
                          match='This function only exists for '
                          'backwards compatibility purposes. DO NOT '
                          'USE!'):
            concat_raw_dataset._outdated_save(path=path, overwrite=False)
        with pytest.warns(UserWarning,
                          match="The way your dataset was saved is "
                          "deprecated by now. Please save it again "
                          "using dataset.save()."):
            loaded_concat_raw_datasets = load_concat_dataset(path=tmpdir,
                                                             preload=False)
    assert 2 * len(concat_raw_dataset) == len(loaded_concat_raw_datasets)
    assert (2 * len(concat_raw_dataset.datasets) == len(
        loaded_concat_raw_datasets.datasets))
    assert (2 * len(concat_raw_dataset.description) == len(
        loaded_concat_raw_datasets.description))
Example 10
def test_load_concat_windows_dataset(setup_concat_windows_dataset, tmpdir):
    concat_windows_dataset = setup_concat_windows_dataset
    n_windows_datasets = len(concat_windows_dataset.datasets)
    save_concat_dataset(path=tmpdir,
                        concat_dataset=concat_windows_dataset,
                        overwrite=False)
    loaded_concat_windows_dataset = load_concat_dataset(path=tmpdir,
                                                        preload=False)
    assert len(concat_windows_dataset) == len(loaded_concat_windows_dataset)
    assert (len(concat_windows_dataset.datasets) == len(
        loaded_concat_windows_dataset.datasets))
    assert (len(concat_windows_dataset.description) == len(
        loaded_concat_windows_dataset.description))
    for windows_i in range(n_windows_datasets):
        actual_x, actual_y, actual_crop_inds = concat_windows_dataset[
            windows_i]
        x, y, crop_inds = loaded_concat_windows_dataset[windows_i]
        np.testing.assert_allclose(x, actual_x, rtol=1e-4, atol=1e-5)
        np.testing.assert_allclose(y, actual_y, rtol=1e-4, atol=1e-5)
        np.testing.assert_array_equal(crop_inds, actual_crop_inds)
    pd.testing.assert_frame_equal(concat_windows_dataset.description,
                                  loaded_concat_windows_dataset.description)
Example 11
def test_load_concat_raw_dataset_parallel(setup_concat_raw_dataset, tmpdir):
    concat_raw_dataset = setup_concat_raw_dataset
    n_raw_datasets = len(concat_raw_dataset.datasets)
    # assert no warning raised with 'new' saving function
    with pytest.warns(None) as raised_warnings:
        concat_raw_dataset.save(path=tmpdir, overwrite=False)
        assert len(raised_warnings) == 0
    # assert no warning raised with loading dataset saved in 'new' way
    with pytest.warns(None) as raised_warnings:
        loaded_concat_raw_dataset = load_concat_dataset(path=tmpdir,
                                                        preload=False,
                                                        n_jobs=2)
        assert len(raised_warnings) == 0
    assert len(concat_raw_dataset) == len(loaded_concat_raw_dataset)
    assert (len(concat_raw_dataset.datasets) == len(
        loaded_concat_raw_dataset.datasets))
    assert (len(concat_raw_dataset.description) == len(
        loaded_concat_raw_dataset.description))
    for raw_i in range(n_raw_datasets):
        actual_x, actual_y = concat_raw_dataset[raw_i]
        x, y = loaded_concat_raw_dataset[raw_i]
        np.testing.assert_allclose(x, actual_x, rtol=1e-4, atol=1e-5)
    pd.testing.assert_frame_equal(concat_raw_dataset.description,
                                  loaded_concat_raw_dataset.description)
Example 12
# choose to overwrite the existing files.
ds.save(
    path='./',
    overwrite=False,
)

##############################################################################
# We load the saved dataset from a directory. Signals can be preloaded in 
# compliance with mne. Optionally, only specific '.fif' files can be loaded 
# by specifying their ids. The target name can be changed, if the dataset 
# supports it (TUHAbnormal for example supports 'pathological', 'age', and 
# 'gender'. If you stored a preprocessed version with target 'pathological' 
# it is possible to change the target upon loading).
ds_loaded = load_concat_dataset(
    path='./',
    preload=True,
    ids_to_load=[1, 3],
    target_name=None,
)
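
##############################################################################
# A sketch of the target switch mentioned above. This is an assumption: it
# only works if the stored dataset's description actually contains the
# requested column (e.g. 'age' for a TUHAbnormal-style dataset).
ds_loaded_age = load_concat_dataset(
    path='./',
    preload=False,
    target_name='age',
)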

##############################################################################
# The serialization utility also supports WindowsDatasets, so we create 
# compute windows next.
windows_ds = create_windows_from_events(
    concat_ds=ds_loaded,
    trial_start_offset_samples=0,
    trial_stop_offset_samples=0,
)

##############################################################################
# Again, we save the dataset to an existing directory. It will create a 
# '-epo.fif' file for every dataset in the concat dataset. Additionally it 
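# (a sketch of the save call described above; path and overwrite flag are
# illustrative choices)
windows_ds.save(
    path='./',
    overwrite=True,
)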
Example 13
            start_offset_samples=0,
            stop_offset_samples=None,
            window_size_samples=window_size_samples,
            window_stride_samples=window_stride_samples,
            drop_last_window=False)
        # save memory by deleting raw recording
        del tuh_subset
        # store the number of windows required for loading later on
        tuh_windows.description["n_windows"] = [
            len(d) for d in tuh_windows.datasets
        ]

        # create one directory for every recording
        rec_path = os.path.join(OUT_PATH, str(rec_i))
        if not os.path.exists(rec_path):
            os.makedirs(rec_path)
        save_concat_dataset(rec_path, tuh_windows)
        out_i += 1
        # save memory by deleting epoched recording
        del tuh_windows
    else:
        # store raws to disk to keep the option of using different compute
        # window sizes
        pass

###############################################################################
# We load the preprocessed data again in a lazy fashion (`preload=False`). It is
# now ready to be used for model training.

tuh_loaded = load_concat_dataset('./tuh_sample/', preload=False)
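
###############################################################################
# A sketch of feeding the loaded windows to a standard PyTorch DataLoader
# (the batch size is an arbitrary choice; windowed items yield x, y and crop
# indices, as in the tests above).

from torch.utils.data import DataLoader

loader = DataLoader(tuh_loaded, batch_size=64, shuffle=True)
for X, y, crop_inds in loader:
    # X: batched window signals, y: targets, crop_inds: window index info
    break  # replace with forward pass / loss computation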
Example 14
            'n_samples': [len(d) for d in tuh_subset.datasets],
        },
        overwrite=True)

    # create one directory for every recording
    rec_path = os.path.join(OUT_PATH, str(rec_i))
    if not os.path.exists(rec_path):
        os.makedirs(rec_path)
    tuh_subset.save(rec_path)
    # save memory by deleting raw recording
    del tuh_subset.datasets[0].raw

###############################################################################
# We reload the preprocessed data again in a lazy fashion (`preload=False`).

tuh_loaded = load_concat_dataset(OUT_PATH, preload=False)

###############################################################################
# We generate compute windows. The resulting dataset is now ready to be used
# for model training.

window_size_samples = 1000
window_stride_samples = 1000
# generate compute windows here and store them to disk
tuh_windows = create_fixed_length_windows(
    tuh_loaded,
    start_offset_samples=0,
    stop_offset_samples=None,
    window_size_samples=window_size_samples,
    window_stride_samples=window_stride_samples,
    drop_last_window=False)
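
###############################################################################
# A sketch of the "store them to disk" step mentioned above: persist the
# compute windows so they can be reloaded lazily later. `WINDOWS_PATH` is a
# hypothetical output directory, not part of the original example.

WINDOWS_PATH = './tuh_windows/'
if not os.path.exists(WINDOWS_PATH):
    os.makedirs(WINDOWS_PATH)
tuh_windows.save(WINDOWS_PATH, overwrite=True)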