def test_load_concat_windows_dataset(setup_concat_windows_dataset, tmpdir):
    concat_windows_dataset = setup_concat_windows_dataset
    n_windows_datasets = len(concat_windows_dataset.datasets)
    with pytest.warns(UserWarning, match='This function only exists for '
                                         'backwards compatibility purposes. '
                                         'DO NOT USE!'):
        concat_windows_dataset._outdated_save(path=tmpdir, overwrite=False)
    with pytest.warns(UserWarning, match="The way your dataset was saved is "
                                         "deprecated by now. Please save it "
                                         "again using dataset.save()."):
        loaded_concat_windows_dataset = load_concat_dataset(path=tmpdir,
                                                            preload=False)

    assert len(concat_windows_dataset) == len(loaded_concat_windows_dataset)
    assert (len(concat_windows_dataset.datasets) ==
            len(loaded_concat_windows_dataset.datasets))
    assert (len(concat_windows_dataset.description) ==
            len(loaded_concat_windows_dataset.description))
    for windows_i in range(n_windows_datasets):
        actual_x, actual_y, actual_crop_inds = concat_windows_dataset[
            windows_i]
        x, y, crop_inds = loaded_concat_windows_dataset[windows_i]
        np.testing.assert_allclose(x, actual_x, rtol=1e-4, atol=1e-5)
        np.testing.assert_allclose(y, actual_y, rtol=1e-4, atol=1e-5)
        np.testing.assert_array_equal(crop_inds, actual_crop_inds)
    pd.testing.assert_frame_equal(concat_windows_dataset.description,
                                  loaded_concat_windows_dataset.description)
def preprocess(concat_ds, preprocessors, save_dir=None, overwrite=False,
               n_jobs=None):
    """Apply preprocessors to a concat dataset.

    Parameters
    ----------
    concat_ds : BaseConcatDataset
        A concat of BaseDataset or WindowsDataset datasets to be preprocessed.
    preprocessors : list(Preprocessor)
        List of Preprocessor objects to apply to the dataset.
    save_dir : str | None
        If a string, the preprocessed data will be saved under the specified
        directory and the datasets in ``concat_ds`` will be reloaded with
        ``preload=False``.
    overwrite : bool
        When ``save_dir`` is provided, controls whether to delete the old
        subdirectories that will be written to under ``save_dir``. If False
        and the corresponding subdirectories already exist, a
        ``FileExistsError`` will be raised.
    n_jobs : int | None
        Number of jobs for parallel execution.

    Returns
    -------
    BaseConcatDataset:
        Preprocessed dataset.
    """
    # In case of serialization, make sure directory is available before
    # preprocessing
    if save_dir is not None and not overwrite:
        _check_save_dir_empty(save_dir)

    if not isinstance(preprocessors, Iterable):
        raise ValueError(
            'preprocessors must be a list of Preprocessor objects.')
    for elem in preprocessors:
        assert hasattr(elem, 'apply'), (
            'Preprocessor object needs an `apply` method.')

    list_of_ds = Parallel(n_jobs=n_jobs)(
        delayed(_preprocess)(ds, i, preprocessors, save_dir, overwrite)
        for i, ds in enumerate(concat_ds.datasets))

    if save_dir is not None:  # Reload datasets and replace in concat_ds
        concat_ds_reloaded = load_concat_dataset(
            save_dir, preload=False, target_name=None)
        _replace_inplace(concat_ds, concat_ds_reloaded)
    else:
        if n_jobs is None or n_jobs == 1:
            # joblib did not make copies, the preprocessing happened in-place.
            # Recompute cumulative sizes as transforms might have changed them
            concat_ds.cumulative_sizes = concat_ds.cumsum(concat_ds.datasets)
        else:  # joblib made copies
            _replace_inplace(concat_ds, BaseConcatDataset(list_of_ds))

    return concat_ds
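# A minimal usage sketch for `preprocess` (illustrative only, not part of the
# library): it assumes an existing BaseConcatDataset `concat_ds` and uses
# mne.io.Raw method names ('pick_channels', 'resample'), which Preprocessor
# resolves by name, as in the tests below.
def _example_preprocess_usage(concat_ds, save_dir=None):
    preprocessors = [
        Preprocessor('pick_channels', ch_names=['C3', 'Cz', 'C4']),
        Preprocessor('resample', sfreq=100),
    ]
    # With n_jobs=None or 1 the preprocessing happens in place; passing a
    # save_dir additionally serializes the result and reloads it lazily
    # (preload=False).
    return preprocess(concat_ds, preprocessors, save_dir=save_dir,
                      overwrite=save_dir is not None)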
def test_load_concat_windows_dataset_parallel(setup_concat_windows_dataset,
                                              tmpdir):
    concat_windows_dataset = setup_concat_windows_dataset
    n_windows_datasets = len(concat_windows_dataset.datasets)
    # assert no warning is raised with the 'new' saving function
    with pytest.warns(None) as raised_warnings:
        concat_windows_dataset.save(path=tmpdir, overwrite=False)
    assert len(raised_warnings) == 0
    # assert a warning is raised because n_jobs is not supported with
    # mne.Epochs
    with pytest.warns(UserWarning, match='Parallelized reading with '
                                         '`preload=False` is not supported '
                                         'for windowed data. Will use '
                                         '`n_jobs=1`.'):
        loaded_concat_windows_dataset = load_concat_dataset(path=tmpdir,
                                                            preload=False,
                                                            n_jobs=2)

    assert len(concat_windows_dataset) == len(loaded_concat_windows_dataset)
    assert (len(concat_windows_dataset.datasets) ==
            len(loaded_concat_windows_dataset.datasets))
    assert (len(concat_windows_dataset.description) ==
            len(loaded_concat_windows_dataset.description))
    for windows_i in range(n_windows_datasets):
        actual_x, actual_y, actual_crop_inds = concat_windows_dataset[
            windows_i]
        x, y, crop_inds = loaded_concat_windows_dataset[windows_i]
        np.testing.assert_allclose(x, actual_x, rtol=1e-4, atol=1e-5)
        np.testing.assert_allclose(y, actual_y, rtol=1e-4, atol=1e-5)
        np.testing.assert_array_equal(crop_inds, actual_crop_inds)
    pd.testing.assert_frame_equal(concat_windows_dataset.description,
                                  loaded_concat_windows_dataset.description)
def test_load_multiple_concat_raw_dataset(setup_concat_raw_dataset, tmpdir):
    concat_raw_dataset = setup_concat_raw_dataset
    for i in range(2):
        path = os.path.join(tmpdir, str(i))
        os.makedirs(path)
        save_concat_dataset(path=path, concat_dataset=concat_raw_dataset,
                            overwrite=False)
    loaded_concat_raw_datasets = load_concat_dataset(path=tmpdir,
                                                     preload=False)
    assert 2 * len(concat_raw_dataset) == len(loaded_concat_raw_datasets)
    assert (2 * len(concat_raw_dataset.datasets) ==
            len(loaded_concat_raw_datasets.datasets))
    assert (2 * len(concat_raw_dataset.description) ==
            len(loaded_concat_raw_datasets.description))
def test_load_save_raw_preproc_kwargs(setup_concat_raw_dataset, tmpdir):
    concat_raw_dataset = setup_concat_raw_dataset
    preprocess(concat_raw_dataset, [
        Preprocessor('pick_channels', ch_names=['C3']),
    ])
    concat_raw_dataset.save(tmpdir, overwrite=False)
    for i in range(len(concat_raw_dataset.datasets)):
        assert os.path.exists(
            os.path.join(tmpdir, str(i), 'raw_preproc_kwargs.json'))
    loaded_concat_raw_dataset = load_concat_dataset(tmpdir, preload=False)
    for ds in loaded_concat_raw_dataset.datasets:
        assert ds.raw_preproc_kwargs == [
            ('pick_channels', {'ch_names': ['C3']}),
        ]
def test_load_save_window_preproc_kwargs(setup_concat_windows_dataset,
                                         tmpdir):
    concat_windows_dataset = setup_concat_windows_dataset
    concat_windows_dataset.save(tmpdir, overwrite=False)
    for i in range(len(concat_windows_dataset.datasets)):
        subdir = os.path.join(tmpdir, str(i))
        assert os.path.exists(os.path.join(subdir, 'window_kwargs.json'))

    preprocess(concat_windows_dataset, [
        Preprocessor('pick_channels', ch_names=['Cz']),
    ])
    concat_windows_dataset.save(tmpdir, overwrite=True)
    for i in range(len(concat_windows_dataset.datasets)):
        subdir = os.path.join(tmpdir, str(i))
        assert os.path.exists(os.path.join(subdir, 'window_kwargs.json'))
        assert os.path.exists(
            os.path.join(subdir, 'window_preproc_kwargs.json'))

    loaded_concat_windows_dataset = load_concat_dataset(tmpdir, preload=False)
    for ds in loaded_concat_windows_dataset.datasets:
        assert ds.window_kwargs == [
            ('create_windows_from_events', {
                'infer_mapping': True,
                'infer_window_size_stride': True,
                'trial_start_offset_samples': 0,
                'trial_stop_offset_samples': 0,
                'window_size_samples': None,
                'window_stride_samples': None,
                'drop_last_window': False,
                'mapping': {'feet': 0, 'left_hand': 1, 'right_hand': 2,
                            'tongue': 3},
                'preload': False,
                'drop_bad_windows': True,
                'picks': None,
                'reject': None,
                'flat': None,
                'on_missing': 'error',
                'accepted_bads_ratio': 0.0,
            })]
        assert ds.window_preproc_kwargs == [
            ('pick_channels', {'ch_names': ['Cz']}),
        ]
def test_load_concat_raw_dataset(setup_concat_raw_dataset, tmpdir):
    concat_raw_dataset = setup_concat_raw_dataset
    n_raw_datasets = len(concat_raw_dataset.datasets)
    save_concat_dataset(path=tmpdir, concat_dataset=concat_raw_dataset,
                        overwrite=False)
    loaded_concat_raw_dataset = load_concat_dataset(path=tmpdir,
                                                    preload=False)
    assert len(concat_raw_dataset) == len(loaded_concat_raw_dataset)
    assert (len(concat_raw_dataset.datasets) ==
            len(loaded_concat_raw_dataset.datasets))
    assert (len(concat_raw_dataset.description) ==
            len(loaded_concat_raw_dataset.description))
    for raw_i in range(n_raw_datasets):
        actual_x, actual_y = concat_raw_dataset[raw_i]
        x, y = loaded_concat_raw_dataset[raw_i]
        np.testing.assert_allclose(x, actual_x, rtol=1e-4, atol=1e-5)
    pd.testing.assert_frame_equal(concat_raw_dataset.description,
                                  loaded_concat_raw_dataset.description)
@pytest.mark.parametrize('overwrite', [True, False])
def test_preprocess_overwrite(base_concat_ds, tmp_path, overwrite):
    preprocessors = [Preprocessor('crop', tmax=10, include_tmax=False)]

    # Create temporary directory with preexisting files
    save_dir = str(tmp_path)
    for i, ds in enumerate(base_concat_ds.datasets):
        concat_ds = BaseConcatDataset([ds])
        save_subdir = os.path.join(save_dir, str(i))
        os.makedirs(save_subdir)
        concat_ds.save(save_subdir, overwrite=True)

    if overwrite:
        preprocess(base_concat_ds, preprocessors, save_dir, overwrite=True)
        # Make sure the serialized data is preprocessed
        preproc_concat_ds = load_concat_dataset(save_dir, True)
        assert all([len(ds.raw.times) == 2500
                    for ds in preproc_concat_ds.datasets])
    else:
        with pytest.raises(FileExistsError):
            preprocess(base_concat_ds, preprocessors, save_dir,
                       overwrite=False)
def test_load_multiple_concat_raw_dataset(setup_concat_raw_dataset, tmpdir):
    concat_raw_dataset = setup_concat_raw_dataset
    for i in range(2):
        path = os.path.join(tmpdir, str(i))
        os.makedirs(path)
        with pytest.warns(UserWarning, match='This function only exists for '
                                             'backwards compatibility '
                                             'purposes. DO NOT USE!'):
            concat_raw_dataset._outdated_save(path=path, overwrite=False)
    with pytest.warns(UserWarning, match="The way your dataset was saved is "
                                         "deprecated by now. Please save it "
                                         "again using dataset.save()."):
        loaded_concat_raw_datasets = load_concat_dataset(path=tmpdir,
                                                         preload=False)
    assert 2 * len(concat_raw_dataset) == len(loaded_concat_raw_datasets)
    assert (2 * len(concat_raw_dataset.datasets) ==
            len(loaded_concat_raw_datasets.datasets))
    assert (2 * len(concat_raw_dataset.description) ==
            len(loaded_concat_raw_datasets.description))
def test_load_concat_windows_dataset(setup_concat_windows_dataset, tmpdir):
    concat_windows_dataset = setup_concat_windows_dataset
    n_windows_datasets = len(concat_windows_dataset.datasets)
    save_concat_dataset(path=tmpdir, concat_dataset=concat_windows_dataset,
                        overwrite=False)
    loaded_concat_windows_dataset = load_concat_dataset(path=tmpdir,
                                                        preload=False)
    assert len(concat_windows_dataset) == len(loaded_concat_windows_dataset)
    assert (len(concat_windows_dataset.datasets) ==
            len(loaded_concat_windows_dataset.datasets))
    assert (len(concat_windows_dataset.description) ==
            len(loaded_concat_windows_dataset.description))
    for windows_i in range(n_windows_datasets):
        actual_x, actual_y, actual_crop_inds = concat_windows_dataset[
            windows_i]
        x, y, crop_inds = loaded_concat_windows_dataset[windows_i]
        np.testing.assert_allclose(x, actual_x, rtol=1e-4, atol=1e-5)
        np.testing.assert_allclose(y, actual_y, rtol=1e-4, atol=1e-5)
        np.testing.assert_array_equal(crop_inds, actual_crop_inds)
    pd.testing.assert_frame_equal(concat_windows_dataset.description,
                                  loaded_concat_windows_dataset.description)
def test_load_concat_raw_dataset_parallel(setup_concat_raw_dataset, tmpdir):
    concat_raw_dataset = setup_concat_raw_dataset
    n_raw_datasets = len(concat_raw_dataset.datasets)
    # assert no warning is raised with the 'new' saving function
    with pytest.warns(None) as raised_warnings:
        concat_raw_dataset.save(path=tmpdir, overwrite=False)
    assert len(raised_warnings) == 0
    # assert no warning is raised when loading a dataset saved the 'new' way
    with pytest.warns(None) as raised_warnings:
        loaded_concat_raw_dataset = load_concat_dataset(path=tmpdir,
                                                        preload=False,
                                                        n_jobs=2)
    assert len(raised_warnings) == 0

    assert len(concat_raw_dataset) == len(loaded_concat_raw_dataset)
    assert (len(concat_raw_dataset.datasets) ==
            len(loaded_concat_raw_dataset.datasets))
    assert (len(concat_raw_dataset.description) ==
            len(loaded_concat_raw_dataset.description))
    for raw_i in range(n_raw_datasets):
        actual_x, actual_y = concat_raw_dataset[raw_i]
        x, y = loaded_concat_raw_dataset[raw_i]
        np.testing.assert_allclose(x, actual_x, rtol=1e-4, atol=1e-5)
    pd.testing.assert_frame_equal(concat_raw_dataset.description,
                                  loaded_concat_raw_dataset.description)
# choose to overwrite the existing files.
ds.save(
    path='./',
    overwrite=False,
)

##############################################################################
# We load the saved dataset from a directory. Signals can be preloaded,
# following mne's `preload` semantics. Optionally, only specific '.fif' files
# can be loaded by specifying their ids. The target name can be changed if
# the dataset supports it (TUHAbnormal, for example, supports 'pathological',
# 'age', and 'gender'; if you stored a preprocessed version with target
# 'pathological', it is possible to change the target upon loading).
ds_loaded = load_concat_dataset(
    path='./',
    preload=True,
    ids_to_load=[1, 3],
    target_name=None,
)

##############################################################################
# The serialization utility also supports WindowsDatasets, so we create
# compute windows next.
windows_ds = create_windows_from_events(
    concat_ds=ds_loaded,
    trial_start_offset_samples=0,
    trial_stop_offset_samples=0,
)

##############################################################################
# Again, we save the dataset to an existing directory. It will create a
# '-epo.fif' file for every dataset in the concat dataset. Additionally it
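# A plausible completion of the truncated step above (hedged; the exact call
# is an assumption): saving the windowed dataset mirrors the raw case, here
# with `overwrite=True` to replace the files written earlier.
windows_ds.save(
    path='./',
    overwrite=True,
)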
            start_offset_samples=0,
            stop_offset_samples=None,
            window_size_samples=window_size_samples,
            window_stride_samples=window_stride_samples,
            drop_last_window=False)
        # save memory by deleting raw recording
        del tuh_subset
        # store the number of windows required for loading later on
        tuh_windows.description["n_windows"] = [
            len(d) for d in tuh_windows.datasets]
        # create one directory for every recording
        rec_path = os.path.join(OUT_PATH, str(rec_i))
        if not os.path.exists(rec_path):
            os.makedirs(rec_path)
        save_concat_dataset(rec_path, tuh_windows)
        out_i += 1
        # save memory by deleting epoched recording
        del tuh_windows
    else:
        # store raws to disk for the option of using different compute
        # window sizes
        pass

###############################################################################
# We load the preprocessed data again in a lazy fashion (`preload=False`). It
# is now ready to be used for model training.
tuh_loaded = load_concat_dataset('./tuh_sample/', preload=False)
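###############################################################################
# A hedged sketch (not part of the original example): since
# `load_concat_dataset` restores the description dataframe, the "n_windows"
# column stored above can report the total number of windows without reading
# any signals, assuming the custom column survives the save/load round trip.
total_windows = sum(tuh_loaded.description["n_windows"])
print(f'{total_windows} windows across {len(tuh_loaded.datasets)} recordings')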
        'n_samples': [len(d) for d in tuh_subset.datasets],
    }, overwrite=True)
    # create one directory for every recording
    rec_path = os.path.join(OUT_PATH, str(rec_i))
    if not os.path.exists(rec_path):
        os.makedirs(rec_path)
    tuh_subset.save(rec_path)
    # save memory by deleting raw recording
    del tuh_subset.datasets[0].raw

###############################################################################
# We reload the preprocessed data in a lazy fashion (`preload=False`).
tuh_loaded = load_concat_dataset(OUT_PATH, preload=False)

###############################################################################
# We generate compute windows. The resulting dataset is now ready to be used
# for model training.
window_size_samples = 1000
window_stride_samples = 1000
# generate compute windows here and store them to disk
tuh_windows = create_fixed_length_windows(
    tuh_loaded,
    start_offset_samples=0,
    stop_offset_samples=None,
    window_size_samples=window_size_samples,
    window_stride_samples=window_stride_samples,
    drop_last_window=False)
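###############################################################################
# A minimal sketch (illustrative, not part of the original example) of feeding
# the windowed dataset into a training loop: BaseConcatDataset subclasses
# PyTorch's ConcatDataset, so it can be wrapped directly in a DataLoader. The
# batch size is an arbitrary placeholder.
from torch.utils.data import DataLoader

loader = DataLoader(tuh_windows, batch_size=64, shuffle=True)
for X, y, crop_inds in loader:
    # X holds the batched window signals, y the targets, and crop_inds the
    # per-window index information, as in the tests above
    break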