def test_windows_fixed_length_cropped(lazy_loadable_dataset):
    """Test fixed length windowing on cropped data.

    Cropping raw data changes the `first_samp` attribute of the Raw object,
    and so it is important to test that this is taken into account by the
    windowers.
    """
    tmin, tmax = 100, 120

    ds = copy.deepcopy(lazy_loadable_dataset)
    ds.datasets[0].raw.annotations.crop(tmin, tmax)

    crop_ds = copy.deepcopy(lazy_loadable_dataset)
    crop_transform = Preprocessor('crop', tmin=tmin, tmax=tmax)
    preprocess(crop_ds, [crop_transform])

    # Extract windows
    sfreq = ds.datasets[0].raw.info['sfreq']
    tmin_samples, tmax_samples = int(tmin * sfreq), int(tmax * sfreq)

    windows1 = create_fixed_length_windows(
        concat_ds=ds, start_offset_samples=tmin_samples,
        stop_offset_samples=tmax_samples, window_size_samples=100,
        window_stride_samples=100, drop_last_window=True)
    windows2 = create_fixed_length_windows(
        concat_ds=crop_ds, start_offset_samples=0,
        stop_offset_samples=None, window_size_samples=100,
        window_stride_samples=100, drop_last_window=True)
    assert (windows1[0][0] == windows2[0][0]).all()
def test_window_sizes_too_large(concat_ds_targets):
    concat_ds, targets = concat_ds_targets

    # Window size larger than all trials
    window_size = len(concat_ds.datasets[0]) + 1
    with pytest.raises(
            ValueError,
            match=f'Window size {window_size} exceeds trial durat'):
        create_windows_from_events(
            concat_ds=concat_ds,
            window_size_samples=window_size,
            window_stride_samples=window_size,
            trial_start_offset_samples=0,
            trial_stop_offset_samples=0,
            drop_last_window=False,
        )
    with pytest.raises(
            ValueError,
            match=f'Window size {window_size} exceeds trial durat'):
        create_fixed_length_windows(
            concat_ds=concat_ds,
            window_size_samples=window_size,
            window_stride_samples=window_size,
            drop_last_window=False,
        )

    # Window size larger than one single trial
    annots = concat_ds.datasets[0].raw.annotations
    annot_0 = annots[0]
    # Window size equal to the original trial size
    window_size = int(
        annot_0["duration"] * concat_ds.datasets[0].raw.info['sfreq'])
    # Make the first trial 1 second shorter
    annot_0["duration"] -= 1
    # Replace the first trial by the new, shorter one
    annots.delete(0)
    del annot_0["orig_time"]
    annots.append(**annot_0)
    concat_ds.datasets[0].raw.set_annotations(annots)
    with pytest.warns(
            UserWarning,
            match=".* are being dropped as the window size .*"):
        create_windows_from_events(
            concat_ds=concat_ds,
            window_size_samples=window_size,
            window_stride_samples=window_size,
            trial_start_offset_samples=0,
            trial_stop_offset_samples=0,
            drop_last_window=False,
            accepted_bads_ratio=0.5,
            on_missing='ignore',
        )
def test_drop_bad_windows(concat_ds_targets, drop_bad_windows, preload):
    concat_ds, _ = concat_ds_targets
    windows_from_events = create_windows_from_events(
        concat_ds=concat_ds, trial_start_offset_samples=0,
        trial_stop_offset_samples=0, window_size_samples=100,
        window_stride_samples=100, drop_last_window=False,
        preload=preload, drop_bad_windows=drop_bad_windows)

    windows_fixed_length = create_fixed_length_windows(
        concat_ds=concat_ds, start_offset_samples=0,
        stop_offset_samples=1000, window_size_samples=1000,
        window_stride_samples=1000, drop_last_window=False,
        preload=preload, drop_bad_windows=drop_bad_windows)

    assert (windows_from_events.datasets[0].windows._bad_dropped ==
            drop_bad_windows)
    assert (windows_fixed_length.datasets[0].windows._bad_dropped ==
            drop_bad_windows)
def test_fixed_length_windows_preload_false(lazy_loadable_dataset):
    windows = create_fixed_length_windows(
        concat_ds=lazy_loadable_dataset, start_offset_samples=0,
        stop_offset_samples=100, window_size_samples=100,
        window_stride_samples=100, drop_last_window=False, preload=False)

    assert all([not ds.windows.preload for ds in windows.datasets])
def test_epochs_kwargs(lazy_loadable_dataset):
    picks = ['ch0']
    on_missing = 'warning'
    flat = {'eeg': 3e-6}
    reject = {'eeg': 43e-6}

    windows = create_windows_from_events(
        concat_ds=lazy_loadable_dataset, trial_start_offset_samples=0,
        trial_stop_offset_samples=0, window_size_samples=100,
        window_stride_samples=100, drop_last_window=False, picks=picks,
        on_missing=on_missing, flat=flat, reject=reject)

    epochs = windows.datasets[0].windows
    assert epochs.ch_names == picks
    assert epochs.reject == reject
    assert epochs.flat == flat
    for ds in windows.datasets:
        assert ds.window_kwargs == [
            ('create_windows_from_events', {
                'infer_mapping': True, 'infer_window_size_stride': False,
                'trial_start_offset_samples': 0,
                'trial_stop_offset_samples': 0,
                'window_size_samples': 100, 'window_stride_samples': 100,
                'drop_last_window': False, 'mapping': {'test': 0},
                'preload': False, 'drop_bad_windows': True, 'picks': picks,
                'reject': reject, 'flat': flat, 'on_missing': on_missing,
                'accepted_bads_ratio': 0.0})
        ]

    windows = create_fixed_length_windows(
        concat_ds=lazy_loadable_dataset, start_offset_samples=0,
        stop_offset_samples=None, window_size_samples=100,
        window_stride_samples=100, drop_last_window=False, picks=picks,
        on_missing=on_missing, flat=flat, reject=reject)

    epochs = windows.datasets[0].windows
    assert epochs.ch_names == picks
    assert epochs.reject == reject
    assert epochs.flat == flat
    for ds in windows.datasets:
        assert ds.window_kwargs == [
            ('create_fixed_length_windows', {
                'start_offset_samples': 0, 'stop_offset_samples': None,
                'window_size_samples': 100, 'window_stride_samples': 100,
                'drop_last_window': False, 'mapping': None,
                'preload': False, 'drop_bad_windows': True, 'picks': picks,
                'reject': reject, 'flat': flat,
                'targets_from': 'metadata', 'last_target_only': True,
                'on_missing': on_missing}),
            ('WindowsDataset', {
                'targets_from': 'metadata',
                'last_target_only': True,
            })
        ]
def load_example_data(preload, window_len_s, n_subjects=10):
    """Create a windowed dataset from subjects of the TUH Abnormal dataset.

    Parameters
    ----------
    preload: bool
        If True, use eager loading, otherwise use lazy loading.
    window_len_s: int
        Window length in seconds.
    n_subjects: int
        Number of subjects to load.

    Returns
    -------
    windows_ds: BaseConcatDataset
        Windowed data.

    .. warning::
        The recordings from the TUH Abnormal corpus do not all share the
        same sampling rate. The following assumes that the files have
        already been resampled to a common sampling rate.
    """
    subject_ids = list(range(n_subjects))
    ds = TUHAbnormal(
        TUH_PATH, subject_ids=subject_ids, target_name='pathological',
        preload=preload)

    fs = ds.datasets[0].raw.info['sfreq']
    window_len_samples = int(fs * window_len_s)
    window_stride_samples = int(fs * 4)
    # window_stride_samples = int(fs * window_len_s)
    windows_ds = create_fixed_length_windows(
        ds, start_offset_samples=0, stop_offset_samples=None,
        window_size_samples=window_len_samples,
        window_stride_samples=window_stride_samples,
        drop_last_window=True, preload=preload, drop_bad_windows=True)

    # Drop bad epochs
    # XXX: This could be parallelized.
    # XXX: Also, this could be implemented in the Dataset object itself.
    for ds in windows_ds.datasets:
        ds.windows.drop_bad()
        assert ds.windows.preload == preload

    return windows_ds
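# The warning in the docstring above assumes the recordings were already
# resampled to a common rate. A minimal sketch of that step, not part of the
# original benchmark: the helper name and the 100 Hz target rate are
# arbitrary assumptions.
def resample_to_common_rate(ds, sfreq=100.):
    """Resample all recordings of ``ds`` in place to a common rate."""
    from braindecode.preprocessing import Preprocessor, preprocess

    # A string ``fn`` makes ``Preprocessor`` call the corresponding method of
    # ``mne.io.Raw``, here ``raw.resample(sfreq=sfreq)``.
    preprocess(ds, [Preprocessor('resample', sfreq=sfreq)])
    return ds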
def test_fixed_length_windower_n_jobs(lazy_loadable_dataset):
    longer_dataset = BaseConcatDataset(
        [lazy_loadable_dataset.datasets[0]] * 8)
    windows = [create_fixed_length_windows(
        concat_ds=longer_dataset, start_offset_samples=0,
        stop_offset_samples=None, window_size_samples=100,
        window_stride_samples=100, drop_last_window=True, preload=True,
        n_jobs=n_jobs) for n_jobs in [1, 2]]

    assert windows[0].description.equals(windows[1].description)
    for ds1, ds2 in zip(windows[0].datasets, windows[1].datasets):
        # assert ds1.windows == ds2.windows  # Runs locally, fails in CI
        assert np.allclose(ds1.windows.get_data(), ds2.windows.get_data())
        assert pd.Series(ds1.windows.info).to_json() == \
            pd.Series(ds2.windows.info).to_json()
        assert ds1.description.equals(ds2.description)
        assert np.array_equal(ds1.y, ds2.y)
        assert np.array_equal(ds1.crop_inds, ds2.crop_inds)
def test_fixed_length_windower(start_offset_samples, window_size_samples,
                               window_stride_samples, drop_last_window,
                               mapping):
    rng = np.random.RandomState(42)
    info = mne.create_info(ch_names=['0', '1'], sfreq=50, ch_types='eeg')
    data = rng.randn(2, 1000)
    raw = mne.io.RawArray(data=data, info=info)
    desc = pd.Series({'pathological': True, 'gender': 'M', 'age': 48})
    base_ds = BaseDataset(raw, desc, target_name="age")
    concat_ds = BaseConcatDataset([base_ds])

    if window_size_samples is None:
        window_size_samples = base_ds.raw.n_times
    stop_offset_samples = data.shape[1] - start_offset_samples
    epochs_ds = create_fixed_length_windows(
        concat_ds, start_offset_samples=start_offset_samples,
        stop_offset_samples=stop_offset_samples,
        window_size_samples=window_size_samples,
        window_stride_samples=window_stride_samples,
        drop_last_window=drop_last_window, mapping=mapping)

    if mapping is not None:
        assert base_ds.target == 48
        assert all(epochs_ds.datasets[0].windows.metadata['target'] == 0)

    epochs_data = epochs_ds.datasets[0].windows.get_data()

    idxs = np.arange(
        start_offset_samples,
        stop_offset_samples - window_size_samples + 1,
        window_stride_samples)
    if not drop_last_window and \
            idxs[-1] != stop_offset_samples - window_size_samples:
        # Include an extra, possibly overlapping last window
        idxs = np.append(idxs, stop_offset_samples - window_size_samples)

    assert len(idxs) == epochs_data.shape[0], (
        'Number of epochs different than expected')
    assert window_size_samples == epochs_data.shape[2], (
        'Window size different than expected')
    for j, idx in enumerate(idxs):
        np.testing.assert_allclose(
            base_ds.raw.get_data()[:, idx:idx + window_size_samples],
            epochs_data[j, :],
            err_msg=f'Epochs different for epoch {j}')
def prepare_data(n_recs, save, preload, n_jobs):
    if save:
        tmp_dir = tempfile.TemporaryDirectory()
        save_dir = tmp_dir.name
    else:
        save_dir = None

    # (1) Load the data
    concat_ds = SleepPhysionet(
        subject_ids=range(n_recs), recording_ids=[1], crop_wake_mins=30,
        preload=preload)
    sfreq = concat_ds.datasets[0].raw.info['sfreq']

    # (2) Preprocess the continuous data
    preprocessors = [
        Preprocessor('crop', tmin=10),
        Preprocessor('filter', l_freq=None, h_freq=30)
    ]
    preprocess(concat_ds, preprocessors, save_dir=save_dir, overwrite=True,
               n_jobs=n_jobs)

    # (3) Window the data
    windows_ds = create_fixed_length_windows(
        concat_ds, 0, None, int(30 * sfreq), int(30 * sfreq), True,
        preload=preload, n_jobs=n_jobs)

    # (4) Preprocess the windowed data
    preprocessors = [Preprocessor(scale, channel_wise=True)]
    preprocess(windows_ds, preprocessors, save_dir=save_dir, overwrite=True,
               n_jobs=n_jobs)
    # create one directory for every recording
    rec_path = os.path.join(OUT_PATH, str(rec_i))
    if not os.path.exists(rec_path):
        os.makedirs(rec_path)
    tuh_subset.save(rec_path)
    # save memory by deleting raw recording
    del tuh_subset.datasets[0].raw

###############################################################################
# We reload the preprocessed data again in a lazy fashion (`preload=False`).

tuh_loaded = load_concat_dataset(OUT_PATH, preload=False)

###############################################################################
# We generate compute windows. The resulting dataset is now ready to be used
# for model training.

window_size_samples = 1000
window_stride_samples = 1000
# generate compute windows here and store them to disk
tuh_windows = create_fixed_length_windows(
    tuh_loaded,
    start_offset_samples=0,
    stop_offset_samples=None,
    window_size_samples=window_size_samples,
    window_stride_samples=window_stride_samples,
    drop_last_window=False,
)

# store the number of windows required for loading later on
tuh_windows.set_description(
    {"n_windows": [len(d) for d in tuh_windows.datasets]})
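###############################################################################
# The comment above mentions storing the compute windows to disk. A minimal
# sketch of that step, assuming a hypothetical ``windows_path`` output
# directory: ``BaseConcatDataset.save`` writes one subdirectory per windowed
# recording, which can later be reloaded with ``load_concat_dataset``.

windows_path = os.path.join(OUT_PATH, 'windows')  # hypothetical location
os.makedirs(windows_path, exist_ok=True)
tuh_windows.save(windows_path, overwrite=True)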
for i, (x, y, window_ind) in enumerate(windows_dataset):
    ax_arr[i].plot(x.T)
    ax_arr[i].set_ylim(-4e-5, 4e-5)
    ax_arr[i].set_title(f"label={y}")
    if i == max_i:
        break

fig.tight_layout()

###############################################################################
# Alternatively, we can create evenly spaced ("sliding") windows using a
# different windower.

sliding_windows_dataset = create_fixed_length_windows(
    dataset, start_offset_samples=0, stop_offset_samples=0,
    window_size_samples=1200, window_stride_samples=1000,
    drop_last_window=False)

print(len(sliding_windows_dataset))
for x, y, window_ind in sliding_windows_dataset:
    print(x.shape, y, window_ind)
    break

sliding_windows_dataset.description

###############################################################################
# Transforms can also be applied on windows in the same way as shown
# above on continuous data:
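# A minimal sketch of such a window-level transform, standing in for the code
# that followed here (the gain factor is an arbitrary assumption): a callable
# passed to ``Preprocessor`` is applied directly to the data array of every
# window.
from braindecode.preprocessing import Preprocessor, preprocess

preprocess(sliding_windows_dataset, [Preprocessor(lambda x: x * 1e6)])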
# the preprocessed data is automatically reloaded with ``preload=False``.
#
# .. note::
#    Here we use ``n_jobs=2`` as the machines the documentation is built on
#    only have two cores. This number should be modified based on the machine
#    that is available for preprocessing.

OUT_PATH = tempfile.mkdtemp()  # please insert actual output directory here
tuh_preproc = preprocess(
    concat_ds=tuh,
    preprocessors=preprocessors,
    n_jobs=N_JOBS,
    save_dir=OUT_PATH)

###############################################################################
# We can finally generate compute windows. The resulting dataset is now ready
# to be used for model training.

window_size_samples = 1000
window_stride_samples = 1000
# generate compute windows here and store them to disk
tuh_windows = create_fixed_length_windows(
    tuh_preproc,
    window_size_samples=window_size_samples,
    window_stride_samples=window_stride_samples,
    drop_last_window=False,
    n_jobs=N_JOBS,
)

for x, y, ind in tuh_windows:
    break
######################################################################
# Cut Compute Windows
# ~~~~~~~~~~~~~~~~~~~
#

from braindecode.preprocessing import create_fixed_length_windows

# Create windows using the braindecode function for this. It needs parameters
# to define how trials should be used.
train_set = create_fixed_length_windows(
    train_set,
    start_offset_samples=0,
    stop_offset_samples=None,
    window_size_samples=input_window_samples,
    window_stride_samples=n_preds_per_input,
    drop_last_window=False,
    targets_from='channels',
    last_target_only=False,
    preload=False)

valid_set = create_fixed_length_windows(
    valid_set,
    start_offset_samples=0,
    stop_offset_samples=None,
    window_size_samples=input_window_samples,
    window_stride_samples=n_preds_per_input,
    drop_last_window=False,
    targets_from='channels',
    last_target_only=False,
    preload=False)
def test_variable_length_trials_cropped_decoding():
    cuda = False
    set_random_seeds(seed=20210726, cuda=cuda)

    # create fake tuh abnormal dataset
    tuh = _TUHAbnormalMock(path='')
    # fake variable length trials by cropping first recording
    splits = tuh.split([[i] for i in range(len(tuh.datasets))])
    preprocess(
        concat_ds=splits['0'],
        preprocessors=[
            Preprocessor('crop', tmax=300),
        ],
    )
    variable_tuh = BaseConcatDataset(
        [splits[str(i)] for i in range(len(tuh.datasets))])
    # make sure we actually have different length trials
    assert any(
        np.diff([ds.raw.n_times for ds in variable_tuh.datasets]) != 0)

    # create windows
    variable_tuh_windows = create_fixed_length_windows(
        concat_ds=variable_tuh,
        window_size_samples=1000,
        window_stride_samples=1000,
        drop_last_window=False,
        mapping={True: 1, False: 0},
    )

    # create train and valid set
    splits = variable_tuh_windows.split(
        [[i] for i in range(len(variable_tuh_windows.datasets))])
    variable_tuh_windows_train = BaseConcatDataset(
        [splits[str(i)] for i in range(len(tuh.datasets) - 1)])
    variable_tuh_windows_valid = BaseConcatDataset(
        [splits[str(len(tuh.datasets) - 1)]])
    for x, y, ind in variable_tuh_windows_train:
        break
    train_split = predefined_split(variable_tuh_windows_valid)

    # initialize a model
    model = ShallowFBCSPNet(
        in_chans=x.shape[0],
        n_classes=len(tuh.description.pathological.unique()),
    )
    to_dense_prediction_model(model)
    if cuda:
        model.cuda()

    # create and train a classifier
    clf = EEGClassifier(
        model,
        cropped=True,
        criterion=CroppedLoss,
        criterion__loss_function=torch.nn.functional.nll_loss,
        optimizer=torch.optim.Adam,
        batch_size=32,
        callbacks=['accuracy'],
        train_split=train_split,
    )
    clf.fit(variable_tuh_windows_train, y=None, epochs=3)

    # make sure it does what we expect
    np.testing.assert_allclose(
        clf.history[:, 'train_loss'],
        np.array([
            0.689495325088501,
            0.1353449523448944,
            0.006638816092163324,
        ]),
        rtol=1e-1,
        atol=1e-1,
    )
    np.testing.assert_allclose(
        clf.history[:, 'valid_loss'],
        np.array([
            2.925871,
            3.611423,
            4.23494,
        ]),
        rtol=1e-1,
        atol=1e-1,
    )
x, y = tuh[-1]
print('x:', x)
print('y:', y)

###############################################################################
# We will skip preprocessing steps for now, since it is not the aim of this
# example. Instead, we will directly create compute windows. We specify a
# mapping from genders 'M' and 'F' to integers, since this is required for
# decoding.

tuh_windows = create_fixed_length_windows(
    tuh,
    start_offset_samples=0,
    stop_offset_samples=None,
    window_size_samples=1000,
    window_stride_samples=1000,
    drop_last_window=False,
    mapping={'M': 0, 'F': 1},  # map non-digit targets
)
# store the number of windows required for loading later on
tuh_windows.set_description(
    {"n_windows": [len(d) for d in tuh_windows.datasets]})

###############################################################################
# Iterating through the dataset gives x as ndarray(n_channels x 1000), y as
# [age, gender], and ind. Let's look at the last example again.

x, y, ind = tuh_windows[-1]
print('x:', x)
print('y:', y)
        base_ds = BaseDataset(raw, fake_descrition, target_name="target")
        datasets.append(base_ds)
    dataset = BaseConcatDataset(datasets)
    return dataset


dataset = fake_regression_dataset(
    n_fake_recs=5, n_fake_chs=21, fake_sfreq=100, fake_duration_s=60)

windows_dataset = create_fixed_length_windows(
    dataset,
    start_offset_samples=0,
    stop_offset_samples=0,
    window_size_samples=input_window_samples,
    window_stride_samples=n_preds_per_input,
    drop_last_window=False,
    drop_bad_windows=True,
)

splits = windows_dataset.split("session")
train_set = splits["train"]
valid_set = splits["eval"]

regressor = EEGRegressor(
    model,
    cropped=True,
    criterion=CroppedLoss,
    criterion__loss_function=torch.nn.functional.mse_loss,
    optimizer=torch.optim.AdamW,