def prepare_data(n_recs, save, preload, n_jobs):
    """Fetch, preprocess and window Sleep Physionet recordings.

    Parameters
    ----------
    n_recs : int
        Number of subjects to load (recording 1 of each subject).
    save : bool
        If True, serialize the preprocessed data into a temporary directory.
        NOTE(review): the ``TemporaryDirectory`` handle is a local, so the
        directory is removed once it is garbage-collected after this function
        returns — presumably intentional for benchmarking; confirm.
    preload : bool
        Whether to load the raw data into memory up front.
    n_jobs : int
        Number of parallel jobs used for preprocessing and windowing.
    """
    if save:
        tmp_dir = tempfile.TemporaryDirectory()
        save_dir = tmp_dir.name
    else:
        save_dir = None

    # (1) Load the data
    concat_ds = SleepPhysionet(
        subject_ids=range(n_recs), recording_ids=[1], crop_wake_mins=30,
        preload=preload)
    sfreq = concat_ds.datasets[0].raw.info['sfreq']

    # (2) Preprocess the continuous (raw) data
    raw_preprocessors = [
        Preprocessor('crop', tmin=10),
        Preprocessor('filter', l_freq=None, h_freq=30),
    ]
    preprocess(concat_ds, raw_preprocessors, save_dir=save_dir,
               overwrite=True, n_jobs=n_jobs)

    # (3) Cut the recordings into non-overlapping 30-s windows
    win_len_samples = int(30 * sfreq)
    windows_ds = create_fixed_length_windows(
        concat_ds, 0, None, win_len_samples, win_len_samples, True,
        preload=preload, n_jobs=n_jobs)

    # (4) Preprocess the windowed data (channel-wise scaling)
    window_preprocessors = [Preprocessor(scale, channel_wise=True)]
    preprocess(windows_ds, window_preprocessors, save_dir=save_dir,
               overwrite=True, n_jobs=n_jobs)
def test_load_save_raw_preproc_kwargs(setup_concat_raw_dataset, tmpdir):
    """Saving a preprocessed raw dataset writes raw_preproc_kwargs.json per
    sub-dataset, and the kwargs survive a round trip through loading."""
    concat_raw_dataset = setup_concat_raw_dataset
    preprocess(
        concat_raw_dataset,
        [Preprocessor('pick_channels', ch_names=['C3'])],
    )
    concat_raw_dataset.save(tmpdir, overwrite=False)

    # One numbered subdirectory per dataset, each with serialized kwargs.
    n_datasets = len(concat_raw_dataset.datasets)
    kwargs_files = [
        os.path.join(tmpdir, str(i), 'raw_preproc_kwargs.json')
        for i in range(n_datasets)
    ]
    for path in kwargs_files:
        assert os.path.exists(path)

    # Reload and verify the preprocessing record round-tripped intact.
    loaded = load_concat_dataset(tmpdir, preload=False)
    expected = [('pick_channels', {'ch_names': ['C3']})]
    for ds in loaded.datasets:
        assert ds.raw_preproc_kwargs == expected
def test_load_save_window_preproc_kwargs(setup_concat_windows_dataset, tmpdir):
    """window_kwargs persist on save; preprocessing a windowed dataset adds
    window_preproc_kwargs.json, and both round-trip through loading."""
    concat_windows_dataset = setup_concat_windows_dataset

    # First save: only the windowing kwargs should be serialized.
    concat_windows_dataset.save(tmpdir, overwrite=False)
    n_datasets = len(concat_windows_dataset.datasets)
    for i in range(n_datasets):
        subdir = os.path.join(tmpdir, str(i))
        assert os.path.exists(os.path.join(subdir, 'window_kwargs.json'))

    # Preprocess the windowed data and overwrite — both kwargs files appear.
    preprocess(
        concat_windows_dataset,
        [Preprocessor('pick_channels', ch_names=['Cz'])],
    )
    concat_windows_dataset.save(tmpdir, overwrite=True)
    for i in range(n_datasets):
        subdir = os.path.join(tmpdir, str(i))
        for fname in ('window_kwargs.json', 'window_preproc_kwargs.json'):
            assert os.path.exists(os.path.join(subdir, fname))

    # Reload and verify both records round-tripped intact.
    loaded = load_concat_dataset(tmpdir, preload=False)
    expected_window_kwargs = [('create_windows_from_events', {
        'infer_mapping': True,
        'infer_window_size_stride': True,
        'trial_start_offset_samples': 0,
        'trial_stop_offset_samples': 0,
        'window_size_samples': None,
        'window_stride_samples': None,
        'drop_last_window': False,
        'mapping': {'feet': 0, 'left_hand': 1, 'right_hand': 2, 'tongue': 3},
        'preload': False,
        'drop_bad_windows': True,
        'picks': None,
        'reject': None,
        'flat': None,
        'on_missing': 'error',
        'accepted_bads_ratio': 0.0,
    })]
    expected_preproc_kwargs = [('pick_channels', {'ch_names': ['Cz']})]
    for ds in loaded.datasets:
        assert ds.window_kwargs == expected_window_kwargs
        assert ds.window_preproc_kwargs == expected_preproc_kwargs
# NOTE(review): the next line closes a dataset-loading call that starts in a
# previous chunk of this file.
                     crop_wake_mins=30, crop=crop)

######################################################################
# Preprocessing
# ~~~~~~~~~~~~~
#
# Next, we preprocess the raw data. We convert the data to microvolts and apply
# a lowpass filter.

from braindecode.preprocessing import preprocess, Preprocessor, scale

high_cut_hz = 30

preprocessors = [
    # Convert from V to uV so downstream steps work on sane magnitudes.
    Preprocessor(scale, factor=1e6, apply_on_array=True),
    # Low-pass below 30 Hz; l_freq=None disables the high-pass part.
    Preprocessor('filter', l_freq=None, h_freq=high_cut_hz)
]

# Transform the data
preprocess(dataset, preprocessors)

######################################################################
# Extract windows
# ~~~~~~~~~~~~~~~
#
# We extract 30-s windows to be used in the classification task.
# The Eldele2021 model takes a single channel as input. Here, the Fpz-Cz channel is used as it
# was found to give better performance than using the Pz-Oz channel

from braindecode.preprocessing import create_windows_from_events
def custom_crop(raw, tmin=0.0, tmax=None, include_tmax=True):
    """Crop ``raw`` to ``[tmin, tmax]``, tolerating short recordings.

    By default ``mne.io.Raw.crop`` raises if ``tmax`` exceeds the recording
    duration; here ``tmax`` is clipped to the last valid time point instead,
    so crops can be "incomplete" when the recording is shorter than ``tmax``.

    Parameters
    ----------
    raw : mne.io.Raw
        Recording cropped in place.
    tmin : float
        Start of the crop, in seconds.
    tmax : float | None
        End of the crop, in seconds. ``None`` means "until the end of the
        recording". (Previously ``None`` crashed with a TypeError inside
        ``min``.)
    include_tmax : bool
        Whether the sample at ``tmax`` is included.
    """
    # Last valid time point of the recording, in seconds.
    rec_end = (raw.n_times - 1) / raw.info['sfreq']
    # Clip tmax to the recording duration; treat None as "to the end".
    tmax = rec_end if tmax is None else min(rec_end, tmax)
    raw.crop(tmin=tmin, tmax=tmax, include_tmax=include_tmax)


tmin = 1 * 60
tmax = 6 * 60
sfreq = 100

preprocessors = [
    Preprocessor(custom_crop, tmin=tmin, tmax=tmax, include_tmax=False,
                 apply_on_array=False),
    Preprocessor('set_eeg_reference', ref_channels='average', ch_type='eeg'),
    Preprocessor(custom_rename_channels, mapping=ch_mapping,
                 apply_on_array=False),
    Preprocessor('pick_channels', ch_names=short_ch_names, ordered=True),
    Preprocessor(lambda x: x * 1e6),  # V -> uV
    Preprocessor(fn=np.clip, a_min=-800, a_max=800, apply_on_array=True),
    Preprocessor('resample', sfreq=sfreq),
]

###############################################################################
# The preprocessing loop works as follows. For every recording, we apply the
# preprocessors as defined above. Then, we update the description of the rec,
###################################################################### # Preprocessing # ~~~~~~~~~~~~~ # from braindecode.preprocessing import (exponential_moving_standardize, preprocess, Preprocessor) low_cut_hz = 4. # low cut frequency for filtering high_cut_hz = 38. # high cut frequency for filtering # Parameters for exponential moving standardization factor_new = 1e-3 init_block_size = 1000 preprocessors = [ Preprocessor('pick_types', eeg=True, meg=False, stim=False), # Keep EEG sensors Preprocessor(lambda x: x * 1e6), # Convert from V to uV Preprocessor('filter', l_freq=low_cut_hz, h_freq=high_cut_hz), # Bandpass filter Preprocessor( exponential_moving_standardize, # Exponential moving standardization factor_new=factor_new, init_block_size=init_block_size) ] preprocess(dataset, preprocessors) ###################################################################### # Extracting windows # ~~~~~~~~~~~~~~~~~~ #
def test_variable_length_trials_cropped_decoding():
    """End-to-end cropped decoding on recordings of different lengths.

    Builds a mock TUH Abnormal dataset, crops one recording so recordings
    have variable length, windows the data, trains an EEGClassifier for
    three epochs and checks the loss trajectories are reproducible.
    """
    cuda = False
    set_random_seeds(seed=20210726, cuda=cuda)
    # create fake tuh abnormal dataset
    tuh = _TUHAbnormalMock(path='')
    # fake variable length trials by cropping first recording
    splits = tuh.split([[i] for i in range(len(tuh.datasets))])
    preprocess(
        concat_ds=splits['0'],
        preprocessors=[
            Preprocessor('crop', tmax=300),
        ],
    )
    variable_tuh = BaseConcatDataset(
        [splits[str(i)] for i in range(len(tuh.datasets))])
    # make sure we actually have different length trials
    assert any(np.diff([ds.raw.n_times for ds in variable_tuh.datasets]) != 0)

    # create windows (labels come from the boolean pathological description)
    variable_tuh_windows = create_fixed_length_windows(
        concat_ds=variable_tuh,
        window_size_samples=1000,
        window_stride_samples=1000,
        drop_last_window=False,
        mapping={True: 1, False: 0},
    )

    # create train and valid set: last recording is held out for validation
    splits = variable_tuh_windows.split(
        [[i] for i in range(len(variable_tuh_windows.datasets))])
    variable_tuh_windows_train = BaseConcatDataset(
        [splits[str(i)] for i in range(len(tuh.datasets) - 1)])
    variable_tuh_windows_valid = BaseConcatDataset(
        [splits[str(len(tuh.datasets) - 1)]])
    # grab a single example to infer the input channel count for the model
    for x, y, ind in variable_tuh_windows_train:
        break
    train_split = predefined_split(variable_tuh_windows_valid)

    # initialize a model
    model = ShallowFBCSPNet(
        in_chans=x.shape[0],
        n_classes=len(tuh.description.pathological.unique()),
    )
    # convert to a dense-output model for cropped decoding
    to_dense_prediction_model(model)
    if cuda:
        model.cuda()

    # create and train a classifier
    clf = EEGClassifier(
        model,
        cropped=True,
        criterion=CroppedLoss,
        criterion__loss_function=torch.nn.functional.nll_loss,
        optimizer=torch.optim.Adam,
        batch_size=32,
        callbacks=['accuracy'],
        train_split=train_split,
    )
    clf.fit(variable_tuh_windows_train, y=None, epochs=3)

    # make sure it does what we expect: loss curves match reference values
    # recorded with the fixed seed above (loose rtol/atol for portability)
    np.testing.assert_allclose(
        clf.history[:, 'train_loss'],
        np.array([
            0.689495325088501,
            0.1353449523448944,
            0.006638816092163324,
        ]),
        rtol=1e-1,
        atol=1e-1,
    )
    np.testing.assert_allclose(
        clf.history[:, 'valid_loss'],
        np.array([
            2.925871,
            3.611423,
            4.23494,
        ]),
        rtol=1e-1,
        atol=1e-1,
    )
# `torchvision <https://pytorch.org/docs/stable/torchvision/index.html>`__. # from braindecode.preprocessing import (exponential_moving_standardize, preprocess, Preprocessor) low_cut_hz = 1. # low cut frequency for filtering high_cut_hz = 200. # high cut frequency for filtering, for ECoG higher than for EEG # Parameters for exponential moving standardization factor_new = 1e-3 init_block_size = 1000 ###################################################################### # We select only first 30 seconds from each dataset to limit time and memory # to run this example. To obtain results on the whole datasets you should remove this line. preprocess(dataset, [Preprocessor('crop', tmin=0, tmax=30)]) ###################################################################### # In time series targets setup, targets variables are stored in mne.Raw object as channels # of type `misc`. Thus those channels have to be selected for further processing. However, # many mne functions ignore `misc` channels and perform operations only on data channels # (see https://mne.tools/stable/glossary.html#term-data-channels). preprocessors = [ Preprocessor('pick_types', ecog=True, misc=True), Preprocessor(lambda x: x / 1e6, picks='ecog'), # Convert from V to uV Preprocessor('filter', l_freq=low_cut_hz, h_freq=high_cut_hz), # Bandpass filter Preprocessor( exponential_moving_standardize, # Exponential moving standardization factor_new=factor_new, init_block_size=init_block_size,
from braindecode.preprocessing import create_windows_from_events ############################################################################### # First, we load some dataset using MOABB. dataset = MOABBDataset( dataset_name='BNCI2014001', subject_ids=[1], ) ############################################################################### # We can apply preprocessing steps to the dataset. It is also possible to skip # this step and not apply any preprocessing. preprocess( concat_ds=dataset, preprocessors=[Preprocessor(fn='resample', sfreq=10)] ) ############################################################################### # We save the dataset to a an existing directory. It will create a '.fif' file # for every dataset in the concat dataset. Additionally it will create two # JSON files, the first holding the description of the dataset, the second # holding the name of the target. If you want to store to the same directory # several times, for example due to trying different preprocessing, you can # choose to overwrite the existing files. tmpdir = tempfile.mkdtemp() # write in a temporary directory dataset.save( path=tmpdir, overwrite=False, )
# ds has a pandas DataFrame with additional description of its internal datasets dataset.description ############################################################################## # We can iterate through ds which yields one time point of a continuous signal x, # and a target y (which can be None if targets are not defined for the entire # continuous signal). for x, y in dataset: print(x.shape, y) break ############################################################################## # We can apply preprocessing transforms that are defined in mne and work # in-place, such as resampling, bandpass filtering, or electrode selection. preprocessors = [ Preprocessor('pick_types', eeg=True, meg=False, stim=True), Preprocessor('resample', sfreq=100) ] print(dataset.datasets[0].raw.info["sfreq"]) preprocess(dataset, preprocessors) print(dataset.datasets[0].raw.info["sfreq"]) ############################################################################### # We can easily split ds based on a criteria applied to the description # DataFrame: subsets = dataset.split("session") print({subset_name: len(subset) for subset_name, subset in subsets.items()}) ############################################################################### # Next, we use a windower to extract events from the dataset based on events: windows_dataset = create_windows_from_events(dataset,