def bandpass_window_BaseConcat(dataset, bandpass_range=(4, 38), window_start_offset=1.0, window_end_offset=-0.5):
    '''
    Bandpass-filter and window a dataset, returning it in BaseConcatDataset form.

    :param dataset: braindecode dataset; left untouched (a deep copy is processed)
    :param bandpass_range: (low_hz, high_hz) filter band in Hz
    :param window_start_offset: trial start offset in seconds
    :param window_end_offset: trial stop offset in seconds
    :return: windowed dataset produced by ``create_windows_from_events``
    '''
    low_hz, high_hz = bandpass_range
    # Parameters for exponential moving standardization
    ems_factor = 1e-3
    ems_init_block = 1000

    pipeline = [
        # keep only EEG sensors
        MNEPreproc(fn='pick_types', eeg=True, meg=False, stim=False),
        # volt -> microvolt, applied directly to the numpy array
        NumpyPreproc(fn=lambda x: x * 1e6),
        # bandpass filter
        MNEPreproc(fn='filter', l_freq=low_hz, h_freq=high_hz),
        # exponential moving standardization
        NumpyPreproc(fn=exponential_moving_standardize,
                     factor_new=ems_factor,
                     init_block_size=ems_init_block),
    ]

    # Work on a copy so the caller's dataset is not modified in place.
    filtered = copy.deepcopy(dataset)
    preprocess(filtered, pipeline)

    # All recordings must share one sampling frequency for the sample math below.
    sfreq = filtered.datasets[0].raw.info['sfreq']
    assert all(ds.raw.info['sfreq'] == sfreq for ds in filtered.datasets)

    # Convert the second-based offsets into sample counts.
    start_samples = int(window_start_offset * sfreq)
    stop_samples = int(window_end_offset * sfreq)

    # Create windows using the braindecode windower; it needs the per-trial
    # offsets to know how trials should be cut.
    return create_windows_from_events(
        filtered,
        trial_start_offset_samples=start_samples,
        trial_stop_offset_samples=stop_samples,
        preload=True,
    )
def load_train_test_hgd(subject_id):
    """Load one High-Gamma (Schirrmeister2017) subject, preprocess it, and
    return a 75/25 (train, valid) split of 'Right Hand' vs 'Rest' windows."""
    hgd_names = [
        'Fp2', 'Fp1', 'F4', 'F3', 'C4', 'C3', 'P4', 'P3', 'O2', 'O1',
        'F8', 'F7', 'T8', 'T7', 'P8', 'P7', 'M2', 'M1', 'Fz', 'Cz', 'Pz'
    ]

    log.info("Loading dataset..")
    # Fetch the data through the moabb wrapper.
    dataset = MOABBDataset(dataset_name="Schirrmeister2017",
                           subject_ids=[subject_id])
    sfreq = 32
    train_whole_set = dataset.split('run')['train']

    log.info("Preprocessing dataset..")
    # Preprocessing pipeline: re-reference, channel selection, unit conversion,
    # two clip/scale stages around resampling, then moving demeaning.
    preprocessors = [
        MNEPreproc(
            fn='set_eeg_reference',
            ref_channels='average',
        ),
        MNEPreproc(fn='pick_channels', ch_names=hgd_names, ordered=True),
        # volt -> microvolt
        NumpyPreproc(fn=lambda x: x * 1e6),
        NumpyPreproc(fn=lambda x: np.clip(x, -800, 800)),
        NumpyPreproc(fn=lambda x: x / 10),
        MNEPreproc(fn='resample', sfreq=sfreq),
        NumpyPreproc(fn=lambda x: np.clip(x, -80, 80)),
        NumpyPreproc(fn=lambda x: x / 3),
        # exponential moving demeaning over ~10 s blocks
        NumpyPreproc(fn=exponential_moving_demean,
                     init_block_size=int(sfreq * 10),
                     factor_new=1 / (sfreq * 5)),
    ]
    preprocess(train_whole_set, preprocessors)

    # Extract the trials as windows; map the two classes of interest.
    class_names = ['Right Hand', 'Rest']  # for later plotting
    class_mapping = {'right_hand': 0, 'rest': 1}

    windows_dataset = create_windows_from_events(
        train_whole_set,
        trial_start_offset_samples=0,
        trial_stop_offset_samples=0,
        preload=True,
        mapping=class_mapping,
    )

    # Contiguous 75/25 split: first part for training, remainder for validation.
    from torch.utils.data import Subset
    n_split = int(np.round(0.75 * len(windows_dataset)))
    train_set = Subset(windows_dataset, range(0, n_split))
    valid_set = Subset(windows_dataset, range(n_split, len(windows_dataset)))
    return train_set, valid_set
def preprocess_data(dataset, low_cut_hz=4, high_cut_hz=38, factor_new=1e-3, init_block_size=1000):
    """Apply the standard EEG pipeline to ``dataset`` in place and return it.

    Pipeline: keep EEG channels only, convert volt to microvolt, bandpass
    filter between ``low_cut_hz`` and ``high_cut_hz``, then apply exponential
    moving standardization with the given ``factor_new``/``init_block_size``.
    """
    steps = [
        # keep only EEG sensors
        MNEPreproc(fn='pick_types', eeg=True, meg=False, stim=False),
        # volt -> microvolt, modifying the numpy array directly
        NumpyPreproc(fn=lambda x: x * 1e6),
        # bandpass filter
        MNEPreproc(fn='filter', l_freq=low_cut_hz, h_freq=high_cut_hz),
        # exponential moving standardization
        NumpyPreproc(fn=exponential_moving_standardize,
                     factor_new=factor_new,
                     init_block_size=init_block_size),
    ]
    preprocess(dataset, steps)
    return dataset
def our_preprocess(dataset, low_cut_hz=4., high_cut_hz=38., factor_new=1e-3, init_block_size=1000):
    """Apply the standard EEG preprocessing pipeline to ``dataset`` in place.

    Generalized: the previously hard-coded filter band and standardization
    parameters are now keyword arguments whose defaults reproduce the old
    behavior exactly, so existing callers are unaffected.

    :param dataset: braindecode dataset, modified in place
    :param low_cut_hz: low cut frequency for filtering (Hz)
    :param high_cut_hz: high cut frequency for filtering (Hz)
    :param factor_new: factor for exponential moving standardization
    :param init_block_size: init block size for exponential moving standardization
    :return: the same ``dataset`` object, for call chaining (consistent with
        ``preprocess_data``; previously this function returned ``None``)
    """
    preprocessors = [
        # keep only EEG sensors
        MNEPreproc(fn='pick_types', eeg=True, meg=False, stim=False),
        # convert from volt to microvolt, directly modifying the numpy array
        NumpyPreproc(fn=lambda x: x * 1e6),
        # bandpass filter
        MNEPreproc(fn='filter', l_freq=low_cut_hz, h_freq=high_cut_hz),
        # exponential moving standardization
        NumpyPreproc(fn=exponential_moving_standardize,
                     factor_new=factor_new,
                     init_block_size=init_block_size),
    ]
    # Transform the data (in place)
    preprocess(dataset, preprocessors)
    return dataset
def load_train_valid_tuh(n_subjects, n_seconds, ids_to_load):
    """Load preprocessed TUH recordings and return fixed-length train/valid
    window sets, split 75/25 at the subject level."""
    path = '/home/schirrmr/data/preproced-tuh/all-sensors-32-hz/'
    log.info("Load concat dataset...")
    dataset = load_concat_dataset(path, preload=False, ids_to_load=ids_to_load)
    whole_train_set = dataset.split('session')['train']

    # Crop a little past the requested duration (two minutes of margin).
    n_max_minutes = int(np.ceil(n_seconds / 60) + 2)
    sfreq = whole_train_set.datasets[0].raw.info['sfreq']

    log.info("Preprocess concat dataset...")
    preprocess(whole_train_set, [
        MNEPreproc('crop', tmin=0, tmax=n_max_minutes * 60, include_tmax=True),
        NumpyPreproc(fn=lambda x: np.clip(x, -80, 80)),
        NumpyPreproc(fn=lambda x: x / 3),
        NumpyPreproc(fn=exponential_moving_demean,
                     init_block_size=int(sfreq * 10),
                     factor_new=1 / (sfreq * 5)),
    ])

    # Split 75/25 by subject so no subject appears in both sets.
    per_subject = whole_train_set.split('subject')
    subject_keys = list(per_subject.keys())
    n_train_subjects = int(np.round(n_subjects * 0.75))

    def _concat(keys):
        # Flatten the per-subject datasets into one BaseConcatDataset.
        return BaseConcatDataset(
            [d for key in keys for d in per_subject[key].datasets])

    train_set = _concat(subject_keys[:n_train_subjects])
    valid_set = _concat(subject_keys[n_train_subjects:n_subjects])

    def _window(ds):
        # NOTE(review): the 60*32 / 32*n_seconds offsets assume the stored
        # 32 Hz sampling rate of this dataset directory — confirm if reused.
        return create_fixed_length_windows(
            ds,
            start_offset_samples=60 * 32,
            stop_offset_samples=60 * 32 + 32 * n_seconds,
            preload=True,
            window_size_samples=128,
            window_stride_samples=64,
            drop_last_window=True,
        )

    train_set = _window(train_set)
    valid_set = _window(valid_set)
    return train_set, valid_set
def build_epoch(subjects, recording, crop_wake_mins, preprocessing, train=True):
    """Build a 30 s windowed sleep-staging dataset from SleepPhysionet.

    ``preprocessing`` is a collection of step names; the recognized entries
    are "microvolt_scaling" and "filtering".  ``train`` is currently unused
    by this function (kept for interface compatibility).
    """
    dataset = SleepPhysionet(subject_ids=subjects,
                             recording_ids=recording,
                             crop_wake_mins=crop_wake_mins)

    if preprocessing:
        steps = []
        if "microvolt_scaling" in preprocessing:
            # volt -> microvolt
            steps.append(NumpyPreproc(fn=lambda x: x * 1e6))
        if "filtering" in preprocessing:
            # low-pass at 30 Hz (no high-pass)
            high_cut_hz = 30
            steps.append(
                MNEPreproc(fn='filter', l_freq=None, h_freq=high_cut_hz))
        # Transform the data in place.
        preprocess(dataset, steps)

    # Stages 3 and 4 are merged, following AASM standards.
    label_of_stage = {
        'Sleep stage W': 0,
        'Sleep stage 1': 1,
        'Sleep stage 2': 2,
        'Sleep stage 3': 3,
        'Sleep stage 4': 3,
        'Sleep stage R': 4
    }

    sfreq = 100
    samples_per_window = 30 * sfreq  # non-overlapping 30-second windows

    return create_windows_from_events(
        dataset,
        trial_start_offset_samples=0,
        trial_stop_offset_samples=0,
        window_size_samples=samples_per_window,
        window_stride_samples=samples_per_window,
        preload=True,
        mapping=label_of_stage)
def custom_crop(raw, tmin=0.0, tmax=None, include_tmax=True):
    """Crop ``raw`` to [tmin, tmax], tolerating recordings shorter than tmax.

    mne's ``Raw.crop`` fails by default when ``tmax`` exceeds the recording
    duration, so ``tmax`` is clamped to the time of the last sample.

    Bug fix: with the default ``tmax=None``, the original
    ``min((raw.n_times - 1) / sfreq, None)`` raised ``TypeError`` (float vs
    None comparison); ``None`` now means "crop to the end of the recording".
    """
    last_time = (raw.n_times - 1) / raw.info['sfreq']
    tmax = last_time if tmax is None else min(last_time, tmax)
    raw.crop(tmin=tmin, tmax=tmax, include_tmax=include_tmax)


tmin = 1 * 60
tmax = 6 * 60
sfreq = 100

preprocessors = [
    MNEPreproc(custom_crop, tmin=tmin, tmax=tmax, include_tmax=False),
    MNEPreproc('set_eeg_reference', ref_channels='average', ch_type='eeg'),
    MNEPreproc(custom_rename_channels, mapping=ch_mapping),
    MNEPreproc("pick_channels", ch_names=short_ch_names, ordered=True),
    NumpyPreproc(lambda x: x * 1e6),
    MNEPreproc("resample", sfreq=sfreq),
]

###############################################################################
# The preprocessing loop works as follows. For every recording, we apply the
# preprocessors as defined above. Then, we update the description of the rec,
# since we have altered the duration, the reference, and the sampling frequency.
# Afterwards, we split the continuous signals into compute windows. We store
# each recording to a unique subdirectory that is named corresponding to the
# rec id. To save memory, after windowing and storing, we delete the raw
# dataset and the windows dataset, respectively.
def bandpass_data(datasets, filter_range, window_start_offset=1.0, window_end_offset=0):
    '''
    Bandpass-filter each subject's EEG dataset and cut it into trial windows.

    :param datasets: (list[MOABBDataset]) a list of MOABBDatasets by subject
    :param filter_range: tuple (low_cut, high_cut) in Hz
    :param window_start_offset: trial start offset in seconds
    :param window_end_offset: trial stop offset in seconds
    :return: (list[BaseConcatDataset]) a list of bandpass filtered data by subject
    '''
    low_hz, high_hz = filter_range
    # Parameters for exponential moving standardization
    factor_new = 1e-3
    init_block_size = 1000

    pipeline = [
        # keep only EEG sensors
        MNEPreproc(fn='pick_types', eeg=True, meg=False, stim=False),
        # volt -> microvolt, applied directly to the numpy array
        NumpyPreproc(fn=lambda x: x * 1e6),
        # bandpass filter
        MNEPreproc(fn='filter', l_freq=low_hz, h_freq=high_hz),
        # exponential moving standardization
        NumpyPreproc(fn=exponential_moving_standardize,
                     factor_new=factor_new,
                     init_block_size=init_block_size),
    ]

    windowed = []
    for ds in datasets:
        # Process a deep copy so the caller's datasets stay untouched.
        subject_copy = copy.deepcopy(ds)
        preprocess(subject_copy, pipeline)

        # All recordings of a subject must share one sampling frequency.
        sfreq = subject_copy.datasets[0].raw.info['sfreq']
        assert all(d.raw.info['sfreq'] == sfreq
                   for d in subject_copy.datasets)

        # Translate second-based offsets to samples and window the trials.
        windowed.append(create_windows_from_events(
            subject_copy,
            trial_start_offset_samples=int(window_start_offset * sfreq),
            trial_stop_offset_samples=int(window_end_offset * sfreq),
            preload=True,
        ))
    return windowed
def exp(subject_id):
    """Run a cropped-decoding experiment on BNCI2014001 for one subject.

    Loads the MOABB dataset, bandpass-filters it, builds a ShallowFBCSPNet
    converted to a dense-prediction model, trains an EEGClassifier with
    CroppedLoss, plots loss/misclassification curves, and returns the
    per-epoch history.

    :param subject_id: subject id (or list of ids) forwarded to MOABBDataset
    :return: pandas.DataFrame with train/valid loss and misclass per epoch
    """
    dataset = MOABBDataset(dataset_name="BNCI2014001", subject_ids=subject_id)

    from braindecode.datautil.preprocess import exponential_moving_standardize
    from braindecode.datautil.preprocess import MNEPreproc, NumpyPreproc, preprocess

    low_cut_hz = 0.  # low cut frequency for filtering
    high_cut_hz = 38.  # high cut frequency for filtering
    # Parameters for exponential moving standardization
    factor_new = 1e-3
    init_block_size = 1000

    preprocessors = [
        # keep only EEG sensors
        MNEPreproc(fn='pick_types', eeg=True, meg=False, stim=False),
        # convert from volt to microvolt, directly modifying the numpy array
        NumpyPreproc(fn=lambda x: x * 1e6),
        # bandpass filter
        MNEPreproc(fn='filter', l_freq=low_cut_hz, h_freq=high_cut_hz),
        # exponential moving standardization (disabled here)
        # NumpyPreproc(fn=exponential_moving_standardize, factor_new=factor_new,
        #              init_block_size=init_block_size)
    ]

    # Transform the data
    preprocess(dataset, preprocessors)

    # In contrast to trialwise decoding, the model must be created before the
    # dataset is cut into windows, because the network's receptive field
    # determines how large the window stride should be.
    #
    # The compute/input window size fed to the network during training must be
    # larger than the network's receptive field; otherwise it can be chosen
    # for computational efficiency.  1000 samples = 4 s at the 250 Hz
    # sampling rate of this dataset.
    input_window_samples = 1000

    # Create the model.  To use it efficiently in cropped decoding, the final
    # convolution length is set manually (final_conv_length=30 below) so the
    # receptive field of the ConvNet stays smaller than input_window_samples.
    import torch
    from braindecode.util import set_random_seeds
    from braindecode.models import ShallowFBCSPNet, Deep4Net

    cuda = torch.cuda.is_available(
    )  # check if GPU is available, if True chooses to use it
    device = 'cuda:1' if cuda else 'cpu'
    if cuda:
        torch.backends.cudnn.benchmark = True
    seed = 20190706  # random seed to make results reproducible
    # Set random seed to be able to reproduce results
    set_random_seeds(seed=seed, cuda=cuda)

    n_classes = 4
    # Extract number of chans from dataset
    n_chans = dataset[0][0].shape[0]

    # model = Deep4Net(
    #     n_chans,
    #     n_classes,
    #     input_window_samples=input_window_samples,
    #     final_conv_length="auto",
    # )
    #
    #
    #
    # embedding_net = Deep4Net_origin(4, 22, input_window_samples)
    # model = FcClfNet(embedding_net)

    model = ShallowFBCSPNet(
        n_chans,
        n_classes,
        input_window_samples=input_window_samples,
        final_conv_length=30,
    )

    print(model)

    # Send model to GPU
    if cuda:
        model.cuda(device)

    # Transform the strided model into one that outputs dense predictions, so
    # it yields predictions for all crops.
    from braindecode.models.util import to_dense_prediction_model, get_output_shape
    to_dense_prediction_model(model)

    n_preds_per_input = get_output_shape(model, n_chans, input_window_samples)[2]
    print("n_preds_per_input : ", n_preds_per_input)
    print(model)

    # Cut the data into windows.  In contrast to trialwise decoding, an
    # explicit window size and window stride must be supplied to
    # ``create_windows_from_events``.
    import numpy as np
    from braindecode.datautil.windowers import create_windows_from_events

    trial_start_offset_seconds = -0.5
    # Extract sampling frequency, check that they are same in all datasets
    sfreq = dataset.datasets[0].raw.info['sfreq']
    assert all([ds.raw.info['sfreq'] == sfreq for ds in dataset.datasets])

    # Calculate the trial start offset in samples.
    trial_start_offset_samples = int(trial_start_offset_seconds * sfreq)

    # Create windows using braindecode function for this. It needs parameters
    # to define how trials should be used.
    windows_dataset = create_windows_from_events(
        dataset,
        trial_start_offset_samples=trial_start_offset_samples,
        trial_stop_offset_samples=0,
        window_size_samples=input_window_samples,
        window_stride_samples=n_preds_per_input,
        drop_last_window=False,
        preload=True,
    )

    # Split the dataset (same as in trialwise decoding).
    from braindecode.datasets.base import BaseConcatDataset
    splitted = windows_dataset.split('session')
    train_set = splitted['session_T']
    valid_set = splitted['session_E']

    # For cropped decoding: supply ``cropped=True`` to the EEGClassifier,
    # ``CroppedLoss`` as the criterion, and ``criterion__loss_function`` as
    # the loss applied to the meaned predictions.
    #
    # .. note::
    #    These default parameters were found to work well for motor decoding;
    #    perform your own hyperparameter optimization with cross-validation
    #    on your training data.
    from skorch.callbacks import LRScheduler
    from skorch.helper import predefined_split

    from braindecode import EEGClassifier
    from braindecode.training.losses import CroppedLoss
    from braindecode.training.scoring import trial_preds_from_window_preds

    # These values we found good for shallow network:
    lr = 0.0625 * 0.01
    weight_decay = 0

    # For deep4 they should be:
    # lr = 1 * 0.01
    # weight_decay = 0.5 * 0.001

    batch_size = 8
    n_epochs = 100

    clf = EEGClassifier(
        model,
        cropped=True,
        criterion=CroppedLoss,
        criterion__loss_function=torch.nn.functional.nll_loss,
        optimizer=torch.optim.AdamW,
        train_split=predefined_split(valid_set),
        optimizer__lr=lr,
        optimizer__weight_decay=weight_decay,
        iterator_train__shuffle=True,
        batch_size=batch_size,
        callbacks=[
            "accuracy",
            ("lr_scheduler", LRScheduler('CosineAnnealingLR', T_max=n_epochs - 1)),
        ],
        device=device,
    )

    # Model training for a specified number of epochs. `y` is None as it is
    # already supplied in the dataset.
    clf.fit(train_set, y=None, epochs=n_epochs)

    # Plot results — same code as in trialwise decoding.
    # Note that we drop further in the classification error and loss than in
    # the trialwise decoding tutorial.
    import matplotlib.pyplot as plt
    from matplotlib.lines import Line2D
    import pandas as pd

    # Extract loss and accuracy values for plotting from history object
    results_columns = [
        'train_loss', 'valid_loss', 'train_accuracy', 'valid_accuracy'
    ]
    df = pd.DataFrame(clf.history[:, results_columns],
                      columns=results_columns,
                      index=clf.history[:, 'epoch'])

    # get percent of misclass for better visual comparison to loss
    df = df.assign(train_misclass=100 - 100 * df.train_accuracy,
                   valid_misclass=100 - 100 * df.valid_accuracy)

    plt.style.use('seaborn')
    fig, ax1 = plt.subplots(figsize=(8, 3))
    df.loc[:, ['train_loss', 'valid_loss']].plot(ax=ax1,
                                                 style=['-', ':'],
                                                 marker='o',
                                                 color='tab:blue',
                                                 legend=False,
                                                 fontsize=14)

    ax1.tick_params(axis='y', labelcolor='tab:blue', labelsize=14)
    ax1.set_ylabel("Loss", color='tab:blue', fontsize=14)

    ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

    df.loc[:, ['train_misclass', 'valid_misclass']].plot(ax=ax2,
                                                         style=['-', ':'],
                                                         marker='o',
                                                         color='tab:red',
                                                         legend=False)
    ax2.tick_params(axis='y', labelcolor='tab:red', labelsize=14)
    ax2.set_ylabel("Misclassification Rate [%]", color='tab:red', fontsize=14)
    ax2.set_ylim(ax2.get_ylim()[0], 85)  # make some room for legend
    ax1.set_xlabel("Epoch", fontsize=14)

    # where some data has already been plotted to ax
    handles = []
    handles.append(
        Line2D([0], [0],
               color='black',
               linewidth=1,
               linestyle='-',
               label='Train'))
    handles.append(
        Line2D([0], [0],
               color='black',
               linewidth=1,
               linestyle=':',
               label='Valid'))
    plt.legend(handles, [h.get_label() for h in handles], fontsize=14)
    plt.tight_layout()
    return df