def test_preprocessors_with_misc_channels():
    rng = np.random.RandomState(42)
    signal_sfreq = 50
    info = mne.create_info(
        ch_names=['0', '1', 'target_0', 'target_1'], sfreq=signal_sfreq,
        ch_types=['eeg', 'eeg', 'misc', 'misc'])
    signal = rng.randn(2, 1000)
    targets = rng.randn(2, 1000)
    raw = mne.io.RawArray(np.concatenate([signal, targets]), info=info)
    desc = pd.Series({'pathological': True, 'gender': 'M', 'age': 48})
    base_dataset = BaseDataset(raw, desc, target_name=None)
    concat_ds = BaseConcatDataset([base_dataset])

    preprocessors = [
        Preprocessor('pick_types', eeg=True, misc=True),
        Preprocessor(lambda x: x / 1e6),
    ]
    preprocess(concat_ds, preprocessors)

    # Check that preprocessing did not affect the targets.
    # This only holds for preprocessors relying on MNE functions that do not
    # modify `misc` channels.
    np.testing.assert_array_equal(
        concat_ds.datasets[0].raw.get_data()[-2:, :],
        targets
    )
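# The assertion above relies on callable Preprocessors being applied via
# mne.io.Raw.apply_function (an assumption about the implementation), which by
# default only picks *data* channels, so 'misc' channels (used here to store
# regression targets) stay untouched. Minimal standalone sketch of that MNE
# behaviour, not part of the original test suite:
def _sketch_misc_channels_untouched():
    rng = np.random.RandomState(0)
    info = mne.create_info(['eeg_0', 'target_0'], sfreq=50.,
                           ch_types=['eeg', 'misc'])
    raw = mne.io.RawArray(rng.randn(2, 500), info)
    targets_before = raw.get_data(picks='misc').copy()
    # picks=None defaults to data channels only, leaving 'misc' unchanged
    raw.apply_function(lambda x: x / 1e6)
    np.testing.assert_array_equal(raw.get_data(picks='misc'), targets_before)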
def concat_ds_targets():
    raws, description = fetch_data_with_moabb(
        dataset_name="BNCI2014001", subject_ids=4)
    events, _ = mne.events_from_annotations(raws[0])
    targets = events[:, -1] - 1
    ds = [BaseDataset(raws[i], description.iloc[i]) for i in range(3)]
    concat_ds = BaseConcatDataset(ds)
    return concat_ds, targets
def test_concat_concat_dataset(concat_ds_targets):
    concat_ds, targets = concat_ds_targets
    concat_ds1 = BaseConcatDataset(concat_ds.datasets[:2])
    concat_ds2 = BaseConcatDataset(concat_ds.datasets[2:])
    list_of_concat_ds = [concat_ds1, concat_ds2]
    descriptions = pd.concat([ds.description for ds in list_of_concat_ds])
    descriptions.reset_index(inplace=True, drop=True)
    lens = [0] + [len(ds) for ds in list_of_concat_ds]
    cumsums = [ds.cumulative_sizes for ds in list_of_concat_ds]
    cumsums = [ls for i, cumsum in enumerate(cumsums)
               for ls in np.array(cumsum) + lens[i]]
    concat_concat_ds = BaseConcatDataset(list_of_concat_ds)
    assert len(concat_concat_ds) == sum(lens)
    assert len(concat_concat_ds) == concat_concat_ds.cumulative_sizes[-1]
    assert len(concat_concat_ds.datasets) == len(descriptions)
    assert len(concat_concat_ds.description) == len(descriptions)
    np.testing.assert_array_equal(cumsums, concat_concat_ds.cumulative_sizes)
    pd.testing.assert_frame_equal(descriptions, concat_concat_ds.description)
def test_save_varying_number_of_datasets_with_overwrite(
        setup_concat_windows_dataset, tmpdir):
    concat_windows_dataset = setup_concat_windows_dataset
    concat_windows_dataset.save(path=tmpdir, overwrite=False)
    subset = concat_windows_dataset.split([0])['0']
    with pytest.warns(UserWarning, match='The number of saved datasets'):
        subset.save(path=tmpdir, overwrite=True)

    # assert no warning raised when there are as many subdirectories as before
    with pytest.warns(None) as raised_warnings:
        concat_windows_dataset.save(path=tmpdir, overwrite=True)
    assert len(raised_warnings) == 0

    # assert no warning raised when there are more subdirectories than before
    double_concat_windows_dataset = BaseConcatDataset(
        [concat_windows_dataset, concat_windows_dataset])
    with pytest.warns(None) as raised_warnings:
        double_concat_windows_dataset.save(path=tmpdir, overwrite=True)
    assert len(raised_warnings) == 0
def test_preprocess_overwrite(base_concat_ds, tmp_path, overwrite):
    preprocessors = [Preprocessor('crop', tmax=10, include_tmax=False)]

    # Create temporary directory with preexisting files
    save_dir = str(tmp_path)
    for i, ds in enumerate(base_concat_ds.datasets):
        concat_ds = BaseConcatDataset([ds])
        save_subdir = os.path.join(save_dir, str(i))
        os.makedirs(save_subdir)
        concat_ds.save(save_subdir, overwrite=True)

    if overwrite:
        preprocess(base_concat_ds, preprocessors, save_dir, overwrite=True)
        # Make sure the serialized data is preprocessed
        preproc_concat_ds = load_concat_dataset(save_dir, True)
        assert all([len(ds.raw.times) == 2500
                    for ds in preproc_concat_ds.datasets])
    else:
        with pytest.raises(FileExistsError):
            preprocess(base_concat_ds, preprocessors, save_dir,
                       overwrite=False)
def windows_ds():
    raws, description = fetch_data_with_moabb(
        dataset_name='BNCI2014001', subject_ids=4)
    ds = [BaseDataset(raws[i], description.iloc[i]) for i in range(3)]
    concat_ds = BaseConcatDataset(ds)
    windows_ds = create_fixed_length_windows(
        concat_ds=concat_ds, start_offset_samples=0, stop_offset_samples=None,
        window_size_samples=500, window_stride_samples=500,
        drop_last_window=False, preload=False)
    return windows_ds
def target_windows_ds():
    raws, description = fetch_data_with_moabb(
        dataset_name='BNCI2014001', subject_ids=4)
    ds = [BaseDataset(raws[i], description.iloc[i]) for i in range(3)]
    concat_ds = BaseConcatDataset(ds)
    windows_ds = create_windows_from_events(
        concat_ds, trial_start_offset_samples=0, trial_stop_offset_samples=0,
        window_size_samples=None, window_stride_samples=None,
        drop_last_window=False)
    return windows_ds
def fake_regression_dataset(n_fake_recs, n_fake_chs, fake_sfreq,
                            fake_duration_s, n_classes=1):
    datasets = []
    for i in range(n_fake_recs):
        train_or_eval = "eval" if i == 0 else "train"
        raw, save_fname = create_mne_dummy_raw(
            n_channels=n_fake_chs, n_times=fake_duration_s * fake_sfreq,
            sfreq=fake_sfreq, savedir=None)
        target = np.random.randint(0, 100, n_classes)
        if n_classes == 1:
            target = target[0]
        fake_description = pd.Series(
            data=[target, train_or_eval], index=["target", "session"])
        base_ds = BaseDataset(raw, fake_description, target_name="target")
        datasets.append(base_ds)
    dataset = BaseConcatDataset(datasets)
    return dataset
def test_multi_target_dataset(set_up):
    _, base_dataset, _, _, _, _ = set_up
    base_dataset.target_name = ['pathological', 'gender', 'age']
    x, y = base_dataset[0]
    assert len(y) == 3
    assert base_dataset.description.to_list() == y
    concat_ds = BaseConcatDataset([base_dataset])
    windows_ds = create_fixed_length_windows(
        concat_ds,
        window_size_samples=100,
        window_stride_samples=100,
        start_offset_samples=0,
        stop_offset_samples=None,
        drop_last_window=False,
        mapping={True: 1, False: 0, 'M': 0, 'F': 1},  # map non-digit targets
    )
    x, y, ind = windows_ds[0]
    assert len(y) == 3
    assert y == [1, 0, 48]  # order matters: pathological, gender, age
def load_5f_halt(args):
    """Load and preprocess the validation/training data of the 5F or HaLT
    datasets.

    Parameters
    ----------
    args : Namespace
        Input arguments.

    Returns
    -------
    dataset : BaseConcatDataset
        BaseConcatDataset of raw MNE arrays.
    """
    import os
    from scipy import io
    import numpy as np
    import mne
    from sklearn.utils import resample
    from braindecode.datautil import exponential_moving_standardize
    from braindecode.datasets import BaseDataset, BaseConcatDataset

    ### Channel types ###
    # Rejecting channels A1, A2, X5 (see paper)
    ch_names = ['Fp1', 'Fp2', 'F3', 'F4', 'C3', 'C4', 'P3', 'P4', 'O1', 'O2',
                'F7', 'F8', 'T3', 'T4', 'T5', 'T6', 'Fz', 'Cz', 'Pz', 'stim']
    ch_types = ['eeg', 'eeg', 'eeg', 'eeg', 'eeg', 'eeg', 'eeg', 'eeg', 'eeg',
                'eeg', 'eeg', 'eeg', 'eeg', 'eeg', 'eeg', 'eeg', 'eeg', 'eeg',
                'eeg', 'stim']
    idx_chan = np.ones(22, dtype=bool)
    unused_chans = np.asarray((10, 11, 21))
    idx_chan[unused_chans] = False

    ### Subjects ###
    dataset = []
    if args.dataset == '5f':
        data_dir = os.path.join(args.project_dir, 'datasets', '5f', 'data')
    elif args.dataset == 'halt':
        data_dir = os.path.join(args.project_dir, 'datasets', 'halt', 'data')
    files = os.listdir(data_dir)
    files.sort()
    # Loading only one subject for intra-subject analysis
    if args.inter_subject == False:
        used_files = []
        for file in files:
            if 'Subject' + args.test_sub in file:
                used_files.append(file)
    else:
        used_files = files

    ### Loading and preprocessing the .mat data ###
    for file in used_files:
        print('\n\nData file --> ' + file + '\n\n')
        current_sub = file.partition('Subject')[2][0]
        data = io.loadmat(os.path.join(data_dir, file),
                          chars_as_strings=True)['o']
        sfreq = np.asarray(data[0][0]['sampFreq'][0])
        marker = np.transpose(np.asarray(data[0][0]['marker']))
        data = np.transpose(np.asarray(data[0][0]['data']))[idx_chan, :]
        data = exponential_moving_standardize(data)
        data = np.append(data, marker, 0)
        del marker

        ### Converting to MNE format ###
        info = mne.create_info(ch_names, sfreq, ch_types)
        raw_train = mne.io.RawArray(data, info)
        raw_train.info['highpass'] = 0.53
        raw_train.info['lowpass'] = 70
        del data

        ### Getting events and downsampling the data ###
        events = mne.find_events(raw_train, stim_channel='stim',
                                 output='onset', consecutive='increasing')
        # Drop unused events
        idx = np.ones(events.shape[0], dtype=bool)
        for e in range(len(idx)):
            if events[e, 2] > 6:
                idx[e] = False
        events = events[idx]
        # Drop the stimuli channel
        raw_train.pick_types(eeg=True)
        # Downsampling the data
        raw_train.resample(args.sfreq)

        ### Dividing events into training, validation and test sets ###
        # For intra-subject decoding, 10 trials per condition are used for
        # validation, 10 trials for testing, and the remaining trials are
        # used for training.
        # For inter-subject decoding, 75 trials per condition of the subject
        # of interest are used for validation and 75 for testing. All the
        # data from the other subjects is used for training.
        idx_train = np.zeros(
            (events.shape[0], len(np.unique(events[:, 2]))), dtype=bool)
        idx_val = np.zeros(
            (events.shape[0], len(np.unique(events[:, 2]))), dtype=bool)
        idx_test = np.zeros(
            (events.shape[0], len(np.unique(events[:, 2]))), dtype=bool)
        for e in range(len(np.unique(events[:, 2]))):
            if args.inter_subject == False:
                shuf = resample(np.where(events[:, 2] == e + 1)[0],
                                replace=False)
                idx_val[shuf[:10], e] = True
                idx_test[shuf[10:20], e] = True
                idx_train[shuf[20:], e] = True
            else:
                if args.test_sub == current_sub:
                    idx_val[np.where(events[:, 2] == e + 1)[0][0:75], e] = True
                    idx_test[np.where(events[:, 2] == e + 1)[0][75:150], e] = True
                else:
                    idx_train[np.where(events[:, 2] == e + 1)[0], e] = True
        idx_train = np.sum(idx_train, 1, dtype=bool)
        idx_val = np.sum(idx_val, 1, dtype=bool)
        idx_test = np.sum(idx_test, 1, dtype=bool)
        events_train = events[idx_train, :]
        events_val = events[idx_val, :]
        events_test = events[idx_test, :]

        ### Creating the raw data annotations ###
        if args.dataset == '5f':
            event_desc = {1: 'thumb', 2: 'index_finger', 3: 'middle_finger',
                          4: 'ring_finger', 5: 'pinkie_finger'}
        elif args.dataset == 'halt':
            event_desc = {1: 'left_hand', 2: 'right_hand',
                          3: 'passive_neutral', 4: 'left_leg', 5: 'tongue',
                          6: 'right_leg'}
        if args.inter_subject == False:
            annotations_train = mne.annotations_from_events(
                events_train, sfreq, event_desc=event_desc)
            annotations_val = mne.annotations_from_events(
                events_val, sfreq, event_desc=event_desc)
            annotations_test = mne.annotations_from_events(
                events_test, sfreq, event_desc=event_desc)
            # Creating 1s trials
            annotations_train.duration = np.repeat(1., len(events_train))
            annotations_val.duration = np.repeat(1., len(events_val))
            annotations_test.duration = np.repeat(1., len(events_test))
            # Adding annotations to raw data
            raw_val = raw_train.copy()
            raw_test = raw_train.copy()
            raw_train.set_annotations(annotations_train)
            raw_val.set_annotations(annotations_val)
            raw_test.set_annotations(annotations_test)
        else:
            if args.test_sub == current_sub:
                annotations_val = mne.annotations_from_events(
                    events_val, sfreq, event_desc=event_desc)
                annotations_test = mne.annotations_from_events(
                    events_test, sfreq, event_desc=event_desc)
                # Creating 1s trials
                annotations_val.duration = np.repeat(1., len(events_val))
                annotations_test.duration = np.repeat(1., len(events_test))
                # Adding annotations to raw data
                raw_val = raw_train.copy()
                raw_test = raw_train.copy()
                raw_val.set_annotations(annotations_val)
                raw_test.set_annotations(annotations_test)
            else:
                annotations_train = mne.annotations_from_events(
                    events_train, sfreq, event_desc=event_desc)
                # Creating 1s trials
                annotations_train.duration = np.repeat(1., len(events_train))
                # Adding annotations to raw data
                raw_train.set_annotations(annotations_train)

        ### Converting to BaseConcatDataset format ###
        description_train = {'subject': current_sub, 'partition': 'training'}
        description_val = {'subject': current_sub, 'partition': 'validation'}
        description_test = {'subject': current_sub, 'partition': 'test'}
        if args.inter_subject == False:
            dataset.append(BaseDataset(raw_train, description_train))
            dataset.append(BaseDataset(raw_val, description_val))
            dataset.append(BaseDataset(raw_test, description_test))
        else:
            if args.test_sub == current_sub:
                dataset.append(BaseDataset(raw_val, description_val))
                dataset.append(BaseDataset(raw_test, description_test))
            else:
                dataset.append(BaseDataset(raw_train, description_train))
    dataset = BaseConcatDataset(dataset)

    ### Output ###
    return dataset
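# Hypothetical usage sketch for load_5f_halt (the path, subject id and target
# sampling frequency below are placeholders, not values from the original
# script). It assumes the .mat files live under
# <project_dir>/datasets/<dataset>/data and contain 'Subject<X>' in their name.
if __name__ == '__main__':
    from argparse import Namespace

    args = Namespace(
        project_dir='/path/to/project',  # root folder with 'datasets/5f/data'
        dataset='5f',                    # '5f' or 'halt'
        inter_subject=False,             # intra-subject analysis
        test_sub='A',                    # subject identifier used in file names
        sfreq=100,                       # resampling frequency in Hz
    )
    dataset = load_5f_halt(args)
    print(dataset.description)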
#

import numpy as np
from sklearn.model_selection import train_test_split

from braindecode.datasets import BaseConcatDataset

random_state = 31
subjects = np.unique(windows_dataset.description['subject'])
subj_train, subj_valid = train_test_split(
    subjects, test_size=0.5, random_state=random_state)

split_ids = {'train': subj_train, 'valid': subj_valid}
splitted = dict()
for name, values in split_ids.items():
    splitted[name] = BaseConcatDataset(
        [ds for ds in windows_dataset.datasets
         if ds.description['subject'] in values])

train_set = splitted['train']
valid_set = splitted['valid']


######################################################################
# Create sequence samplers
# ------------------------
#

######################################################################
# Following the time distributed approach of [1]_, we will need to provide our
# neural network with sequences of windows, such that the embeddings of
# multiple consecutive windows can be concatenated and provided to a final
# classifier.
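######################################################################
# A minimal sketch of that idea (not the library's sampler API): group the
# windows of each recording into non-overlapping sequences of ``n_windows``
# consecutive windows, yielding the start index of each sequence in
# ``train_set``. The value of ``n_windows`` below is an assumption for
# illustration only.

n_windows = 3  # assumed sequence length
seq_starts = []
offset = 0
for ds in train_set.datasets:
    n_wins = len(ds)  # number of windows in this recording
    starts = np.arange(0, n_wins - n_windows + 1, n_windows) + offset
    seq_starts.extend(starts.tolist())
    offset += n_wins
# each start s indexes windows s, s + 1, ..., s + n_windows - 1 of train_set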
def test_variable_length_trials_cropped_decoding():
    cuda = False
    set_random_seeds(seed=20210726, cuda=cuda)

    # create fake tuh abnormal dataset
    tuh = _TUHAbnormalMock(path='')
    # fake variable length trials by cropping first recording
    splits = tuh.split([[i] for i in range(len(tuh.datasets))])
    preprocess(
        concat_ds=splits['0'],
        preprocessors=[
            Preprocessor('crop', tmax=300),
        ],
    )
    variable_tuh = BaseConcatDataset(
        [splits[str(i)] for i in range(len(tuh.datasets))])
    # make sure we actually have different length trials
    assert any(np.diff([ds.raw.n_times for ds in variable_tuh.datasets]) != 0)

    # create windows
    variable_tuh_windows = create_fixed_length_windows(
        concat_ds=variable_tuh,
        window_size_samples=1000,
        window_stride_samples=1000,
        drop_last_window=False,
        mapping={True: 1, False: 0},
    )

    # create train and valid set
    splits = variable_tuh_windows.split(
        [[i] for i in range(len(variable_tuh_windows.datasets))])
    variable_tuh_windows_train = BaseConcatDataset(
        [splits[str(i)] for i in range(len(tuh.datasets) - 1)])
    variable_tuh_windows_valid = BaseConcatDataset(
        [splits[str(len(tuh.datasets) - 1)]])
    for x, y, ind in variable_tuh_windows_train:
        break
    train_split = predefined_split(variable_tuh_windows_valid)

    # initialize a model
    model = ShallowFBCSPNet(
        in_chans=x.shape[0],
        n_classes=len(tuh.description.pathological.unique()),
    )
    to_dense_prediction_model(model)
    if cuda:
        model.cuda()

    # create and train a classifier
    clf = EEGClassifier(
        model,
        cropped=True,
        criterion=CroppedLoss,
        criterion__loss_function=torch.nn.functional.nll_loss,
        optimizer=torch.optim.Adam,
        batch_size=32,
        callbacks=['accuracy'],
        train_split=train_split,
    )
    clf.fit(variable_tuh_windows_train, y=None, epochs=3)

    # make sure it does what we expect
    np.testing.assert_allclose(
        clf.history[:, 'train_loss'],
        np.array([
            0.689495325088501,
            0.1353449523448944,
            0.006638816092163324,
        ]),
        rtol=1e-1,
        atol=1e-1,
    )
    np.testing.assert_allclose(
        clf.history[:, 'valid_loss'],
        np.array([
            2.925871,
            3.611423,
            4.23494,
        ]),
        rtol=1e-1,
        atol=1e-1,
    )