def create_compatible_dataset(mat_file, *, n_train=7500, n_total=10000,
                              multivariate_size=500, sfreq=1000,
                              window_size_samples=500,
                              window_stride_samples=10):
    """Build train and validation windows datasets from a ``.mat`` file.

    The data and labels returned by ``read_mat_file`` are split by position:
    entries ``[:n_train]`` form the training part and entries
    ``[n_train:n_total]`` the validation part. Each part is passed through
    ``get_mutlivariate_data`` and then wrapped with ``create_from_X_y``
    (``drop_last_window=False``).

    All keyword parameters default to the values that used to be hard-coded,
    so ``create_compatible_dataset(mat_file)`` behaves as before.

    Parameters
    ----------
    mat_file
        Forwarded unchanged to ``read_mat_file``.
    n_train : int
        Number of leading entries used for training.
    n_total : int
        End index (exclusive) of the validation slice.
    multivariate_size : int
        Second argument forwarded to ``get_mutlivariate_data``.
        NOTE(review): its exact meaning is defined by that helper -- confirm
        there before changing it.
    sfreq : float
        Sampling frequency forwarded to ``create_from_X_y``.
    window_size_samples : int
        Window length forwarded to ``create_from_X_y``.
    window_stride_samples : int
        Window stride forwarded to ``create_from_X_y``.

    Returns
    -------
    tuple
        ``(compatible_train_dataset, compatible_valid_dataset)``, the two
        objects returned by ``create_from_X_y``.
    """
    data, labels = read_mat_file(mat_file)

    # Positional split: leading block for training, following block for
    # validation.
    train_data, train_labels = data[:n_train], labels[:n_train]
    validation_data = data[n_train:n_total]
    validation_labels = labels[n_train:n_total]

    new_train_data = get_mutlivariate_data(train_data, multivariate_size)
    new_validation_data = get_mutlivariate_data(validation_data,
                                                multivariate_size)

    # Identical windowing settings for both splits.
    windowing = dict(
        drop_last_window=False,
        sfreq=sfreq,
        window_size_samples=window_size_samples,
        window_stride_samples=window_stride_samples,
    )
    compatible_train_dataset = create_from_X_y(
        new_train_data, train_labels, **windowing)
    compatible_valid_dataset = create_from_X_y(
        new_validation_data, validation_labels, **windowing)
    return compatible_train_dataset, compatible_valid_dataset
def test_crops_data_loader_explicit():
    """Check the exact crops produced for one 15-sample, one-channel trial.

    With a window of 10 samples, a stride of 4 and
    ``drop_last_window=False``, the test expects three crops that start at
    samples 0, 4 and 5.
    """
    window_size, stride = 10, 4
    signal = np.arange(15)
    targets = [0]
    expected_starts = (0, 4, 5)

    windows = create_from_X_y(
        signal[np.newaxis, np.newaxis],  # add trial and channel axes
        targets,
        window_size_samples=window_size,
        window_stride_samples=stride,
        drop_last_window=False,
    )

    # Every dataset item is a (crop, target, indices) triple.
    crops, crop_targets, _ = zip(*windows)
    assert len(crops) == 3
    assert len(crop_targets) == 3

    for crop, start in zip(crops, expected_starts):
        np.testing.assert_array_equal(
            crop.squeeze(), np.arange(start, start + window_size))
def test_eeg_classifier():
    """Regression test: cropped training with ``EEGClassifier``.

    Loads EEGBCI recordings for subject 1, epochs them into hands/feet
    trials, trains a dense-prediction ``ShallowFBCSPNet`` for 4 epochs on
    the first 48 trials (trials 48-59 are used for validation) and compares
    the recorded history, minus the ``"dur"`` entries, against hard-coded
    reference values.

    NOTE(review): the reference values presumably depend on the RNG state
    set by ``set_random_seeds``; avoid reordering the statements that
    follow the seeding call.
    """
    # 5, 6, 9, 10, 13, 14 are codes for executed and imagined hands/feet
    subject_id = 1
    event_codes = [5, 6, 9, 10, 13, 14]

    # This will download the files if you don't have them yet,
    # and then return the paths to the files.
    physionet_paths = mne.datasets.eegbci.load_data(subject_id, event_codes,
                                                    update_path=False)

    # Load each of the files
    parts = [
        mne.io.read_raw_edf(path, preload=True, stim_channel="auto",
                            verbose="WARNING") for path in physionet_paths
    ]

    # Concatenate them
    raw = concatenate_raws(parts)

    # Find the events in this dataset
    events, _ = mne.events_from_annotations(raw)

    # Use only EEG channels
    eeg_channel_inds = mne.pick_types(raw.info, meg=False, eeg=True,
                                      stim=False, eog=False, exclude="bads")

    # Extract trials, only using EEG channels
    epoched = mne.Epochs(
        raw,
        events,
        dict(hands=2, feet=3),
        tmin=1,
        tmax=4.1,
        proj=False,
        picks=eeg_channel_inds,
        baseline=None,
        preload=True,
    )

    # Scale the data by 1e6 -- presumably volt -> microvolt; confirm the
    # unit returned by get_data().
    # Pytorch expects float32 for input and int64 for labels.
    X = (epoched.get_data() * 1e6).astype(np.float32)
    y = (epoched.events[:, 2] - 2).astype(np.int64)  # 2,3 -> 0,1

    # Set if you want to use GPU
    # You can also use torch.cuda.is_available() to determine if cuda is
    # available on your machine.
    cuda = False
    set_random_seeds(seed=20170629, cuda=cuda)

    # Length (in samples) of each window fed to the network
    input_window_samples = 450
    n_classes = 2
    in_chans = X.shape[1]
    # final_conv_length determines the size of the receptive field of the ConvNet
    model = ShallowFBCSPNet(
        in_chans=in_chans,
        n_classes=n_classes,
        input_window_samples=input_window_samples,
        final_conv_length=12,
    )
    to_dense_prediction_model(model)

    if cuda:
        model.cuda()

    # determine output size: run a dummy batch through the model and read
    # the size of output axis 2, which is then used as the window stride
    test_input = np_to_th(
        np.ones((2, in_chans, input_window_samples, 1), dtype=np.float32))
    if cuda:
        test_input = test_input.cuda()
    out = model(test_input)
    n_preds_per_input = out.cpu().data.numpy().shape[2]

    train_set = create_from_X_y(X[:48], y[:48],
                                drop_last_window=False,
                                sfreq=100,
                                window_size_samples=input_window_samples,
                                window_stride_samples=n_preds_per_input)

    valid_set = create_from_X_y(X[48:60], y[48:60],
                                drop_last_window=False,
                                sfreq=100,
                                window_size_samples=input_window_samples,
                                window_stride_samples=n_preds_per_input)

    # Accuracy callbacks, one scored on the training data and one on the
    # validation data
    cropped_cb_train = CroppedTrialEpochScoring(
        "accuracy",
        name="train_trial_accuracy",
        lower_is_better=False,
        on_train=True,
    )

    cropped_cb_valid = CroppedTrialEpochScoring(
        "accuracy",
        on_train=False,
        name="valid_trial_accuracy",
        lower_is_better=False,
    )

    clf = EEGClassifier(
        model,
        cropped=True,
        criterion=CroppedLoss,
        criterion__loss_function=nll_loss,
        optimizer=optim.Adam,
        train_split=predefined_split(valid_set),
        batch_size=32,
        callbacks=[
            ("train_trial_accuracy", cropped_cb_train),
            ("valid_trial_accuracy", cropped_cb_valid),
        ],
    )

    clf.fit(train_set, y=None, epochs=4)

    # Reference history: one dict per epoch, without the "dur" key.
    expected = [{
        'batches': [{
            'train_batch_size': 32,
            'train_loss': 1.6639312505722046
        }, {
            'train_batch_size': 32,
            'train_loss': 2.6161606311798096
        }, {
            'train_batch_size': 32,
            'train_loss': 1.627132773399353
        }, {
            'valid_batch_size': 24,
            'valid_loss': 0.9677614569664001
        }],
        'epoch': 1,
        'train_batch_count': 3,
        'train_loss': 1.9690748850504558,
        'train_loss_best': True,
        'train_trial_accuracy': 0.4791666666666667,
        'train_trial_accuracy_best': True,
        'valid_batch_count': 1,
        'valid_loss': 0.9677614569664001,
        'valid_loss_best': True,
        'valid_trial_accuracy': 0.5,
        'valid_trial_accuracy_best': True
    }, {
        'batches': [{
            'train_batch_size': 32,
            'train_loss': 1.3829222917556763
        }, {
            'train_batch_size': 32,
            'train_loss': 1.3123714923858643
        }, {
            'train_batch_size': 32,
            'train_loss': 1.0109959840774536
        }, {
            'valid_batch_size': 24,
            'valid_loss': 1.9435862302780151
        }],
        'epoch': 2,
        'train_batch_count': 3,
        'train_loss': 1.2354299227396648,
        'train_loss_best': True,
        'train_trial_accuracy': 0.5,
        'train_trial_accuracy_best': True,
        'valid_batch_count': 1,
        'valid_loss': 1.9435862302780151,
        'valid_loss_best': False,
        'valid_trial_accuracy': 0.5,
        'valid_trial_accuracy_best': False
    }, {
        'batches': [{
            'train_batch_size': 32,
            'train_loss': 1.172208547592163
        }, {
            'train_batch_size': 32,
            'train_loss': 0.8899562954902649
        }, {
            'train_batch_size': 32,
            'train_loss': 1.0232216119766235
        }, {
            'valid_batch_size': 24,
            'valid_loss': 0.9585554599761963
        }],
        'epoch': 3,
        'train_batch_count': 3,
        'train_loss': 1.0284621516863506,
        'train_loss_best': True,
        'train_trial_accuracy': 0.5,
        'train_trial_accuracy_best': False,
        'valid_batch_count': 1,
        'valid_loss': 0.9585554599761963,
        'valid_loss_best': True,
        'valid_trial_accuracy': 0.5,
        'valid_trial_accuracy_best': False
    }, {
        'batches': [{
            'train_batch_size': 32,
            'train_loss': 0.9693693518638611
        }, {
            'train_batch_size': 32,
            'train_loss': 0.900641918182373
        }, {
            'train_batch_size': 32,
            'train_loss': 0.8839665651321411
        }, {
            'valid_batch_size': 24,
            'valid_loss': 0.873468816280365
        }],
        'epoch': 4,
        'train_batch_count': 3,
        'train_loss': 0.9179926117261251,
        'train_loss_best': True,
        'train_trial_accuracy': 0.625,
        'train_trial_accuracy_best': True,
        'valid_batch_count': 1,
        'valid_loss': 0.873468816280365,
        'valid_loss_best': True,
        'valid_trial_accuracy': 0.4166666666666667,
        'valid_trial_accuracy_best': False
    }]

    # Drop the "dur" entry of every epoch before comparing against the
    # reference values.
    history_without_dur = [{k: v
                            for k, v in h.items() if k != "dur"}
                           for h in clf.history]
    assert_deep_allclose(expected, history_without_dur, atol=1e-3, rtol=1e-3)
def test_cropped_trial_epoch_scoring():
    """Check the trial accuracy stored by ``CroppedTrialEpochScoring``.

    For every case, hand-written window predictions, targets and window
    indices are injected into the callback before ``on_epoch_end`` is
    called on a mock net; the ``"accuracy"`` entry of the net history must
    then equal the expected value.
    """
    # Each case: (window predictions, per-batch targets, expected accuracy)
    cases = [
        (
            # Expected predictions classification results: [1, 0, 0, 0]
            np.array([
                [[0.2, 0.1, 0.1, 0.1], [0.8, 0.9, 0.9, 0.9]],  # trial 0 preds
                [[1.0, 1.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0]],  # trial 1 preds
                [[1.0, 1.0, 1.0, 0.2], [0.0, 0.0, 0.0, 0.8]],  # trial 2 preds
                [[0.9, 0.8, 0.9, 1.0], [0.1, 0.2, 0.1, 0.0]],  # trial 3 preds
            ]),
            [torch.tensor([0, 0]), torch.tensor([1, 1])],
            0.25,
        ),
        (
            # Expected predictions classification results: [1, 1, 1, 0]
            np.array([
                [[0.2, 0.1, 0.1, 0.1], [0.8, 0.9, 0.9, 0.9]],
                [[0.0, 0.0, 0.0, 0.0], [1.0, 1.0, 1.0, 1.0]],
                [[0.0, 0.0, 0.0, 0.2], [1.0, 1.0, 1.0, 0.8]],
                [[0.9, 0.8, 0.9, 1.0], [0.1, 0.2, 0.1, 0.0]],
            ]),
            [torch.tensor([1, 1]), torch.tensor([1, 1])],
            0.75,
        ),
    ]

    # One (i_window_in_trials, <unused>, i_window_stops) triple per batch;
    # the same list object is handed to the callback in every case.
    window_inds = [
        (torch.tensor([0, 0]), [None], torch.tensor([4, 4]))
        for _ in range(2)
    ]

    dataset_train = None

    for preds, trues, expected_accuracy in cases:
        dataset_valid = create_from_X_y(
            np.zeros((4, 1, 10)),
            np.concatenate(trues),
            window_size_samples=10,
            window_stride_samples=4,
            drop_last_window=False,
        )

        net = MockSkorchNet()
        scoring = CroppedTrialEpochScoring("accuracy", on_train=False)
        net.callbacks = [("", scoring)]
        scoring.initialize()

        # Predictions are supplied as two batches of two trials each.
        scoring.y_preds_ = [
            to_tensor(batch, device="cpu")
            for batch in (preds[:2], preds[2:])
        ]
        scoring.y_trues_ = trues
        scoring.window_inds_ = window_inds

        scoring.on_epoch_end(net, dataset_train, dataset_valid)

        np.testing.assert_almost_equal(
            net.history[0]["accuracy"], expected_accuracy)
def test_cropped_decoding():
    """Regression test: cropped decoding with ``EEGClassifier``.

    Loads EEGBCI recordings for subject 1, epochs them into hands/feet
    trials, trains a dense-prediction ``ShallowFBCSPNet`` for 4 epochs on
    the first 60 trials (the remaining trials are used for validation) and
    compares the per-epoch train/valid loss and accuracy in the history
    against hard-coded reference values.

    NOTE(review): the reference values presumably depend on the RNG state
    set by ``set_random_seeds``; avoid reordering the statements that
    follow the seeding call.
    """
    # 5, 6, 9, 10, 13, 14 are codes for executed and imagined hands/feet
    subject_id = 1
    event_codes = [5, 6, 9, 10, 13, 14]

    # This will download the files if you don't have them yet,
    # and then return the paths to the files.
    physionet_paths = mne.datasets.eegbci.load_data(
        subject_id, event_codes, update_path=False
    )

    # Load each of the files
    parts = [
        mne.io.read_raw_edf(
            path, preload=True, stim_channel="auto", verbose="WARNING"
        )
        for path in physionet_paths
    ]

    # Concatenate them
    raw = concatenate_raws(parts)

    # Find the events in this dataset
    events, _ = mne.events_from_annotations(raw)

    # Use only EEG channels
    eeg_channel_inds = mne.pick_types(
        raw.info, meg=False, eeg=True, stim=False, eog=False, exclude="bads"
    )

    # Extract trials, only using EEG channels
    epoched = mne.Epochs(
        raw,
        events,
        dict(hands=2, feet=3),
        tmin=1,
        tmax=4.1,
        proj=False,
        picks=eeg_channel_inds,
        baseline=None,
        preload=True,
    )

    # Scale the data by 1e6 -- presumably volt -> microvolt; confirm the
    # unit returned by get_data().
    # Pytorch expects float32 for input and int64 for labels.
    X = (epoched.get_data() * 1e6).astype(np.float32)
    y = (epoched.events[:, 2] - 2).astype(np.int64)  # 2,3 -> 0,1

    # Set if you want to use GPU
    # You can also use torch.cuda.is_available() to determine if cuda is
    # available on your machine.
    cuda = False
    set_random_seeds(seed=20170629, cuda=cuda)

    # Length (in samples) of each window fed to the network
    input_window_samples = 450
    n_classes = 2
    in_chans = X.shape[1]
    # final_conv_length determines the size of the receptive field of the ConvNet
    model = ShallowFBCSPNet(
        in_chans=in_chans,
        n_classes=n_classes,
        input_window_samples=input_window_samples,
        final_conv_length=12,
    )
    to_dense_prediction_model(model)

    if cuda:
        model.cuda()

    # Perform forward pass to determine how many outputs per input;
    # that count is then used as the window stride.
    n_preds_per_input = get_output_shape(model, in_chans, input_window_samples)[2]

    train_set = create_from_X_y(X[:60], y[:60],
                                drop_last_window=False,
                                window_size_samples=input_window_samples,
                                window_stride_samples=n_preds_per_input)

    valid_set = create_from_X_y(X[60:], y[60:],
                                drop_last_window=False,
                                window_size_samples=input_window_samples,
                                window_stride_samples=n_preds_per_input)

    # Validate on the explicitly built validation set.
    train_split = predefined_split(valid_set)

    clf = EEGClassifier(
        model,
        cropped=True,
        criterion=CroppedLoss,
        criterion__loss_function=torch.nn.functional.nll_loss,
        optimizer=optim.Adam,
        train_split=train_split,
        batch_size=32,
        callbacks=['accuracy'],
    )

    clf.fit(train_set, y=None, epochs=4)

    # Compare each history column (one value per epoch) with its reference.
    np.testing.assert_allclose(
        clf.history[:, 'train_loss'],
        np.array(
            [
                1.6666231592496237,
                1.2292670885721841,
                1.1270817518234253,
                1.1752660751342774
            ]
        ),
        rtol=1e-3,
        atol=1e-4,
    )
    np.testing.assert_allclose(
        clf.history[:, 'valid_loss'],
        np.array(
            [
                1.5687058925628663,
                0.8510023872057597,
                2.087181798617045,
                0.7100235184033712
            ]
        ),
        rtol=1e-3,
        atol=1e-3,
    )
    np.testing.assert_allclose(
        clf.history[:, 'train_accuracy'],
        np.array(
            [
                0.48333333333333334,
                0.5,
                0.5,
                0.6333333333333333
            ]
        ),
        rtol=1e-3,
        atol=1e-4,
    )
    np.testing.assert_allclose(
        clf.history[:, 'valid_accuracy'],
        np.array(
            [
                0.533333,
                0.5,
                0.466667,
                0.666667
            ]
        ),
        rtol=1e-3,
        atol=1e-4,
    )
physionet_paths = mne.datasets.eegbci.load_data(subject_id, event_codes, update_path=False) # Load each of the files parts = [ mne.io.read_raw_edf(path, preload=True, stim_channel='auto') for path in physionet_paths ] ############################################################################### # We take the required data, targets and additional information sampling # frequency and channel names from the loaded data. Note that this data and # information can originate from any source. X = [raw.get_data() for raw in parts] y = event_codes sfreq = parts[0].info["sfreq"] ch_names = parts[0].info["ch_names"] ############################################################################### # Convert to data format compatible with skorch and braindecode: windows_dataset = create_from_X_y( X, y, drop_last_window=False, sfreq=sfreq, ch_names=ch_names, window_stride_samples=500, window_size_samples=500, )
def test_eeg_classifier():
    """Regression test: cropped training with ``EEGClassifier``.

    Loads EEGBCI recordings for subject 1, epochs them into hands/feet
    trials, trains a dense-prediction ``ShallowFBCSPNet`` for 4 epochs on
    the first 48 trials (trials 48-59 are used for validation) and compares
    the recorded history, minus the ``"dur"`` entries, against hard-coded
    reference values.

    NOTE(review): the reference values presumably depend on the RNG state
    set by ``set_random_seeds``; avoid reordering the statements that
    follow the seeding call.

    Returns
    -------
    The fitted ``EEGClassifier`` instance.
    """
    # 5, 6, 9, 10, 13, 14 are codes for executed and imagined hands/feet
    subject_id = 1
    event_codes = [5, 6, 9, 10, 13, 14]

    # This will download the files if you don't have them yet,
    # and then return the paths to the files.
    physionet_paths = mne.datasets.eegbci.load_data(subject_id, event_codes,
                                                    update_path=False)

    # Load each of the files
    parts = [
        mne.io.read_raw_edf(path, preload=True, stim_channel="auto",
                            verbose="WARNING") for path in physionet_paths
    ]

    # Concatenate them
    raw = concatenate_raws(parts)

    # Find the events in this dataset
    events, _ = mne.events_from_annotations(raw)

    # Use only EEG channels
    eeg_channel_inds = mne.pick_types(raw.info, meg=False, eeg=True,
                                      stim=False, eog=False, exclude="bads")

    # Extract trials, only using EEG channels
    epoched = mne.Epochs(
        raw,
        events,
        dict(hands=2, feet=3),
        tmin=1,
        tmax=4.1,
        proj=False,
        picks=eeg_channel_inds,
        baseline=None,
        preload=True,
    )

    # Scale the data by 1e6 -- presumably volt -> microvolt; confirm the
    # unit returned by get_data().
    # Pytorch expects float32 for input and int64 for labels.
    X = (epoched.get_data() * 1e6).astype(np.float32)
    y = (epoched.events[:, 2] - 2).astype(np.int64)  # 2,3 -> 0,1

    # Set if you want to use GPU
    # You can also use torch.cuda.is_available() to determine if cuda is
    # available on your machine.
    cuda = False
    set_random_seeds(seed=20170629, cuda=cuda)

    # Length (in samples) of each window fed to the network
    input_window_samples = 450
    n_classes = 2
    in_chans = X.shape[1]
    # final_conv_length determines the size of the receptive field of the ConvNet
    model = ShallowFBCSPNet(
        in_chans=in_chans,
        n_classes=n_classes,
        input_window_samples=input_window_samples,
        final_conv_length=12,
    )
    to_dense_prediction_model(model)

    if cuda:
        model.cuda()

    # determine output size: run a dummy batch through the model and read
    # the size of output axis 2, which is then used as the window stride
    test_input = np_to_th(
        np.ones((2, in_chans, input_window_samples, 1), dtype=np.float32))
    if cuda:
        test_input = test_input.cuda()
    out = model(test_input)
    n_preds_per_input = out.cpu().data.numpy().shape[2]

    train_set = create_from_X_y(X[:48], y[:48],
                                drop_last_window=False,
                                sfreq=100,
                                window_size_samples=input_window_samples,
                                window_stride_samples=n_preds_per_input)

    valid_set = create_from_X_y(X[48:60], y[48:60],
                                drop_last_window=False,
                                sfreq=100,
                                window_size_samples=input_window_samples,
                                window_stride_samples=n_preds_per_input)

    # Accuracy callbacks, one scored on the training data and one on the
    # validation data
    cropped_cb_train = CroppedTrialEpochScoring(
        "accuracy",
        name="train_trial_accuracy",
        lower_is_better=False,
        on_train=True,
    )

    cropped_cb_valid = CroppedTrialEpochScoring(
        "accuracy",
        on_train=False,
        name="valid_trial_accuracy",
        lower_is_better=False,
    )

    clf = EEGClassifier(
        model,
        cropped=True,
        criterion=CroppedLoss,
        criterion__loss_function=nll_loss,
        optimizer=optim.Adam,
        train_split=predefined_split(valid_set),
        batch_size=32,
        callbacks=[
            ("train_trial_accuracy", cropped_cb_train),
            ("valid_trial_accuracy", cropped_cb_valid),
        ],
    )

    clf.fit(train_set, y=None, epochs=4)

    # Reproduce this exact output by using pprint(history_without_dur) and
    # adjusting indentation of all lines after first
    expectedh = [{
        'batches': [{
            'train_batch_size': 32,
            'train_loss': 1.4175944328308105
        }, {
            'train_batch_size': 32,
            'train_loss': 2.4414331912994385
        }, {
            'train_batch_size': 32,
            'train_loss': 1.476792812347412
        }, {
            'valid_batch_size': 24,
            'valid_loss': 1.2322615385055542
        }],
        'epoch': 1,
        'train_batch_count': 3,
        'train_loss': 1.7786068121592205,
        'train_loss_best': True,
        'train_trial_accuracy': 0.5,
        'train_trial_accuracy_best': True,
        'valid_batch_count': 1,
        'valid_loss': 1.2322615385055542,
        'valid_loss_best': True,
        'valid_trial_accuracy': 0.5,
        'valid_trial_accuracy_best': True
    }, {
        'batches': [{
            'train_batch_size': 32,
            'train_loss': 0.9673743844032288
        }, {
            'train_batch_size': 32,
            'train_loss': 1.218681812286377
        }, {
            'train_batch_size': 32,
            'train_loss': 1.5651403665542603
        }, {
            'valid_batch_size': 24,
            'valid_loss': 1.123423457145691
        }],
        'epoch': 2,
        'train_batch_count': 3,
        'train_loss': 1.250398854414622,
        'train_loss_best': True,
        'train_trial_accuracy': 0.5,
        'train_trial_accuracy_best': False,
        'valid_batch_count': 1,
        'valid_loss': 1.123423457145691,
        'valid_loss_best': True,
        'valid_trial_accuracy': 0.5,
        'valid_trial_accuracy_best': False
    }, {
        'batches': [{
            'train_batch_size': 32,
            'train_loss': 1.1562678813934326
        }, {
            'train_batch_size': 32,
            'train_loss': 1.5787755250930786
        }, {
            'train_batch_size': 32,
            'train_loss': 1.306514859199524
        }, {
            'valid_batch_size': 24,
            'valid_loss': 1.037418007850647
        }],
        'epoch': 3,
        'train_batch_count': 3,
        'train_loss': 1.3471860885620117,
        'train_loss_best': False,
        'train_trial_accuracy': 0.5208333333333334,
        'train_trial_accuracy_best': True,
        'valid_batch_count': 1,
        'valid_loss': 1.037418007850647,
        'valid_loss_best': True,
        'valid_trial_accuracy': 0.5,
        'valid_trial_accuracy_best': False
    }, {
        'batches': [{
            'train_batch_size': 32,
            'train_loss': 1.8480840921401978
        }, {
            'train_batch_size': 32,
            'train_loss': 1.0466501712799072
        }, {
            'train_batch_size': 32,
            'train_loss': 0.9813234210014343
        }, {
            'valid_batch_size': 24,
            'valid_loss': 0.9420649409294128
        }],
        'epoch': 4,
        'train_batch_count': 3,
        'train_loss': 1.2920192281405132,
        'train_loss_best': False,
        'train_trial_accuracy': 0.75,
        'train_trial_accuracy_best': True,
        'valid_batch_count': 1,
        'valid_loss': 0.9420649409294128,
        'valid_loss_best': True,
        'valid_trial_accuracy': 0.4166666666666667,
        'valid_trial_accuracy_best': False
    }]

    # Drop the "dur" entry of every epoch before comparing against the
    # reference values.
    history_without_dur = [{k: v
                            for k, v in h.items() if k != "dur"}
                           for h in clf.history]
    assert_deep_allclose(expectedh, history_without_dur, atol=1e-3, rtol=1e-3)

    return clf