def test_preprocessors_with_misc_channels():
    rng = np.random.RandomState(42)
    signal_sfreq = 50
    info = mne.create_info(
        ch_names=['0', '1', 'target_0', 'target_1'], sfreq=signal_sfreq,
        ch_types=['eeg', 'eeg', 'misc', 'misc'])
    signal = rng.randn(2, 1000)
    targets = rng.randn(2, 1000)
    raw = mne.io.RawArray(np.concatenate([signal, targets]), info=info)
    desc = pd.Series({'pathological': True, 'gender': 'M', 'age': 48})
    base_dataset = BaseDataset(raw, desc, target_name=None)
    concat_ds = BaseConcatDataset([base_dataset])

    preprocessors = [
        Preprocessor('pick_types', eeg=True, misc=True),
        Preprocessor(lambda x: x / 1e6),
    ]
    preprocess(concat_ds, preprocessors)

    # Check that preprocessing did not affect the targets.
    # This only holds for preprocessors relying on MNE functions that do not
    # modify `misc` channels.
    np.testing.assert_array_equal(
        concat_ds.datasets[0].raw.get_data()[-2:, :],
        targets
    )
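# The assertion above relies on callable Preprocessors being applied via
# mne.io.Raw.apply_function (an assumption about the implementation), which by
# default only picks *data* channels, so 'misc' channels (used here to store
# regression targets) stay untouched. Minimal standalone sketch of that MNE
# behaviour, not part of the original test suite:
def _sketch_misc_channels_untouched():
    rng = np.random.RandomState(0)
    info = mne.create_info(['eeg_0', 'target_0'], sfreq=50.,
                           ch_types=['eeg', 'misc'])
    raw = mne.io.RawArray(rng.randn(2, 500), info)
    targets_before = raw.get_data(picks='misc').copy()
    # picks=None defaults to data channels only, leaving 'misc' unchanged
    raw.apply_function(lambda x: x / 1e6)
    np.testing.assert_array_equal(raw.get_data(picks='misc'), targets_before)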
def concat_ds_targets():
    raws, description = fetch_data_with_moabb(
        dataset_name="BNCI2014001", subject_ids=4)
    events, _ = mne.events_from_annotations(raws[0])
    targets = events[:, -1] - 1
    ds = [BaseDataset(raws[i], description.iloc[i]) for i in range(3)]
    concat_ds = BaseConcatDataset(ds)
    return concat_ds, targets
def test_concat_concat_dataset(concat_ds_targets):
    concat_ds, targets = concat_ds_targets
    concat_ds1 = BaseConcatDataset(concat_ds.datasets[:2])
    concat_ds2 = BaseConcatDataset(concat_ds.datasets[2:])
    list_of_concat_ds = [concat_ds1, concat_ds2]
    descriptions = pd.concat([ds.description for ds in list_of_concat_ds])
    descriptions.reset_index(inplace=True, drop=True)
    lens = [0] + [len(ds) for ds in list_of_concat_ds]
    cumsums = [ds.cumulative_sizes for ds in list_of_concat_ds]
    cumsums = [ls for i, cumsum in enumerate(cumsums)
               for ls in np.array(cumsum) + lens[i]]
    concat_concat_ds = BaseConcatDataset(list_of_concat_ds)
    assert len(concat_concat_ds) == sum(lens)
    assert len(concat_concat_ds) == concat_concat_ds.cumulative_sizes[-1]
    assert len(concat_concat_ds.datasets) == len(descriptions)
    assert len(concat_concat_ds.description) == len(descriptions)
    np.testing.assert_array_equal(cumsums, concat_concat_ds.cumulative_sizes)
    pd.testing.assert_frame_equal(descriptions, concat_concat_ds.description)
def test_save_varying_number_of_datasets_with_overwrite(
        setup_concat_windows_dataset, tmpdir):
    concat_windows_dataset = setup_concat_windows_dataset
    concat_windows_dataset.save(path=tmpdir, overwrite=False)
    subset = concat_windows_dataset.split([0])['0']
    with pytest.warns(UserWarning, match='The number of saved datasets'):
        subset.save(path=tmpdir, overwrite=True)

    # assert no warning raised when there are as many subdirectories as before
    with pytest.warns(None) as raised_warnings:
        concat_windows_dataset.save(path=tmpdir, overwrite=True)
    assert len(raised_warnings) == 0

    # assert no warning raised when there are more subdirectories than before
    double_concat_windows_dataset = BaseConcatDataset(
        [concat_windows_dataset, concat_windows_dataset])
    with pytest.warns(None) as raised_warnings:
        double_concat_windows_dataset.save(path=tmpdir, overwrite=True)
    assert len(raised_warnings) == 0
def test_preprocess_overwrite(base_concat_ds, tmp_path, overwrite):
    preprocessors = [Preprocessor('crop', tmax=10, include_tmax=False)]

    # Create temporary directory with preexisting files
    save_dir = str(tmp_path)
    for i, ds in enumerate(base_concat_ds.datasets):
        concat_ds = BaseConcatDataset([ds])
        save_subdir = os.path.join(save_dir, str(i))
        os.makedirs(save_subdir)
        concat_ds.save(save_subdir, overwrite=True)

    if overwrite:
        preprocess(base_concat_ds, preprocessors, save_dir, overwrite=True)
        # Make sure the serialized data is preprocessed
        preproc_concat_ds = load_concat_dataset(save_dir, True)
        assert all([len(ds.raw.times) == 2500
                    for ds in preproc_concat_ds.datasets])
    else:
        with pytest.raises(FileExistsError):
            preprocess(base_concat_ds, preprocessors, save_dir,
                       overwrite=False)
def windows_ds():
    raws, description = fetch_data_with_moabb(
        dataset_name='BNCI2014001', subject_ids=4)
    ds = [BaseDataset(raws[i], description.iloc[i]) for i in range(3)]
    concat_ds = BaseConcatDataset(ds)
    windows_ds = create_fixed_length_windows(
        concat_ds=concat_ds, start_offset_samples=0, stop_offset_samples=None,
        window_size_samples=500, window_stride_samples=500,
        drop_last_window=False, preload=False)
    return windows_ds
def target_windows_ds():
    raws, description = fetch_data_with_moabb(
        dataset_name='BNCI2014001', subject_ids=4)
    ds = [BaseDataset(raws[i], description.iloc[i]) for i in range(3)]
    concat_ds = BaseConcatDataset(ds)
    windows_ds = create_windows_from_events(
        concat_ds, trial_start_offset_samples=0, trial_stop_offset_samples=0,
        window_size_samples=None, window_stride_samples=None,
        drop_last_window=False)
    return windows_ds
def fake_regression_dataset(n_fake_recs, n_fake_chs, fake_sfreq,
                            fake_duration_s, n_classes=1):
    datasets = []
    for i in range(n_fake_recs):
        train_or_eval = "eval" if i == 0 else "train"
        raw, save_fname = create_mne_dummy_raw(
            n_channels=n_fake_chs, n_times=fake_duration_s * fake_sfreq,
            sfreq=fake_sfreq, savedir=None)
        target = np.random.randint(0, 100, n_classes)
        if n_classes == 1:
            target = target[0]
        fake_description = pd.Series(
            data=[target, train_or_eval], index=["target", "session"])
        base_ds = BaseDataset(raw, fake_description, target_name="target")
        datasets.append(base_ds)
    dataset = BaseConcatDataset(datasets)
    return dataset
def test_multi_target_dataset(set_up):
    _, base_dataset, _, _, _, _ = set_up
    base_dataset.target_name = ['pathological', 'gender', 'age']
    x, y = base_dataset[0]
    assert len(y) == 3
    assert base_dataset.description.to_list() == y
    concat_ds = BaseConcatDataset([base_dataset])
    windows_ds = create_fixed_length_windows(
        concat_ds,
        window_size_samples=100,
        window_stride_samples=100,
        start_offset_samples=0,
        stop_offset_samples=None,
        drop_last_window=False,
        mapping={True: 1, False: 0, 'M': 0, 'F': 1},  # map non-digit targets
    )
    x, y, ind = windows_ds[0]
    assert len(y) == 3
    assert y == [1, 0, 48]  # order matters: pathological, gender, age
def load_5f_halt(args):
    """Load and preprocess the validation/training data of the 5F or HaLT
    datasets.

    Parameters
    ----------
    args : Namespace
        Input arguments.

    Returns
    -------
    dataset : BaseConcatDataset
        BaseConcatDataset of raw MNE arrays.
    """
    import os
    from scipy import io
    import numpy as np
    import mne
    from sklearn.utils import resample
    from braindecode.datautil import exponential_moving_standardize
    from braindecode.datasets import BaseDataset, BaseConcatDataset

    ### Channel types ###
    # Rejecting channels A1, A2, X5 (see paper)
    ch_names = ['Fp1', 'Fp2', 'F3', 'F4', 'C3', 'C4', 'P3', 'P4', 'O1', 'O2',
                'F7', 'F8', 'T3', 'T4', 'T5', 'T6', 'Fz', 'Cz', 'Pz', 'stim']
    ch_types = ['eeg', 'eeg', 'eeg', 'eeg', 'eeg', 'eeg', 'eeg', 'eeg', 'eeg',
                'eeg', 'eeg', 'eeg', 'eeg', 'eeg', 'eeg', 'eeg', 'eeg', 'eeg',
                'eeg', 'stim']
    idx_chan = np.ones(22, dtype=bool)
    unused_chans = np.asarray((10, 11, 21))
    idx_chan[unused_chans] = False

    ### Subjects ###
    dataset = []
    if args.dataset == '5f':
        data_dir = os.path.join(args.project_dir, 'datasets', '5f', 'data')
    elif args.dataset == 'halt':
        data_dir = os.path.join(args.project_dir, 'datasets', 'halt', 'data')
    files = os.listdir(data_dir)
    files.sort()
    # Loading only one subject for intra-subject analysis
    if args.inter_subject == False:
        used_files = []
        for file in files:
            if 'Subject' + args.test_sub in file:
                used_files.append(file)
    else:
        used_files = files

    ### Loading and preprocessing the .mat data ###
    for file in used_files:
        print('\n\nData file --> ' + file + '\n\n')
        current_sub = file.partition('Subject')[2][0]
        data = io.loadmat(os.path.join(data_dir, file),
                          chars_as_strings=True)['o']
        sfreq = np.asarray(data[0][0]['sampFreq'][0])
        marker = np.transpose(np.asarray(data[0][0]['marker']))
        data = np.transpose(np.asarray(data[0][0]['data']))[idx_chan, :]
        data = exponential_moving_standardize(data)
        data = np.append(data, marker, 0)
        del marker

        ### Converting to MNE format ###
        info = mne.create_info(ch_names, sfreq, ch_types)
        raw_train = mne.io.RawArray(data, info)
        raw_train.info['highpass'] = 0.53
        raw_train.info['lowpass'] = 70
        del data

        ### Getting events and downsampling the data ###
        events = mne.find_events(raw_train, stim_channel='stim',
                                 output='onset', consecutive='increasing')
        # Drop unused events
        idx = np.ones(events.shape[0], dtype=bool)
        for e in range(len(idx)):
            if events[e, 2] > 6:
                idx[e] = False
        events = events[idx]
        # Drop the stimuli channel
        raw_train.pick_types(eeg=True)
        # Downsampling the data
        raw_train.resample(args.sfreq)

        ### Dividing events into training, validation and test sets ###
        # For intra-subject decoding, 10 trials per condition are used for
        # validation, 10 trials for testing, and the remaining trials are
        # used for training.
        # For inter-subject decoding, 75 trials per condition of the subject
        # of interest are used for validation and 75 for testing. All the
        # data from the other subjects is used for training.
        idx_train = np.zeros(
            (events.shape[0], len(np.unique(events[:, 2]))), dtype=bool)
        idx_val = np.zeros(
            (events.shape[0], len(np.unique(events[:, 2]))), dtype=bool)
        idx_test = np.zeros(
            (events.shape[0], len(np.unique(events[:, 2]))), dtype=bool)
        for e in range(len(np.unique(events[:, 2]))):
            if args.inter_subject == False:
                shuf = resample(np.where(events[:, 2] == e + 1)[0],
                                replace=False)
                idx_val[shuf[:10], e] = True
                idx_test[shuf[10:20], e] = True
                idx_train[shuf[20:], e] = True
            else:
                if args.test_sub == current_sub:
                    idx_val[np.where(events[:, 2] == e + 1)[0][0:75], e] = True
                    idx_test[np.where(events[:, 2] == e + 1)[0][75:150], e] = True
                else:
                    idx_train[np.where(events[:, 2] == e + 1)[0], e] = True
        idx_train = np.sum(idx_train, 1, dtype=bool)
        idx_val = np.sum(idx_val, 1, dtype=bool)
        idx_test = np.sum(idx_test, 1, dtype=bool)
        events_train = events[idx_train, :]
        events_val = events[idx_val, :]
        events_test = events[idx_test, :]

        ### Creating the raw data annotations ###
        if args.dataset == '5f':
            event_desc = {1: 'thumb', 2: 'index_finger', 3: 'middle_finger',
                          4: 'ring_finger', 5: 'pinkie_finger'}
        elif args.dataset == 'halt':
            event_desc = {1: 'left_hand', 2: 'right_hand',
                          3: 'passive_neutral', 4: 'left_leg', 5: 'tongue',
                          6: 'right_leg'}
        if args.inter_subject == False:
            annotations_train = mne.annotations_from_events(
                events_train, sfreq, event_desc=event_desc)
            annotations_val = mne.annotations_from_events(
                events_val, sfreq, event_desc=event_desc)
            annotations_test = mne.annotations_from_events(
                events_test, sfreq, event_desc=event_desc)
            # Creating 1s trials
            annotations_train.duration = np.repeat(1., len(events_train))
            annotations_val.duration = np.repeat(1., len(events_val))
            annotations_test.duration = np.repeat(1., len(events_test))
            # Adding annotations to raw data
            raw_val = raw_train.copy()
            raw_test = raw_train.copy()
            raw_train.set_annotations(annotations_train)
            raw_val.set_annotations(annotations_val)
            raw_test.set_annotations(annotations_test)
        else:
            if args.test_sub == current_sub:
                annotations_val = mne.annotations_from_events(
                    events_val, sfreq, event_desc=event_desc)
                annotations_test = mne.annotations_from_events(
                    events_test, sfreq, event_desc=event_desc)
                # Creating 1s trials
                annotations_val.duration = np.repeat(1., len(events_val))
                annotations_test.duration = np.repeat(1., len(events_test))
                # Adding annotations to raw data
                raw_val = raw_train.copy()
                raw_test = raw_train.copy()
                raw_val.set_annotations(annotations_val)
                raw_test.set_annotations(annotations_test)
            else:
                annotations_train = mne.annotations_from_events(
                    events_train, sfreq, event_desc=event_desc)
                # Creating 1s trials
                annotations_train.duration = np.repeat(1., len(events_train))
                # Adding annotations to raw data
                raw_train.set_annotations(annotations_train)

        ### Converting to BaseConcatDataset format ###
        description_train = {'subject': current_sub, 'partition': 'training'}
        description_val = {'subject': current_sub, 'partition': 'validation'}
        description_test = {'subject': current_sub, 'partition': 'test'}
        if args.inter_subject == False:
            dataset.append(BaseDataset(raw_train, description_train))
            dataset.append(BaseDataset(raw_val, description_val))
            dataset.append(BaseDataset(raw_test, description_test))
        else:
            if args.test_sub == current_sub:
                dataset.append(BaseDataset(raw_val, description_val))
                dataset.append(BaseDataset(raw_test, description_test))
            else:
                dataset.append(BaseDataset(raw_train, description_train))
    dataset = BaseConcatDataset(dataset)

    ### Output ###
    return dataset
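# Hypothetical usage sketch for load_5f_halt (the path, subject id and target
# sampling frequency below are placeholders, not values from the original
# script). It assumes the .mat files live under
# <project_dir>/datasets/<dataset>/data and contain 'Subject<X>' in their name.
if __name__ == '__main__':
    from argparse import Namespace

    args = Namespace(
        project_dir='/path/to/project',  # root folder with 'datasets/5f/data'
        dataset='5f',                    # '5f' or 'halt'
        inter_subject=False,             # intra-subject analysis
        test_sub='A',                    # subject identifier used in file names
        sfreq=100,                       # resampling frequency in Hz
    )
    dataset = load_5f_halt(args)
    print(dataset.description)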
#

import numpy as np
from sklearn.model_selection import train_test_split

from braindecode.datasets import BaseConcatDataset

random_state = 31
subjects = np.unique(windows_dataset.description['subject'])
subj_train, subj_valid = train_test_split(
    subjects, test_size=0.5, random_state=random_state)

split_ids = {'train': subj_train, 'valid': subj_valid}
splitted = dict()
for name, values in split_ids.items():
    splitted[name] = BaseConcatDataset(
        [ds for ds in windows_dataset.datasets
         if ds.description['subject'] in values])

train_set = splitted['train']
valid_set = splitted['valid']


######################################################################
# Create sequence samplers
# ------------------------
#

######################################################################
# Following the time distributed approach of [1]_, we will need to provide our
# neural network with sequences of windows, such that the embeddings of
# multiple consecutive windows can be concatenated and provided to a final
# classifier.
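######################################################################
# A minimal sketch of that idea (not the library's sampler API): group the
# windows of each recording into non-overlapping sequences of ``n_windows``
# consecutive windows, yielding the start index of each sequence in
# ``train_set``. The value of ``n_windows`` below is an assumption for
# illustration only.

n_windows = 3  # assumed sequence length
seq_starts = []
offset = 0
for ds in train_set.datasets:
    n_wins = len(ds)  # number of windows in this recording
    starts = np.arange(0, n_wins - n_windows + 1, n_windows) + offset
    seq_starts.extend(starts.tolist())
    offset += n_wins
# each start s indexes windows s, s + 1, ..., s + n_windows - 1 of train_set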
def test_variable_length_trials_cropped_decoding():
    cuda = False
    set_random_seeds(seed=20210726, cuda=cuda)

    # create fake tuh abnormal dataset
    tuh = _TUHAbnormalMock(path='')
    # fake variable length trials by cropping first recording
    splits = tuh.split([[i] for i in range(len(tuh.datasets))])
    preprocess(
        concat_ds=splits['0'],
        preprocessors=[
            Preprocessor('crop', tmax=300),
        ],
    )
    variable_tuh = BaseConcatDataset(
        [splits[str(i)] for i in range(len(tuh.datasets))])
    # make sure we actually have different length trials
    assert any(np.diff([ds.raw.n_times for ds in variable_tuh.datasets]) != 0)

    # create windows
    variable_tuh_windows = create_fixed_length_windows(
        concat_ds=variable_tuh,
        window_size_samples=1000,
        window_stride_samples=1000,
        drop_last_window=False,
        mapping={True: 1, False: 0},
    )

    # create train and valid set
    splits = variable_tuh_windows.split(
        [[i] for i in range(len(variable_tuh_windows.datasets))])
    variable_tuh_windows_train = BaseConcatDataset(
        [splits[str(i)] for i in range(len(tuh.datasets) - 1)])
    variable_tuh_windows_valid = BaseConcatDataset(
        [splits[str(len(tuh.datasets) - 1)]])
    for x, y, ind in variable_tuh_windows_train:
        break
    train_split = predefined_split(variable_tuh_windows_valid)

    # initialize a model
    model = ShallowFBCSPNet(
        in_chans=x.shape[0],
        n_classes=len(tuh.description.pathological.unique()),
    )
    to_dense_prediction_model(model)
    if cuda:
        model.cuda()

    # create and train a classifier
    clf = EEGClassifier(
        model,
        cropped=True,
        criterion=CroppedLoss,
        criterion__loss_function=torch.nn.functional.nll_loss,
        optimizer=torch.optim.Adam,
        batch_size=32,
        callbacks=['accuracy'],
        train_split=train_split,
    )
    clf.fit(variable_tuh_windows_train, y=None, epochs=3)

    # make sure it does what we expect
    np.testing.assert_allclose(
        clf.history[:, 'train_loss'],
        np.array([
            0.689495325088501,
            0.1353449523448944,
            0.006638816092163324,
        ]),
        rtol=1e-1,
        atol=1e-1,
    )
    np.testing.assert_allclose(
        clf.history[:, 'valid_loss'],
        np.array([
            2.925871,
            3.611423,
            4.23494,
        ]),
        rtol=1e-1,
        atol=1e-1,
    )