def compute_cleaner(data, eog_data, marker_positions, ival, max_min=2,
                    whisker_percent=5, whisker_length=3):
    """Build a ``Cleaner`` from raw arrays and run it (helper for Cleaner tests).

    Parameters
    ----------
    data : 2d array
        signal that is wrapped into a ``Data`` object and handed to the
        ``Cleaner``
    eog_data : 2d array
        EOG signal, must have as many rows as ``data``
    marker_positions : sequence
        marker positions, each one is paired with the marker code ``0``
    ival
        interval used as segment interval of the EOG processor and as
        both rejection intervals of the cleaner
    max_min, whisker_percent, whisker_length
        forwarded unchanged to ``Cleaner``

    Returns
    -------
    cleaner : Cleaner
        the cleaner, after ``clean()`` has been called on it

    Raises
    ------
    AssertionError
        if ``data`` and ``eog_data`` differ in their first dimension

    """
    assert eog_data.shape[0] == data.shape[0]
    # ``zip`` is a one-shot iterator on Python 3 and the markers are
    # attached to two Data objects, so they have to be a real list.
    markers = list(zip(marker_positions, [0] * len(marker_positions)))
    marker_def = {'0': [0]}

    axes = [range(data.shape[0]), range(data.shape[1])]
    cnt = Data(data, axes=axes, names=['time', 'channels'],
               units=['ms', '#'])
    cnt.fs = 1000
    cnt.markers = markers

    eog_axes = [range(eog_data.shape[0]), range(eog_data.shape[1])]
    eog_cnt = Data(eog_data, axes=eog_axes, names=['time', 'channels'],
                   units=['ms', '#'])
    eog_cnt.fs = 1000
    eog_cnt.markers = markers

    eog_proc = SignalProcessor(FakeLoader(eog_cnt), segment_ival=ival,
                               marker_def=marker_def)
    cleaner = Cleaner(cnt, eog_proc,
                      rejection_blink_ival=ival,
                      max_min=max_min,
                      rejection_var_ival=ival,
                      whisker_percent=whisker_percent,
                      whisker_length=whisker_length,
                      low_cut_hz=None,
                      high_cut_hz=None,
                      filt_order=None,
                      marker_def=marker_def)
    cleaner.clean()
    return cleaner
def convert_mushu_data(data, markers, fs, channels):
    """Convert mushu data into wyrm's ``Data`` format.

    This convenience method creates a continuous ``Data`` object from
    the parameters given. The timeaxis always starts from zero and its
    values are calculated from the sampling frequency ``fs`` and the
    length of ``data``. The ``names`` and ``units`` attributes are
    filled with default values.

    Parameters
    ----------
    data : 2d array
        an 2 dimensional numpy array with the axes: (time, channel)
    markers : list of tuples: (float, str)
        a list of markers. Each element is a tuple of timestamp and
        string. The timestamp is the time in ms relative to the onset of
        the block of data. Note that negative values are *allowed* as
        well as values bigger than the length of the block of data
        returned. That is to be interpreted as a marker from the last
        block and a marker for a future block respectively.
    fs : float
        the sampling frequency, this number is used to calculate the
        timeaxis for the data
    channels : list or 1d array of strings
        the channel names

    Returns
    -------
    cnt : continuous ``Data`` object

    Examples
    --------

    Assuming that ``amp`` is an Amplifier instance from ``libmushu``,
    already configured but not started yet:

    >>> amp_fs = amp.get_sampling_frequency()
    >>> amp_channels = amp.get_channels()
    >>> amp.start()
    >>> while True:
    ...     data, markers = amp.get_data()
    ...     cnt = convert_mushu_data(data, markers, amp_fs, amp_channels)
    ...     # some more code
    >>> amp.stop()

    References
    ----------
    https://github.com/bbci/mushu

    """
    n_samples = data.shape[0]
    # time axis in milliseconds, starting at zero
    time_axis = np.linspace(0, 1000 * n_samples / fs, n_samples,
                            endpoint=False)
    chan_axis = channels[:]
    axes = [time_axis, chan_axis]
    names = ['time', 'channel']
    # The first axis holds times in ms (see ``time_axis`` above), so its
    # unit is 'ms' and not the unit of the measured signal.
    units = ['ms', '#']
    # copies are taken so the caller's buffers are not shared
    cnt = Data(data=data.copy(), axes=axes, names=names, units=units)
    cnt.markers = markers[:]
    cnt.fs = fs
    return cnt
def compute_cleaner(data, eog_data, marker_positions, ival, max_min=2,
                    whisker_percent=5, whisker_length=3):
    """Create a ``Cleaner`` for the given arrays and run ``clean()`` on it.

    Helper for the Cleaner tests.

    Parameters
    ----------
    data : 2d array
        signal to be cleaned, wrapped into a ``Data`` object
    eog_data : 2d array
        EOG signal with the same number of rows as ``data``
    marker_positions : sequence
        positions of the markers; every marker gets the code ``0``
    ival
        used as ``segment_ival`` of the EOG ``SignalProcessor`` and as
        ``rejection_blink_ival`` / ``rejection_var_ival`` of the cleaner
    max_min, whisker_percent, whisker_length
        passed through to ``Cleaner``

    Returns
    -------
    cleaner : Cleaner
        the cleaner on which ``clean()`` was already called

    Raises
    ------
    AssertionError
        if ``eog_data`` and ``data`` have a different number of rows

    """
    assert eog_data.shape[0] == data.shape[0]

    names = ['time', 'channels']
    units = ['ms', '#']
    marker_def = {'0': [0]}
    # Must be a list: on Python 3 a bare ``zip`` object would be
    # exhausted by whichever of the two Data objects is read first.
    markers = list(zip(marker_positions, [0] * len(marker_positions)))

    cnt = Data(data,
               axes=[range(data.shape[0]), range(data.shape[1])],
               names=list(names), units=list(units))
    cnt.fs = 1000
    cnt.markers = markers

    eog_cnt = Data(eog_data,
                   axes=[range(eog_data.shape[0]), range(eog_data.shape[1])],
                   names=list(names), units=list(units))
    eog_cnt.fs = 1000
    eog_cnt.markers = markers

    eog_proc = SignalProcessor(FakeLoader(eog_cnt), segment_ival=ival,
                               marker_def=marker_def)
    cleaner = Cleaner(cnt, eog_proc, rejection_blink_ival=ival,
                      max_min=max_min, rejection_var_ival=ival,
                      whisker_percent=whisker_percent,
                      whisker_length=whisker_length,
                      low_cut_hz=None, high_cut_hz=None, filt_order=None,
                      marker_def=marker_def)
    cleaner.clean()
    return cleaner
def load_graz(filename):
    """Load the Graz data set from a Matlab file into ``Data`` objects.

    Parameters
    ----------
    filename : str
        path to the Matlab file holding the variables ``x_train``,
        ``y_train`` and ``x_test``. A false value (``None``, ``''``)
        falls back to ``'dataset_BCIcomp1.mat'`` in the current
        directory, which used to be hard-coded.

    Returns
    -------
    dat_train, dat_test : epoched ``Data`` objects
        ``dat_train`` carries the labels on its first axis (label 2 is
        mapped to 0) and the ``class_names`` attribute; ``dat_test`` has
        plain epoch indices on its first axis. Both have ``fs = 128``.

    """
    # The parameter used to be ignored in favour of a fixed file name.
    data_mat = scio.loadmat(filename or 'dataset_BCIcomp1.mat')

    def _to_epochs(raw):
        # reorder the axes so they match the names used below
        epochs = raw.astype('double')
        epochs = epochs.swapaxes(-3, -2)
        return epochs.swapaxes(-1, -3)

    # training data
    data = _to_epochs(data_mat['x_train'])
    labels = data_mat['y_train'].astype('int').ravel()
    axes = [np.arange(i) for i in data.shape]
    axes[0] = labels
    axes[2] = [str(i) for i in range(data.shape[2])]
    dat_train = Data(data=data, axes=axes,
                     names=['Class', 'Time', 'Channel'],
                     units=['#', 'ms', '#'])
    dat_train.fs = 128
    dat_train.class_names = ['left', 'right']

    # test data (no labels available in the file)
    data = _to_epochs(data_mat['x_test'])
    axes = [np.arange(i) for i in data.shape]
    axes[2] = [str(i) for i in range(data.shape[2])]
    dat_test = Data(data=data, axes=axes,
                    names=['Class', 'Time', 'Channel'],
                    units=['#', 'ms', '#'])
    dat_test.fs = 128

    # map labels 2 -> 0
    # Only the training set has labels on axis 0. The test axis is a
    # running epoch index, remapping it would overwrite index 2 with 0.
    dat_train.axes[0][dat_train.axes[0] == 2] = 0
    return dat_train, dat_test
def load_mushu_data(meta):
    """Read a recording stored in Mushu's file format.

    The recording is returned as continuous ``Data`` object.

    Parameters
    ----------
    meta : str
        Path to the `.meta` file. The `.eeg` and `.marker` files of the
        recording are looked up next to it by replacing the last
        occurrence of `.meta` in the path.

    Returns
    -------
    dat : Data
        Continuous Data object with the attributes ``fs`` and
        ``markers`` set

    Raises
    ------
    AssertionError
        if one of the three files does not exist

    Examples
    --------

    >>> dat = load_mushu_data('testrecording.meta')

    """
    # splitting from the right touches only the last '.meta'
    datafile = '.eeg'.join(meta.rsplit('.meta', 1))
    markerfile = '.marker'.join(meta.rsplit('.meta', 1))
    for required in (meta, datafile, markerfile):
        assert path.exists(required)

    # meta data
    with open(meta) as fh:
        metadata = json.load(fh)
    fs = metadata['Sampling Frequency']
    channels = np.array(metadata['Channels'])

    # eeg data: flat float32 values, one column per channel
    data = np.fromfile(datafile, np.float32).reshape((-1, len(channels)))

    # markers: one "<timestamp> <text>" entry per line
    with open(markerfile) as fh:
        markers = [[float(ts), str(text).strip()]
                   for ts, text in (line.split(' ', 1) for line in fh)]

    # assemble the Data object
    n_samples = len(data)
    duration = n_samples * 1000 / fs
    timeaxis = np.linspace(0, duration, n_samples, endpoint=False)
    dat = Data(data=data, axes=[timeaxis, channels],
               names=['time', 'channels'], units=['ms', '#'])
    dat.fs = fs
    dat.markers = markers
    return dat
def load_mushu_data(meta):
    """Load an EEG recording that was saved by Mushu.

    Returns the recording as continuous ``Data`` object.

    Parameters
    ----------
    meta : str
        Path to the `.meta` file. A Mushu recording consists of three
        files, `.eeg`, `.marker` and `.meta`; the other two paths are
        derived from this one.

    Returns
    -------
    dat : Data
        Continuous Data object, ``dat.fs`` and ``dat.markers`` are set

    Raises
    ------
    AssertionError
        if the meta, eeg or marker file is missing

    Examples
    --------

    >>> dat = load_mushu_data('testrecording.meta')

    """
    def _sibling(extension):
        # replace only the last occurrence of '.meta' in the path
        return extension.join(meta.rsplit('.meta', 1))

    datafile = _sibling('.eeg')
    markerfile = _sibling('.marker')
    all_present = (path.exists(meta)
                   and path.exists(datafile)
                   and path.exists(markerfile))
    assert all_present

    with open(meta, 'r') as fh:
        metadata = json.load(fh)
    channels = np.array(metadata['Channels'])
    fs = metadata['Sampling Frequency']

    # the .eeg file is a flat float32 dump, reshape to (samples, channels)
    raw = np.fromfile(datafile, np.float32)
    data = raw.reshape((-1, len(channels)))

    markers = []
    with open(markerfile, 'r') as fh:
        for entry in fh:
            timestamp, text = entry.split(' ', 1)
            markers.append([float(timestamp), str(text).strip()])

    duration = len(data) * 1000 / fs
    axes = [np.linspace(0, duration, len(data), endpoint=False), channels]
    dat = Data(data=data, axes=axes, names=['time', 'channels'],
               units=['ms', '#'])
    dat.fs = fs
    dat.markers = markers
    return dat
def test_segment_dat_with_restriction_to_new_data_ival_pos_pos(self):
    """Online segmentation with an interval lying after the marker."""
    n_samples = 9
    dat = Data(np.ones((n_samples, 3)),
               [np.linspace(0, 900, n_samples, endpoint=False),
                ('a', 'b', 'c')],
               ['time', 'channels'],
               ['ms', '#'])
    dat.fs = 10
    dat.markers = [[100, 'x'], [200, 'x'], [300, 'x']]
    marker_def = {'class 1': ['x']}
    # (number of new samples, number of epochs expected)
    expectations = [(0, 0), (1, 0), (2, 1), (3, 2), (4, 3), (5, 3)]
    for n_new, n_epochs in expectations:
        epo = segment_dat(dat, marker_def, [100, 500], newsamples=n_new)
        self.assertEqual(epo.data.shape[0], n_epochs)
def test_equality(self):
    """Equality must hold for copies and break on any single difference."""
    reference = Data(self.data, self.axes, self.names, self.units)
    # attributes the Data class knows about
    reference.markers = [[123, 'foo'], [234, 'bar']]
    reference.fs = 100
    # attribute the Data class does not know about
    reference.foo = 'bar'

    # an untouched copy is equal
    other = reference.copy()
    self.assertEqual(reference, other)

    # data with another shape
    other = reference.copy()
    other.data = np.arange(20).reshape(5, 4)
    self.assertNotEqual(reference, other)

    # a single changed data value
    other = reference.copy()
    other.data[0, 0] = 42
    self.assertNotEqual(reference, other)

    # changed axis
    other = reference.copy()
    other.axes[0] = np.arange(100)
    self.assertNotEqual(reference, other)

    # changed name
    other = reference.copy()
    other.names[0] = 'baz'
    self.assertNotEqual(reference, other)

    # changed unit
    other = reference.copy()
    other.units[0] = 'u3'
    self.assertNotEqual(reference, other)

    # changed markers (known extra attribute)
    other = reference.copy()
    other.markers[0] = [123, 'baz']
    self.assertNotEqual(reference, other)

    # changed sampling frequency (known extra attribute)
    other = reference.copy()
    other.fs = 10
    self.assertNotEqual(reference, other)

    # additional unknown attribute 'baz'
    other = reference.copy()
    other.baz = 'baz'
    self.assertNotEqual(reference, other)

    # additional unknown attribute 'bar'
    other = reference.copy()
    other.bar = 42
    self.assertNotEqual(reference, other)
def data_factory(data, axes=None, names=None, units=None, markers=None):
    """Helper method to create Data objects.

    Parameters
    ----------
    data : ndarray
        the data; if it is empty, ``axes``, ``names`` and ``units`` are
        replaced by empty lists regardless of what was passed
    axes : list, optional
        defaults to one axis per dimension with the values
        ``0, 10, 20, ...``
    names : list, optional
        defaults to ``'name 0', 'name 1', ...``
    units : list, optional
        defaults to ``'unit 0', 'unit 1', ...``
    markers : list, optional
        defaults to an empty list

    Returns
    -------
    d : Data
        with ``markers`` set and ``fs`` fixed to 100

    """
    if len(data) == 0:
        # three separate lists: a chained assignment would hand the very
        # same list object to all three attributes
        axes, names, units = [], [], []
    else:
        if axes is None:
            axes = [[10 * k for k in range(length)] for length in data.shape]
        if names is None:
            names = ['name %i' % i for i in range(data.ndim)]
        if units is None:
            units = ['unit %i' % i for i in range(data.ndim)]
    d = Data(data=data, axes=axes, names=names, units=units)
    d.markers = markers if markers is not None else []
    d.fs = 100
    return d
def test_segment_dat_with_restriction_to_new_data_ival_neg_neg(self):
    """Online segmentation with an interval lying before the marker."""
    # samples at 0, 100, ..., 800 ms; with the interval [-400, -100] the
    # markers map to these windows:
    #   M500 -> 100..400
    #   M600 -> 200..500
    #   M699 -> 299..599
    #   M700 -> 300..600
    #   M701 -> 301..601
    n_samples = 9
    dat = Data(np.ones((n_samples, 3)),
               [np.linspace(0, 900, n_samples, endpoint=False),
                ('a', 'b', 'c')],
               ['time', 'channels'],
               ['ms', '#'])
    dat.fs = 10
    dat.markers = [[t, 'x'] for t in (500, 600, 699, 700, 701)]
    marker_def = {'class 1': ['x']}
    # (number of new samples, number of epochs expected)
    expectations = [(0, 0), (1, 0), (2, 2), (3, 4), (4, 5), (5, 5)]
    for n_new, n_epochs in expectations:
        epo = segment_dat(dat, marker_def, [-400, -100], newsamples=n_new)
        self.assertEqual(epo.data.shape[0], n_epochs)
def test_segment_dat_with_restriction_to_new_data_ival_pos_pos(self):
    """Online segmentation with an interval lying after the marker."""
    # samples at 0, 100, ..., 800 ms; with the interval [100, 500] the
    # markers map to these windows:
    #   M100 -> 200..600
    #   M200 -> 300..700
    #   M299 -> 399..799
    #   M300 -> 400..800
    #   M301 -> 401..801
    n_samples = 9
    dat = Data(np.ones((n_samples, 3)),
               [np.linspace(0, 900, n_samples, endpoint=False),
                ('a', 'b', 'c')],
               ['time', 'channels'],
               ['ms', '#'])
    dat.fs = 10
    dat.markers = [[t, 'x'] for t in (100, 200, 299, 300, 301)]
    marker_def = {'class 1': ['x']}
    # (number of new samples, number of epochs expected)
    expectations = [(0, 0), (1, 1), (2, 3), (3, 4), (4, 5), (5, 5)]
    for n_new, n_epochs in expectations:
        epo = segment_dat(dat, marker_def, [100, 500], newsamples=n_new)
        self.assertEqual(epo.data.shape[0], n_epochs)
def test_segment_dat_with_restriction_to_new_data_ival_neg_neg(self):
    """Segmenting only new data, interval entirely before the marker."""
    # Time axis: 0, 100, ..., 800 ms. Interval [-400, -100] relative to
    # the marker gives:
    #   marker 500 -> epoch 100..400
    #   marker 600 -> epoch 200..500
    #   marker 699 -> epoch 299..599
    #   marker 700 -> epoch 300..600
    #   marker 701 -> epoch 301..601
    ival = [-400, -100]
    marker_times = [500, 600, 699, 700, 701]
    timeaxis = np.linspace(0, 900, 9, endpoint=False)
    dat = Data(np.ones((9, 3)), [timeaxis, ("a", "b", "c")],
               ["time", "channels"], ["ms", "#"])
    dat.fs = 10
    dat.markers = [[t, "x"] for t in marker_times]
    mrk_def = {"class 1": ["x"]}
    # new samples -> expected number of epochs
    for newsamples, expected in ((0, 0), (1, 0), (2, 2),
                                 (3, 4), (4, 5), (5, 5)):
        epo = segment_dat(dat, mrk_def, ival, newsamples=newsamples)
        self.assertEqual(epo.data.shape[0], expected)
def test_segment_dat_with_restriction_to_new_data_ival_pos_pos(self):
    """Segmenting only new data, interval entirely after the marker."""
    # Time axis: 0, 100, ..., 800 ms. Interval [100, 500] relative to
    # the marker gives:
    #   marker 100 -> epoch 200..600
    #   marker 200 -> epoch 300..700
    #   marker 299 -> epoch 399..799
    #   marker 300 -> epoch 400..800
    #   marker 301 -> epoch 401..801
    ival = [100, 500]
    marker_times = [100, 200, 299, 300, 301]
    timeaxis = np.linspace(0, 900, 9, endpoint=False)
    dat = Data(np.ones((9, 3)), [timeaxis, ("a", "b", "c")],
               ["time", "channels"], ["ms", "#"])
    dat.fs = 10
    dat.markers = [[t, "x"] for t in marker_times]
    mrk_def = {"class 1": ["x"]}
    # new samples -> expected number of epochs
    for newsamples, expected in ((0, 0), (1, 1), (2, 3),
                                 (3, 4), (4, 5), (5, 5)):
        epo = segment_dat(dat, mrk_def, ival, newsamples=newsamples)
        self.assertEqual(epo.data.shape[0], expected)
def load_brain_vision_data(vhdr):
    """Load Brain Vision data from a file.

    This methods loads the continuous EEG data, and returns a ``Data``
    object of continuous data ``[time, channel]``, along with the
    markers and the sampling frequency. The EEG data is returned in
    micro Volt.

    Parameters
    ----------
    vhdr : str
        Path to a VHDR file

    Returns
    -------
    dat : Data
        Continuous Data with the additional attributes ``.fs`` for the
        sampling frequency and ``.markers`` for a list of markers. Each
        marker is a list of ``[time in ms, marker]``.

    Raises
    ------
    AssertionError
        If one of the consistency checks fails

    Examples
    --------

    >>> dat = load_brain_vision_data('path/to/vhdr')
    >>> dat.fs
    1000
    >>> dat.data.shape
    (54628, 61)

    """
    logger.debug('Loading Brain Vision Data Exchange Header File')
    with open(vhdr) as fh:
        fdata = [line.strip() for line in fh]
    # Drop comments and empty lines. Real lists are needed here because
    # the result is indexed below (map/filter are lazy on Python 3).
    fdata = [line for line in fdata if line and not line.startswith(';')]
    # check for the correct file version:
    assert fdata[0].endswith('1.0')
    # read all data into a dict where the key is the stanza of the file
    file_dict = dict()
    for line in fdata[1:]:
        if line.startswith('[') and line.endswith(']'):
            current_stanza = line[1:-1]
            file_dict[current_stanza] = []
        else:
            file_dict[current_stanza].append(line)
    # translate known stanzas from simple list of strings to a dict
    for stanza in 'Common Infos', 'Binary Infos', 'Channel Infos':
        logger.debug(stanza)
        file_dict[stanza] = {line.split('=', 1)[0]: line.split('=', 1)[1]
                             for line in file_dict[stanza]}
    # now file_dict contains the parsed data from the vhdr file
    common = file_dict['Common Infos']
    # Data and marker file live next to the header. ``path.join`` also
    # works when ``vhdr`` has no directory part, where joining with the
    # separator would produce an absolute path.
    data_f = path.join(path.dirname(vhdr), common['DataFile'])
    marker_f = path.join(path.dirname(vhdr), common['MarkerFile'])
    n_channels = int(common['NumberOfChannels'])
    sampling_interval_microseconds = float(common['SamplingInterval'])
    fs = 1 / (sampling_interval_microseconds / 10**6)
    # each channel entry is comma separated: field 0 is the name, field
    # 2 the resolution
    channel_infos = [file_dict['Channel Infos']['Ch%i' % (i + 1)]
                     for i in range(n_channels)]
    channels = [info.split(',')[0] for info in channel_infos]
    resolutions = [float(info.split(',')[2]) for info in channel_infos]
    # FIXME: the resolution of the first channel is applied to all
    # channels. That is not always correct, for example if we measure
    # pulse or emg.
    # some assumptions about the data...
    assert common['DataFormat'] == 'BINARY'
    assert common['DataOrientation'] == 'MULTIPLEXED'
    assert file_dict['Binary Infos']['BinaryFormat'] == 'INT_16'
    # load EEG data
    logger.debug('Loading EEG Data.')
    data = np.fromfile(data_f, np.int16)
    # Convert to float before scaling: an int16 array cannot be
    # multiplied in place with a float resolution.
    data = data.reshape(-1, n_channels).astype(float)
    data *= resolutions[0]
    n_samples = data.shape[0]
    # duration in ms
    duration = 1000 * n_samples / fs
    timeaxis = np.linspace(0, duration, n_samples, endpoint=False)
    # load marker
    logger.debug('Loading Marker.')
    regexp = re.compile(r'^Mk(?P<mrk_nr>[0-9]*)=.*,(?P<mrk_descr>.*),(?P<mrk_pos>[0-9]*),[0-9]*,[0-9]*$')
    mrk = []
    with open(marker_f) as fh:
        for line in fh:
            match = regexp.match(line.strip())
            if match is None:
                continue
            mrk_pos = match.group('mrk_pos')
            mrk_descr = match.group('mrk_descr')
            # descriptions of at most one character are skipped
            if len(mrk_descr) > 1:
                # marker := [time in ms, marker]
                mrk.append([timeaxis[int(mrk_pos)], mrk_descr])
    dat = Data(data, [timeaxis, channels], ['time', 'channel'], ['ms', '#'])
    dat.fs = fs
    dat.markers = mrk
    return dat
def load_bcicomp3_ds2(filename):
    """Load the BCI Competition III Data Set 2.

    This method loads the data set and converts it into Wyrm's ``Data``
    format. Before you use it, you have to download the data set in
    Matlab format and unpack it. The directory with the extracted files
    must contain the ``Subject_*.mat``- and the ``eloc64.txt`` files.

    .. note::

        If you need the true labels of the test sets, you'll have to
        download them separately from
        http://bbci.de/competition/iii/results/index.html#labels

    Parameters
    ----------
    filename : str
        The path to the matlab file to load

    Returns
    -------
    cnt : continuous `Data` object
        with the attributes ``fs``, ``stimulus_code`` and ``markers``.
        The markers are ordered by time; markers with the same time
        keep the order flashing, target, nontarget, stimulus code.

    Examples
    --------

    >>> dat = load_bcicomp3_ds2('/home/foo/data/Subject_A_Train.mat')

    """
    STIMULUS_CODE = {
        # cols from left to right
        1 : "agmsy5",
        2 : "bhntz6",
        3 : "ciou17",
        4 : "djpv28",
        5 : "ekqw39",
        6 : "flrx4_",
        # rows from top to bottom
        7 : "abcdef",
        8 : "ghijkl",
        9 : "mnopqr",
        10: "stuvwx",
        11: "yz1234",
        12: "56789_"
    }

    # load the matlab data
    data_mat = loadmat(filename)
    # load the channel names (the same for all datasets). ``path.join``
    # also works when ``filename`` has no directory part.
    eloc_file = path.join(path.dirname(filename), 'eloc64.txt')
    with open(eloc_file) as fh:
        eloc_lines = fh.read().splitlines()
    # the channel name is the last column, padded with dots
    channels = [line.split()[-1].replace('.', '')
                for line in eloc_lines if line]
    # fix the channel names, some letters have the wrong capitalization
    channels = [name.upper().replace('Z', 'z').replace('FP', 'Fp')
                for name in channels]
    # The signal is recorded with 64 channels, bandpass filtered
    # 0.1-60Hz and digitized at 240Hz. The format is Character Epoch x
    # Samples x Channels
    data = data_mat['Signal']
    data = data.astype('double')
    # For each sample: 1 if a row/colum was flashed, 0 otherwise
    raw_flashing = data_mat['Flashing'].reshape(-1)
    # keep only the first sample of every run of consecutive ones
    flashing = raw_flashing.copy()
    continued = (raw_flashing[1:] == 1) & (raw_flashing[:-1] == 1)
    flashing[1:][continued] = 0
    # For each sample: 0 when no row/colum was intensified,
    # 1..6 for intensified columns, 7..12 for intensified rows
    stimulus_code = data_mat['StimulusCode'].reshape(-1)
    stimulus_code = stimulus_code[flashing == 1]
    # 0 if no row/col was intensified or the intensified did not contain
    # the target character, 1 otherwise (missing in the test sets)
    stimulus_type = data_mat.get('StimulusType', np.array([])).reshape(-1)
    fs = 240
    data = data.reshape(-1, 64)
    timeaxis = np.linspace(0, data.shape[0] / fs * 1000, data.shape[0], endpoint=False)
    dat = Data(data=data, axes=[timeaxis, channels], names=['time', 'channel'], units=['ms', '#'])
    dat.fs = fs
    # preparing the markers
    target_mask = np.logical_and((flashing == 1), (stimulus_type == 1)) if len(stimulus_type) > 0 else []
    nontarget_mask = np.logical_and((flashing == 1), (stimulus_type == 0)) if len(stimulus_type) > 0 else []
    flashing = (flashing == 1)
    flashing = [[i, 'flashing'] for i in timeaxis[flashing]]
    targets = [[i, 'target'] for i in timeaxis[target_mask]]
    nontargets = [[i, 'nontarget'] for i in timeaxis[nontarget_mask]]
    dat.stimulus_code = stimulus_code[:]
    stimulus_code = list(zip([t for t, _ in flashing],
                             [STIMULUS_CODE[i] for i in stimulus_code]))
    markers = flashing[:]
    markers.extend(targets)
    markers.extend(nontargets)
    markers.extend(stimulus_code)
    # Sort by time only: the markers are a mix of lists and tuples,
    # which cannot be compared with each other on Python 3.
    markers.sort(key=lambda marker: marker[0])
    dat.markers = markers[:]
    return dat
def load_bcicomp3_ds1(dirname):
    """Load the BCI Competition III Data Set 1.

    This method loads the data set and converts it into Wyrm's ``Data``
    format. Before you use it, you have to download the training- and
    test data in Matlab format and unpack it into a directory.

    .. note::

        If you need the true labels of the test sets, you'll have to
        download them separately from
        http://bbci.de/competition/iii/results/index.html#labels

    Parameters
    ----------
    dirname : str
        the directory where the ``Competition_train.mat`` and
        ``Competition_test.mat`` are located

    Returns
    -------
    epo_train, epo_test : epoched ``Data`` objects
        the training set carries the labels on its first axis (label -1
        is mapped to 0) and the ``class_names`` attribute; the test set
        has plain epoch indices on its first axis

    Examples
    --------

    >>> epo_train, epo_test = load_bcicomp3_ds1('/home/foo/bcicomp3_dataset1/')

    """
    # construct the filenames from the dirname; ``path.join`` also
    # handles an empty ``dirname`` (current directory)
    training_file = path.join(dirname, 'Competition_train.mat')
    test_file = path.join(dirname, 'Competition_test.mat')

    # load the training data
    training_data_mat = loadmat(training_file)
    data = training_data_mat['X'].astype('double')
    # swap the last two axes to match the names below
    data = data.swapaxes(-1, -2)
    labels = training_data_mat['Y'].astype('int').ravel()
    # convert into wyrm Data
    axes = [np.arange(i) for i in data.shape]
    axes[0] = labels
    axes[2] = [str(i) for i in range(data.shape[2])]
    names = ['Class', 'Time', 'Channel']
    units = ['#', 'ms', '#']
    dat_train = Data(data=data, axes=axes, names=names, units=units)
    dat_train.fs = 1000
    dat_train.class_names = ['pinky', 'tongue']

    # load the test data
    test_data_mat = loadmat(test_file)
    data = test_data_mat['X'].astype('double')
    data = data.swapaxes(-1, -2)
    # convert into wyrm Data
    axes = [np.arange(i) for i in data.shape]
    axes[2] = [str(i) for i in range(data.shape[2])]
    names = ['Epoch', 'Time', 'Channel']
    units = ['#', 'ms', '#']
    dat_test = Data(data=data, axes=axes, names=names, units=units)
    dat_test.fs = 1000

    # map labels -1 -> 0
    # Only needed for the training set: the first axis of the test set
    # is a running epoch index and contains no labels.
    dat_train.axes[0][dat_train.axes[0] == -1] = 0
    return dat_train, dat_test
# labels arrive in some other form, turn them into small ints
train_labels = np.array(train_labels, dtype=np.int8)
test_labels = np.array(test_labels, dtype=np.int8)

# epoched training data: (class, time, channel)
train_axes = [train_labels,
              range(np_train_instances.shape[1]),
              range(1, 26)]
train_data = Data(np_train_instances,
                  train_axes,
                  ["class", "time", "channel"],
                  ["#", "1s/250", "#"])
train_data.fs = 250
train_data.class_names = ["none", "left", "right"]

# epoched test data, same layout as the training data
test_axes = [test_labels,
             range(np_test_instances.shape[1]),
             range(1, 26)]
test_data = Data(np_test_instances,
                 test_axes,
                 ["class", "time", "channel"],
                 ["#", "1s/250", "#"])
test_data.fs = 250

# names used by the code that follows
dat_train = train_data
dat_test = test_data
# TODO: FILTER OUT LABELS OF NaN INSTANCES
def load_brain_vision_data(vhdr):
    """Load Brain Vision data from a file.

    This methods loads the continuous EEG data, and returns a ``Data``
    object of continuous data ``[time, channel]``, along with the
    markers and the sampling frequency. The EEG data is returned in
    micro Volt.

    Parameters
    ----------
    vhdr : str
        Path to a VHDR file

    Returns
    -------
    dat : Data
        Continuous Data with the additional attributes ``.fs`` for the
        sampling frequency and ``.markers`` for a list of markers. Each
        marker is a list of ``[time in ms, marker]``.

    Raises
    ------
    AssertionError
        If one of the consistency checks fails

    Examples
    --------

    >>> dat = load_brain_vision_data('path/to/vhdr')
    >>> dat.fs
    1000
    >>> dat.data.shape
    (54628, 61)

    """
    logger.debug('Loading Brain Vision Data Exchange Header File')
    with open(vhdr) as fh:
        stripped = [line.strip() for line in fh]
    # Comments and empty lines are dropped. This has to be a list, not
    # a lazy map/filter object, because it is indexed and sliced below.
    fdata = [line for line in stripped
             if not line.startswith(';') and len(line) > 0]
    # check for the correct file version:
    assert fdata[0].endswith('1.0')
    # read all data into a dict where the key is the stanza of the file
    file_dict = dict()
    for line in fdata[1:]:
        if line.startswith('[') and line.endswith(']'):
            current_stanza = line[1:-1]
            file_dict[current_stanza] = []
        else:
            file_dict[current_stanza].append(line)
    # translate known stanzas from simple list of strings to a dict
    for stanza in 'Common Infos', 'Binary Infos', 'Channel Infos':
        logger.debug(stanza)
        file_dict[stanza] = {line.split('=', 1)[0]: line.split('=', 1)[1]
                             for line in file_dict[stanza]}
    # now file_dict contains the parsed data from the vhdr file
    # load the rest
    data_f = file_dict['Common Infos']['DataFile']
    marker_f = file_dict['Common Infos']['MarkerFile']
    # ``path.join`` instead of joining with the separator, so a header
    # path without directory part does not turn into an absolute path
    data_f = path.join(path.dirname(vhdr), data_f)
    marker_f = path.join(path.dirname(vhdr), marker_f)
    n_channels = int(file_dict['Common Infos']['NumberOfChannels'])
    sampling_interval_microseconds = float(file_dict['Common Infos']['SamplingInterval'])
    fs = 1 / (sampling_interval_microseconds / 10**6)
    # comma separated channel entries: field 0 is the channel name,
    # field 2 its resolution
    channel_infos = [file_dict['Channel Infos']['Ch%i' % (i + 1)]
                     for i in range(n_channels)]
    channels = [entry.split(',')[0] for entry in channel_infos]
    resolutions = [float(entry.split(',')[2]) for entry in channel_infos]
    # FIXME: all channels are scaled with the resolution of the first
    # one, that is not always correct, for example if we measure pulse
    # or emg
    # some assumptions about the data...
    assert file_dict['Common Infos']['DataFormat'] == 'BINARY'
    assert file_dict['Common Infos']['DataOrientation'] == 'MULTIPLEXED'
    assert file_dict['Binary Infos']['BinaryFormat'] == 'INT_16'
    # load EEG data
    logger.debug('Loading EEG Data.')
    data = np.fromfile(data_f, np.int16)
    # cast to float first, the int16 samples are scaled in place
    data = data.reshape(-1, n_channels).astype(type(resolutions[0]))
    data *= resolutions[0]
    n_samples = data.shape[0]
    # duration in ms
    duration = 1000 * n_samples / fs
    time = np.linspace(0, duration, n_samples, endpoint=False)
    # load marker
    logger.debug('Loading Marker.')
    regexp = r'^Mk(?P<mrk_nr>[0-9]*)=.*,(?P<mrk_descr>.*),(?P<mrk_pos>[0-9]*),[0-9]*,[0-9]*$'
    mrk = []
    with open(marker_f) as fh:
        for line in fh:
            line = line.strip()
            match = re.match(regexp, line)
            if match is None:
                continue
            mrk_pos = match.group('mrk_pos')
            mrk_descr = match.group('mrk_descr')
            # descriptions of at most one character are skipped
            if len(mrk_descr) > 1:
                # marker := [time in ms, marker]
                mrk.append([time[int(mrk_pos)], mrk_descr])
    dat = Data(data, [time, channels], ['time', 'channel'], ['ms', '#'])
    dat.fs = fs
    dat.markers = mrk
    return dat
def load_bcicomp3_ds2(filename):
    """Load the BCI Competition III Data Set 2.

    This method loads the data set and converts it into Wyrm's ``Data``
    format. Before you use it, you have to download the data set in
    Matlab format and unpack it. The directory with the extracted files
    must contain the ``Subject_*.mat``- and the ``eloc64.txt`` files.

    .. note::

        If you need the true labels of the test sets, you'll have to
        download them separately from
        http://bbci.de/competition/iii/results/index.html#labels

    Parameters
    ----------
    filename : str
        The path to the matlab file to load

    Returns
    -------
    cnt : continuous `Data` object
        with the attributes ``fs``, ``stimulus_code`` and ``markers``;
        the markers are sorted by their time

    Examples
    --------

    >>> dat = load_bcicomp3_ds2('/home/foo/data/Subject_A_Train.mat')

    """
    STIMULUS_CODE = {
        # cols from left to right
        1 : "agmsy5",
        2 : "bhntz6",
        3 : "ciou17",
        4 : "djpv28",
        5 : "ekqw39",
        6 : "flrx4_",
        # rows from top to bottom
        7 : "abcdef",
        8 : "ghijkl",
        9 : "mnopqr",
        10: "stuvwx",
        11: "yz1234",
        12: "56789_"
    }

    # load the matlab data
    data_mat = loadmat(filename)
    # The channel names (the same for all datasets) are stored next to
    # the data file. ``path.join`` keeps the path relative when
    # ``filename`` has no directory part.
    eloc_file = path.join(path.dirname(filename), 'eloc64.txt')
    with open(eloc_file) as fh:
        data = fh.read()
    channels = []
    for line in data.splitlines():
        if not line:
            continue
        # last column is the name, padded with dots; some letters have
        # the wrong capitalization
        chan = line.split()[-1].replace('.', '')
        chan = chan.upper().replace('Z', 'z').replace('FP', 'Fp')
        channels.append(chan)
    # The signal is recorded with 64 channels, bandpass filtered
    # 0.1-60Hz and digitized at 240Hz. The format is Character Epoch x
    # Samples x Channels
    data = data_mat['Signal']
    data = data.astype('double')
    # For each sample: 1 if a row/colum was flashed, 0 otherwise
    samplewise = data_mat['Flashing'].reshape(-1)
    # reduce every run of consecutive ones to its first sample
    flashing = samplewise.copy()
    repeated = (samplewise[1:] == 1) & (samplewise[:-1] == 1)
    flashing[1:][repeated] = 0
    # For each sample: 0 when no row/colum was intensified,
    # 1..6 for intensified columns, 7..12 for intensified rows
    stimulus_code = data_mat['StimulusCode'].reshape(-1)
    stimulus_code = stimulus_code[flashing == 1]
    # 0 if no row/col was intensified or the intensified did not contain
    # the target character, 1 otherwise (empty if not in the file)
    stimulus_type = data_mat.get('StimulusType', np.array([])).reshape(-1)
    fs = 240
    data = data.reshape(-1, 64)
    timeaxis = np.linspace(0, data.shape[0] / fs * 1000, data.shape[0], endpoint=False)
    dat = Data(data=data, axes=[timeaxis, channels], names=['time', 'channel'], units=['ms', '#'])
    dat.fs = fs
    # preparing the markers
    target_mask = np.logical_and((flashing == 1), (stimulus_type == 1)) if len(stimulus_type) > 0 else []
    nontarget_mask = np.logical_and((flashing == 1), (stimulus_type == 0)) if len(stimulus_type) > 0 else []
    flashing = (flashing == 1)
    flashing = [[i, 'flashing'] for i in timeaxis[flashing]]
    targets = [[i, 'target'] for i in timeaxis[target_mask]]
    nontargets = [[i, 'nontarget'] for i in timeaxis[nontarget_mask]]
    dat.stimulus_code = stimulus_code[:]
    stimulus_code = zip([t for t, _ in flashing], [STIMULUS_CODE[i] for i in stimulus_code])
    markers = flashing[:]
    markers.extend(targets)
    markers.extend(nontargets)
    markers.extend(stimulus_code)
    # sort by time only, lists and tuples are not comparable on Python 3
    dat.markers = sorted(markers[:], key=lambda x: x[0])
    return dat
def online_erp(fs, n_channels, subsample):
    """Time a simulated online ERP processing loop.

    Random data is generated at the pace of the wall clock and pushed
    through block buffer, low- and high-pass filter, optional
    subsampling, ring buffer, segmentation, jumping means, feature
    vector creation and LDA application with a dummy classifier. The
    loop spins until at least one new sample is due and stops after 500
    measured iterations.

    Parameters
    ----------
    fs : number
        sampling frequency of the simulated data
    n_channels : int
        number of simulated channels
    subsample : bool
        if true, the filtered data is subsampled to 100Hz

    Returns
    -------
    times : 1d array
        the duration of each measured iteration as differences of
        ``time.time()``. Iterations that yield no block or no epoch, and
        iterations within the first second, are not measured.

    """
    logger.debug('Running Online ERP with {fs}Hz, and {channels}channels'.format(fs=fs, channels=n_channels))

    target_fs = 100
    # length of one block in ms
    block_ms = 1000 * 1 / target_fs
    # samples per block at the original sampling frequency
    samples_per_block = fs * (block_ms / 1000)

    marker_def = {'target': 'm'}
    seg_ival = [0, 700]
    jumping_means_ivals = [150, 220], [200, 260], [310, 360], [550, 660]
    ring_capacity = 1000

    # dummy classifier, only there to exercise lda_apply
    dummy_clf = [0, 0]

    nyquist = fs / 2
    b_low, a_low = proc.signal.butter(5, [30 / nyquist], btype='low')
    b_high, a_high = proc.signal.butter(5, [.4 / nyquist], btype='high')
    state_low = proc.lfilter_zi(b_low, a_low, n_channels)
    state_high = proc.lfilter_zi(b_high, a_high, n_channels)

    chan_axis = np.array(list(map(str, range(n_channels))))
    axis_names = ['time', 'channel']
    axis_units = ['ms', '#']

    block_buffer = BlockBuffer(samples_per_block)
    ring_buffer = RingBuffer(ring_capacity)

    durations = []

    # when the last chunk of data was acquired
    last_acquired = time.time()
    # when the last marker was emitted
    last_marker = time.time()
    # when the experiment started
    started = time.time()

    measured = 0
    while measured < 500:
        loop_start = time.time()

        elapsed = time.time() - last_acquired
        n_new = int(elapsed * fs)
        if n_new == 0:
            # not enough time has passed for a single sample yet
            continue
        last_acquired = time.time()

        # simulate the acquired data
        chunk = np.random.random((n_new, n_channels))
        time_axis = np.linspace(0, 1000 * (n_new / fs), n_new, endpoint=False)

        # emit a marker on the last sample once more than 10ms have passed
        markers = []
        if time.time() > last_marker + .01:
            last_marker = time.time()
            markers.append([time_axis[-1], 'm'])

        cnt = Data(chunk, axes=[time_axis, chan_axis], names=axis_names, units=axis_units)
        cnt.fs = fs
        cnt.markers = markers

        # block buffer
        block_buffer.append(cnt)
        cnt = block_buffer.get()
        if not cnt:
            continue

        # filter, carrying the filter state over to the next iteration
        cnt, state_low = proc.lfilter(cnt, b_low, a_low, zi=state_low)
        cnt, state_high = proc.lfilter(cnt, b_high, a_high, zi=state_high)

        # subsample
        if subsample:
            cnt = proc.subsample(cnt, target_fs)

        n_fresh = cnt.data.shape[0]

        # ring buffer
        ring_buffer.append(cnt)
        cnt = ring_buffer.get()

        # epoch
        epo = proc.segment_dat(cnt, marker_def, seg_ival, newsamples=n_fresh)
        if not epo:
            continue

        # feature vectors (result unused, the call is part of the timing)
        fv = proc.jumping_means(epo, jumping_means_ivals)
        proc.create_feature_vectors(fv)

        # classification
        proc.lda_apply(fv, dummy_clf)

        # only measure after the first second, before that the ring
        # buffer is not full yet
        if time.time() - started >= (ring_capacity / 1000):
            durations.append(time.time() - loop_start)
            measured += 1

    return np.array(durations)
def _with_extension(fname, ext):
    """Return `fname` with its file extension replaced by `.ext`."""
    return os.path.splitext(fname)[0] + '.' + ext


def _read_raw_file(path, ftype):
    """Read a single raw file with the `mne` reader matching `ftype`."""
    if ftype == 'edf':
        return mne.io.read_raw_edf(path, preload=True)
    if ftype == 'eeg':
        # the BrainVision reader is handed the header file of the recording
        return mne.io.read_raw_brainvision(
            _with_extension(path, 'vhdr'), preload=True)
    raise TypeError('No reader available for file type {}.'.format(ftype))


def parse_raw(path_in, dir_out=None, ftype=None, region='frontal',
              drop_ref=True, drop_stim=True, n_samples=None,
              return_type='wyrm'):
    """Converts raw EEG into `dict`/`wyrm.Raw` using `mne.Raw`

    Params
    ------
        path_in : str or list of str
            path to a raw file, to a directory containing raw files, or
            a list of paths; multiple files are appended to one
            continuous signal
        dir_out : str or None
            if not None, will store raw signals in dir_out (.npy)
        ftype: str
            file type of raw file, supported formats are read from
            prepod.lib.constants.`const.SUPPORTED_FTYPES`
        region : str or None
            regions to return data for; must be one of
            `const.SUPPORTED_REGIONS`. Handled explicitly are 'full',
            'fronto-parietal' and 'pre-frontal'; any other supported
            region keeps the channels whose name contains the
            capitalized first letter of the region (e.g. 'central',
            'frontal', 'parietal', 'occipital', 'temporal').
            If None, all channels will be returned; an unsupported
            region prints a notice and returns all channels as well
        drop_ref : boolean
            whether reference electrodes should be dropped
        drop_stim : boolean
            whether stimulus electrodes should be dropped
        n_samples : int
            return subset of n_samples from start (mainly development)
        return_type : str
            one of ['wyrm'|'dict'], indicating what format to return the
            data in ('wyrm' -> wyrm.Data, 'dict' -> dict of ndArrays)

    Returns
    -------
        raw : dict or wyrm.Data
            dict -> {'signal': ndArray [channels x time] (µV),
                     'srate': float, 'ch_names': list, 'n_chans': int,
                     'time_points': ndArray (ms), 'markers': list,
                     'starttime': datetime, 'subj_id': str},
                    after passing through `hlp.fix_known_errors`
            wyrm.Data -> ('data': ndArray [time x channels],
                          axes: list, 'names': list, 'units': list,
                          'fs': float, 'markers': list,
                          'starttime': datetime)

    Raises
    ------
        TypeError if `ftype` or `return_type` not supported

    See also
    --------
        :type: wyrm.Data
    """
    if ftype not in const.SUPPORTED_FTYPES:
        msg = 'File type {} not supported. Choose one of {}'.format(
            ftype, ', '.join(const.SUPPORTED_FTYPES))
        raise TypeError(msg)

    # collect the file(s) to read
    if isinstance(path_in, list):
        paths = path_in
    elif os.path.isdir(path_in):
        path_in = path_in + '/' if path_in[-1] != '/' else path_in
        paths = [path_in + el
                 for el in hlp.return_fnames(path_in, substr=ftype)]
    else:
        paths = [path_in.strip()]

    # the subject id is the part of the first file name before the first '_'
    subj_id = paths[0].split('/')[-1].split('_')[0]

    raw = _read_raw_file(paths[0], ftype)
    if ftype == 'eeg':
        # NOTE(review): kept from the original implementation, which
        # resets `cals` on the first BrainVision recording only; the
        # reason is not visible here -- confirm it is still needed.
        raw.cals = np.array([])
    raws = [_read_raw_file(path, ftype) for path in paths[1:]]
    if raws:
        raw.append(raws)  # append multiple file to continuous signal

    raw = strip_ch_names(raw)

    if drop_ref:
        raw.drop_channels([el for el in raw.ch_names if 'Ref' in el])
    if drop_stim:
        raw.drop_channels([el for el in raw.ch_names if 'STI' in el])

    if region and region in const.SUPPORTED_REGIONS:
        if region == 'full':
            to_drop = []
        elif region == 'fronto-parietal':
            to_drop = [el for el in raw.ch_names
                       if 'F' not in el and 'P' not in el]
        elif region == 'pre-frontal':
            to_drop = [el for el in raw.ch_names if 'Fp' not in el]
        else:
            # keep channels containing the region's capitalized initial
            to_drop = [el for el in raw.ch_names
                       if region[0].upper() not in el]
        raw.drop_channels(to_drop)
    elif region is not None:
        # `None` is the documented way to ask for all channels, so only
        # an actually unsupported value is reported
        print('Your region of interest is not supported. Choose one of '
              + str(const.SUPPORTED_REGIONS) + '. Will return full set.')

    if n_samples:
        signal = raw._data[:, :n_samples]
        times = raw.times[:n_samples]
    else:
        signal = raw._data
        times = raw.times

    # round-trip through the project's datetime format
    start_time = datetime.datetime.utcfromtimestamp(
        raw.info['meas_date']).strftime(const.FORMATS['datetime'])
    d = {
        'signal': signal * 1000000,  # convert V to µV
        'srate': raw.info['sfreq'],
        'ch_names': raw.info['ch_names'],
        'n_chans': len(raw.info['ch_names']),
        'time_points': times * 1000,  # convert s to ms
        'markers': [],
        'starttime': datetime.datetime.strptime(
            start_time, const.FORMATS['datetime']),
        'subj_id': subj_id
    }
    d = hlp.fix_known_errors(d)
    print('Successfully read file(s) ' + ', '.join(paths))

    if return_type == 'wyrm':
        data = d['signal'].transpose()
        axes = [d['time_points'], d['ch_names']]
        names = ['time', 'channels']
        # the time axis holds milliseconds (see 'time_points' above)
        units = ['ms', 'µV']
        data = Data(data, axes, names, units)
        data.fs = d['srate']
        data.markers = d['markers']
        data.starttime = d['starttime']
    elif return_type == 'dict':
        data = d
    else:
        msg = 'Return_type {} not supported.'.format(return_type)
        raise TypeError(msg)

    if dir_out:
        if not os.path.exists(dir_out):
            os.makedirs(dir_out)
        # name the output after the first input file; joining the path
        # properly also works for a `dir_out` without trailing separator
        fname_out = _with_extension(paths[0].split('/')[-1], 'npy')
        path_out = os.path.join(dir_out, fname_out)
        save_as_npy(data=data, path_out=path_out)

    return data