def generate_cases(subject_id, trials, bad_channels=None):
    """Flatten per-trial EEG recordings into per-channel training cases.

    3x60 4x60 -> 12 * 60 = 720ms -> 60ms overlap

    Parameters
    ----------
    subject_id :
        Currently unused; kept for interface compatibility with callers.
    trials : dict
        Mapping of stimulus name -> 2-D sample array; the array is
        transposed here, so rows after transposition are channels.
    bad_channels : collection of int, optional
        1-based channel ids to skip.  Defaults to no exclusions.
        (Was a mutable default ``[]`` — replaced with a ``None`` sentinel
        so call sites can never share state through the default.)

    Returns
    -------
    tuple of numpy arrays
        ``(data, labels, channel_meta)`` vertically stacked over all
        retained channels of all trials.
    """
    if bad_channels is None:
        bad_channels = ()

    label_converter = LabelConverter()
    data = []
    labels = []
    channel_meta = []
    # .items() iterates identically on Python 2 and 3 (original used
    # the Py2-only iteritems()).
    for stimulus, trial_data in trials.items():
        label = label_converter.get_stimulus_id(stimulus)
        log.debug('processing {} with {} samples and label {}'.format(
            stimulus, trial_data.shape, label))
        channels = trial_data.transpose()
        for i, channel in enumerate(channels):
            channel_id = i + 1  # channel ids are 1-based
            if channel_id in bad_channels:
                log.debug('skipping bad channel {}'.format(channel_id))
                continue
            # convert to float32
            channel = np.asfarray(channel, dtype='float32')
            data.append(channel)
            labels.append(label)
            channel_meta.append(i)

    data = np.vstack(data)
    labels = np.vstack(labels)
    channel_meta = np.vstack(channel_meta)
    log.debug('generated {} data points and {} labels '.format(
        data.shape, labels.shape))
    return data, labels, channel_meta
def generate_cases(subject_id, trials, bad_channels=None):
    """Turn each trial's channel rows into individual labelled cases.

    3x60 4x60 -> 12 * 60 = 720ms -> 60ms overlap

    Parameters
    ----------
    subject_id :
        Unused at present; retained so existing callers keep working.
    trials : dict
        Stimulus name -> 2-D sample array (transposed to channel rows).
    bad_channels : collection of int, optional
        1-based channel ids to exclude; default excludes nothing.
        (The original mutable-list default ``[]`` is a classic Python
        pitfall — replaced by a ``None`` sentinel.)

    Returns
    -------
    tuple of numpy arrays
        ``(data, labels, channel_meta)`` stacked across trials/channels.
    """
    if bad_channels is None:
        bad_channels = ()

    label_converter = LabelConverter()
    data = []
    labels = []
    channel_meta = []
    # .items() works on both Python 2 and 3; statement-terminating
    # semicolons from the original have been dropped (PEP 8).
    for stimulus, trial_data in trials.items():
        label = label_converter.get_stimulus_id(stimulus)
        log.debug('processing {} with {} samples and label {}'.format(
            stimulus, trial_data.shape, label))
        channels = trial_data.transpose()
        for i, channel in enumerate(channels):
            channel_id = i + 1  # 1-based channel numbering
            if channel_id in bad_channels:
                log.debug('skipping bad channel {}'.format(channel_id))
                continue
            # convert to float32
            channel = np.asfarray(channel, dtype='float32')
            data.append(channel)
            labels.append(label)
            channel_meta.append(i)

    data = np.vstack(data)
    labels = np.vstack(labels)
    channel_meta = np.vstack(channel_meta)
    log.debug('generated {} data points and {} labels '.format(
        data.shape, labels.shape))
    return data, labels, channel_meta
def import_dataset(source_path, target_path):
    """Import raw per-subject EEG sessions into per-trial dataset files.

    For each subject found under ``source_path`` (``Sub###*``), the
    session is split into fixed-length trials, subject-specific bad
    channels are removed, samples are converted to float32, and each
    trial is saved under ``target_path`` together with its metadata.
    A combined metadata DB is written to ``metadata_db.pklz``.

    Parameters
    ----------
    source_path : str
        Directory containing the raw ``Sub###*`` recordings.
    target_path : str
        Directory that receives trial files and the metadata DB.
    """
    SAMPLE_RATE = 400    # in Hz
    TRIAL_LENGTH = 32    # in sec
    TRIAL_LENGTH += 4    # add 4s after end of presentation
    TRIAL_SAMPLE_LENGTH = SAMPLE_RATE * TRIAL_LENGTH

    log.info('using dataset at {}'.format(source_path))

    # Note from Dan: All subjects should have channels 15, 16, 17 and 18
    # removed [...] If you want to make them truly identical, you could
    # remove channel 19 from the subjects with more channels, although
    # this should be 'good' data.
    bad_channels = {
        1:  [5, 6, 15, 16, 17, 18, 20, 21],
        2:  [7, 8, 15, 16, 17, 18, 20, 21],
        3:  [5, 6, 15, 16, 17, 18, 20, 21],
        4:  [7, 8, 15, 16, 17, 18, 20, 21],
        5:  [7, 8, 15, 16, 17, 18, 20, 21],
        6:  [7, 8, 9, 12, 15, 16, 17, 18],
        7:  [5, 6, 12, 15, 16, 17, 18, 20],
        8:  [7, 8, 15, 16, 17, 18, 20, 21],
        9:  [5, 6, 12, 15, 16, 17, 18, 20],
        10: [5, 6, 15, 16, 17, 18, 20, 21],
        11: [5, 6, 15, 16, 17, 18, 20, 21],
        12: [5, 6, 15, 16, 17, 18, 20, 21],
        13: [5, 6, 12, 15, 16, 17, 18, 20],
    }

    label_converter = LabelConverter()

    metadb_file = os.path.join(target_path, 'metadata_db.pklz')
    metadb = {}  # empty DB

    with log_timing(log, 'generating datasets'):
        # range/items behave identically to Py2's xrange/iteritems here
        # and keep the code runnable on Python 3.
        for subject_id in range(1, 14):
            search_path = os.path.join(source_path,
                                       'Sub{0:03d}*'.format(subject_id))
            sourcefile_path = glob.glob(search_path)
            # glob.glob always returns a list, so an emptiness check is
            # sufficient (the original also tested `is None`).
            if not sourcefile_path:
                log.warn('nothing found at {}'.format(search_path))
                continue
            sourcefile_path = sourcefile_path[0]

            trials = split_session(sourcefile_path, TRIAL_SAMPLE_LENGTH)

            for stimulus, trial_data in trials.items():
                stimulus_id = label_converter.get_stimulus_id(stimulus)
                log.debug(
                    'processing {} with {} samples and stimulus_id {}'.format(
                        stimulus, trial_data.shape, stimulus_id))

                channels = trial_data.transpose()
                trial_data = []
                channel_ids = []
                for i, channel in enumerate(channels):
                    channel_id = i + 1  # 1-based channel numbering
                    # filter bad channels
                    if channel_id in bad_channels[subject_id]:
                        log.debug(
                            'skipping bad channel {}'.format(channel_id))
                        continue
                    # convert to float32
                    channel = np.asfarray(channel, dtype='float32')
                    trial_data.append(channel)
                    channel_ids.append(channel_id)

                # format: (samples, channels)
                trial_data = np.vstack(trial_data).transpose()
                log.debug('extracted {} from channels: {}'.format(
                    trial_data.shape, channel_ids))

                # raw label (unsorted) -> sorted label id
                label = label_converter.get_label(stimulus_id, 'rhythm')
                label = label_converter.shuffle_classes[label]

                metadata = {
                    'subject': subject_id,
                    'label': label,
                    'meta_label': label_converter.get_label(
                        stimulus_id, 'rhythm_meta'),
                    'stimulus': stimulus,
                    'stimulus_id': stimulus_id,
                    'rhythm_type': label_converter.get_label(
                        stimulus_id, 'rhythm'),
                    'tempo': label_converter.get_label(stimulus_id, 'tempo'),
                    'audio_file': label_converter.get_label(
                        stimulus_id, 'audio_file'),
                    'trial_no': 1,
                    'trial_type': 'perception',
                    'condition': 'n/a',
                    'channels': channel_ids,
                }

                # save data
                savepath = generate_filepath_from_metadata(metadata)
                save(os.path.join(target_path, savepath),
                     (trial_data, metadata), mkdirs=True)

                # save metadata
                metadb[savepath] = metadata

                log.debug('imported {}={} as {}'.format(
                    label, metadata['meta_label'], savepath))

    save(metadb_file, metadb, mkdirs=True)
    log.info('import finished')
def import_dataset(source_path, target_path):
    """Convert raw subject recordings into per-trial files plus a metadata DB.

    Each ``Sub###*`` recording under ``source_path`` is split into
    fixed-length trials; subject-specific bad channels are dropped,
    the remaining samples become float32, and every trial is saved to
    ``target_path`` with its metadata.  All metadata is additionally
    collected in ``metadata_db.pklz``.

    Parameters
    ----------
    source_path : str
        Directory with the raw source recordings.
    target_path : str
        Output directory for trial files and the metadata DB.
    """
    SAMPLE_RATE = 400    # in Hz
    TRIAL_LENGTH = 32    # in sec
    TRIAL_LENGTH += 4    # add 4s after end of presentation
    TRIAL_SAMPLE_LENGTH = SAMPLE_RATE * TRIAL_LENGTH

    log.info('using dataset at {}'.format(source_path))

    # Note from Dan: All subjects should have channels 15, 16, 17 and 18
    # removed [...] If you want to make them truly identical, you could
    # remove channel 19 from the subjects with more channels, although
    # this should be 'good' data.
    bad_channels = {
        1:  [5, 6, 15, 16, 17, 18, 20, 21],
        2:  [7, 8, 15, 16, 17, 18, 20, 21],
        3:  [5, 6, 15, 16, 17, 18, 20, 21],
        4:  [7, 8, 15, 16, 17, 18, 20, 21],
        5:  [7, 8, 15, 16, 17, 18, 20, 21],
        6:  [7, 8, 9, 12, 15, 16, 17, 18],
        7:  [5, 6, 12, 15, 16, 17, 18, 20],
        8:  [7, 8, 15, 16, 17, 18, 20, 21],
        9:  [5, 6, 12, 15, 16, 17, 18, 20],
        10: [5, 6, 15, 16, 17, 18, 20, 21],
        11: [5, 6, 15, 16, 17, 18, 20, 21],
        12: [5, 6, 15, 16, 17, 18, 20, 21],
        13: [5, 6, 12, 15, 16, 17, 18, 20],
    }

    label_converter = LabelConverter()

    metadb_file = os.path.join(target_path, 'metadata_db.pklz')
    metadb = {}  # empty DB

    with log_timing(log, 'generating datasets'):
        # range()/items() replace Py2-only xrange()/iteritems() with
        # identical iteration behavior.
        for subject_id in range(1, 14):
            search_path = os.path.join(source_path,
                                       'Sub{0:03d}*'.format(subject_id))
            sourcefile_path = glob.glob(search_path)
            # glob.glob returns a (possibly empty) list, never None.
            if not sourcefile_path:
                log.warn('nothing found at {}'.format(search_path))
                continue
            sourcefile_path = sourcefile_path[0]

            trials = split_session(sourcefile_path, TRIAL_SAMPLE_LENGTH)

            for stimulus, trial_data in trials.items():
                stimulus_id = label_converter.get_stimulus_id(stimulus)
                log.debug(
                    'processing {} with {} samples and stimulus_id {}'.format(
                        stimulus, trial_data.shape, stimulus_id))

                channels = trial_data.transpose()
                trial_data = []
                channel_ids = []
                for i, channel in enumerate(channels):
                    channel_id = i + 1  # channels are numbered from 1
                    # filter bad channels
                    if channel_id in bad_channels[subject_id]:
                        log.debug(
                            'skipping bad channel {}'.format(channel_id))
                        continue
                    # convert to float32
                    channel = np.asfarray(channel, dtype='float32')
                    trial_data.append(channel)
                    channel_ids.append(channel_id)

                # format: (samples, channels)
                trial_data = np.vstack(trial_data).transpose()
                log.debug('extracted {} from channels: {}'.format(
                    trial_data.shape, channel_ids))

                # raw, unsorted label -> sorted label id
                label = label_converter.get_label(stimulus_id, 'rhythm')
                label = label_converter.shuffle_classes[label]

                metadata = {
                    'subject': subject_id,
                    'label': label,
                    'meta_label': label_converter.get_label(
                        stimulus_id, 'rhythm_meta'),
                    'stimulus': stimulus,
                    'stimulus_id': stimulus_id,
                    'rhythm_type': label_converter.get_label(
                        stimulus_id, 'rhythm'),
                    'tempo': label_converter.get_label(stimulus_id, 'tempo'),
                    'audio_file': label_converter.get_label(
                        stimulus_id, 'audio_file'),
                    'trial_no': 1,
                    'trial_type': 'perception',
                    'condition': 'n/a',
                    'channels': channel_ids,
                }

                # save data
                savepath = generate_filepath_from_metadata(metadata)
                save(os.path.join(target_path, savepath),
                     (trial_data, metadata), mkdirs=True)

                # save metadata
                metadb[savepath] = metadata

                log.debug('imported {}={} as {}'.format(
                    label, metadata['meta_label'], savepath))

    save(metadb_file, metadb, mkdirs=True)
    log.info('import finished')