Example no. 1
def generate_cases(subject_id, trials, bad_channels=()):
    '''
    3x60
    4x60
    -> 12 * 60 = 720ms
    -> 60ms overlap
    '''
    # Assumes module-level objects from the surrounding project:
    # numpy as np, a `log` logger, and the LabelConverter class.
    label_converter = LabelConverter()

    data = []
    labels = []
    channel_meta = []

    for stimulus, trial_data in trials.items():
        label = label_converter.get_stimulus_id(stimulus)
        log.debug('processing {} with {} samples and label {}'.format(
            stimulus, trial_data.shape, label))

        # one case per channel: iterate over the (channels, samples) view
        channels = trial_data.transpose()
        for i, channel in enumerate(channels):
            channel_id = i + 1
            if channel_id in bad_channels:
                log.debug('skipping bad channel {}'.format(channel_id))
                continue

            # convert to float32
            channel = np.asarray(channel, dtype=np.float32)

            data.append(channel)
            labels.append(label)
            channel_meta.append(i)  # note: 0-based channel index, not channel_id

    data = np.vstack(data)
    labels = np.vstack(labels)
    channel_meta = np.vstack(channel_meta)

    log.debug('generated {} data points and {} labels'.format(
        data.shape, labels.shape))

    return data, labels, channel_meta
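
For reference, a minimal sketch of how generate_cases could be driven. The stimulus names, array shapes, and bad-channel list below are made up for illustration, and LabelConverter is assumed to resolve these stimulus names to ids:

import numpy as np

# Synthetic trials: stimulus name -> (samples, channels) array (shapes made up).
trials = {
    'stim_a': np.random.randn(720, 21),
    'stim_b': np.random.randn(720, 21),
}

data, labels, channel_meta = generate_cases(
    subject_id=1, trials=trials, bad_channels=[15, 16, 17, 18])

# One row per retained channel per trial:
# 2 stimuli * (21 - 4) channels = 34 rows of 720 samples each.
assert data.shape == (34, 720)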
Example no. 2
def import_dataset(source_path, target_path):
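    # Assumes module-level objects from the surrounding project: os, glob,
    # numpy as np, a `log` logger, log_timing, split_session, save,
    # generate_filepath_from_metadata, and LabelConverter.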

    SAMPLE_RATE = 400  # in Hz
    TRIAL_LENGTH = 32  # in sec

    TRIAL_LENGTH += 4  # add 4s after end of presentation

    TRIAL_SAMPLE_LENGTH = SAMPLE_RATE * TRIAL_LENGTH
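    # -> 36 s * 400 Hz = 14400 samples per trial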

    log.info('using dataset at {}'.format(source_path))
    '''
    Note from Dan:
    All subjects should have channels 15, 16, 17 and 18 removed [...]
    If you want to make them truly identical, you could remove channel 19 from
    the subjects with more channels, although this should be 'good' data.
    '''
    bad_channels = {}
    bad_channels[1]  = [5, 6,                   15, 16, 17, 18,  20, 21]
    bad_channels[2]  = [      7, 8,             15, 16, 17, 18,  20, 21]
    bad_channels[3]  = [5, 6,                   15, 16, 17, 18,  20, 21]
    bad_channels[4]  = [      7, 8,             15, 16, 17, 18,  20, 21]
    bad_channels[5]  = [      7, 8,             15, 16, 17, 18,  20, 21]
    bad_channels[6]  = [      7, 8, 9,  12,     15, 16, 17, 18         ]
    bad_channels[7]  = [5, 6,           12,     15, 16, 17, 18,  20    ]
    bad_channels[8]  = [      7, 8,             15, 16, 17, 18,  20, 21]
    bad_channels[9]  = [5, 6,           12,     15, 16, 17, 18,  20    ]
    bad_channels[10] = [5, 6,                   15, 16, 17, 18,  20, 21]
    bad_channels[11] = [5, 6,                   15, 16, 17, 18,  20, 21]
    bad_channels[12] = [5, 6,                   15, 16, 17, 18,  20, 21]
    bad_channels[13] = [5, 6,           12,     15, 16, 17, 18,  20    ]

    label_converter = LabelConverter()

    metadb_file = os.path.join(target_path, 'metadata_db.pklz')
    metadb = {}  # empty DB

    with log_timing(log, 'generating datasets'):
        for subject_id in range(1, 14):  # subjects 1..13
            search_path = os.path.join(source_path,
                                       'Sub{0:03d}*'.format(subject_id))
            sourcefile_path = glob.glob(search_path)

            if not sourcefile_path:
                log.warning('nothing found at {}'.format(search_path))
                continue
            sourcefile_path = sourcefile_path[0]

            trials = split_session(sourcefile_path, TRIAL_SAMPLE_LENGTH)
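            # trials maps each stimulus name to a 2-D (samples, channels) array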

            for stimulus, trial_data in trials.items():
                stimulus_id = label_converter.get_stimulus_id(stimulus)
                log.debug(
                    'processing {} with {} samples and stimulus_id {}'.format(
                        stimulus, trial_data.shape, stimulus_id))

                channels = trial_data.transpose()
                trial_data = []
                channel_ids = []
                for i, channel in enumerate(channels):
                    channel_id = i + 1
                    # filter bad channels
                    if channel_id in bad_channels[subject_id]:
                        log.debug('skipping bad channel {}'.format(channel_id))
                        continue

                    # convert to float32
                    channel = np.asarray(channel, dtype=np.float32)

                    trial_data.append(channel)
                    channel_ids.append(channel_id)

                trial_data = np.vstack(trial_data).transpose()  # format: (samples, channels)
                log.debug('extracted {} from channels: {}'.format(
                    trial_data.shape, channel_ids))

                label = label_converter.get_label(stimulus_id, 'rhythm')  # raw label, unsorted
                label = label_converter.shuffle_classes[label]  # sorted label id
                metadata = {
                    'subject': subject_id,
                    'label': label,
                    'meta_label': label_converter.get_label(stimulus_id, 'rhythm_meta'),
                    'stimulus': stimulus,
                    'stimulus_id': stimulus_id,
                    'rhythm_type': label_converter.get_label(stimulus_id, 'rhythm'),
                    'tempo': label_converter.get_label(stimulus_id, 'tempo'),
                    'audio_file': label_converter.get_label(stimulus_id, 'audio_file'),
                    'trial_no': 1,
                    'trial_type': 'perception',
                    'condition': 'n/a',
                    'channels': channel_ids,
                }

                # save data
                savepath = generate_filepath_from_metadata(metadata)
                save(os.path.join(target_path, savepath),
                     (trial_data, metadata),
                     mkdirs=True)

                # save metadata
                metadb[savepath] = metadata

                log.debug('imported {}={} as {}'.format(
                    label, metadata['meta_label'], savepath))

        save(metadb_file, metadb, mkdirs=True)
    log.info('import finished')
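
For orientation, a hedged sketch of reading the metadata DB back after an import. The path is made up, and it assumes the project's save() writes a gzipped pickle, as the .pklz extension suggests:

import gzip
import pickle

# Hypothetical read-back of metadata_db.pklz (assumes a gzipped-pickle layout).
with gzip.open('/data/processed/metadata_db.pklz', 'rb') as f:
    metadb = pickle.load(f)

# e.g. list every imported trial for subject 3
for savepath, meta in metadb.items():
    if meta['subject'] == 3:
        print(savepath, meta['label'], meta['stimulus'])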