Example #1
def interim(model_name, setname):
    """ Convert GAN image samples to tfrecord

    """

    _dir_results = os.path.join('models', model_name, 'results', setname, 'Samples')
    _dir_interim = os.path.join('data/interim', model_name, setname)
    utils.checkfolder(_dir_interim)

    # list filenames and classes. Also divides filenames into equally sized shards
    filenames, class_names = _get_filenames_and_classes(_dir_results)

    # save class dictionary
    class_dict = dict(zip(class_names, range(len(class_names))))
    utils.save_dict(class_dict, _dir_interim, 'class_dict.json')

    # convert images to tf records based on the list of filenames
    for shard_n in range(_NUM_SHARDS):
        utils.show_message('Processing shard %d/%d' % (shard_n + 1, _NUM_SHARDS))
        tf_filename = _get_output_filename(_dir_interim, shard_n)

        with tf.python_io.TFRecordWriter(tf_filename) as tfrecord_writer:
            _convert_to_tfrecord(filenames[shard_n], class_dict, tfrecord_writer)
        
    print('\nFinished converting GAN samples to tfrecord for %s %s!' % (model_name, setname))
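The helpers _get_filenames_and_classes, _get_output_filename, and _convert_to_tfrecord are not shown in this example. As a rough, hypothetical sketch only, the shard-filename helper could be as simple as the following (the naming pattern is an assumption, not the project's actual convention):

# Hypothetical sketch of the shard-filename helper used above; the actual
# naming pattern in the project is not shown and is assumed here.
def _get_output_filename(output_dir, shard_n):
    return os.path.join(output_dir, 'data_%05d-of-%05d.tfrecord' % (shard_n, _NUM_SHARDS))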
Example #2
    def save(self, path):
        info = dict()
        m = self.mean
        s = self.stdv
        for i, ch_name in enumerate(self.ch_names):
            info[ch_name] = {
                'mean_microvolt': float(m[i][0]),
                'stdv_microvolt': float(s[i][0])
            }
        save_dict(info, path)
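The save_dict helper itself is not shown. A minimal sketch of a two-argument version compatible with the call above, assuming pickle serialization (the real format could equally be JSON), might look like:

import pickle

def save_dict(d, path):
    # Serialize a plain Python dict to disk; the format (pickle) is an assumption.
    with open(path, 'wb') as f:
        pickle.dump(d, f)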
Example #3
    def prepare(self):
        train_files, train_labels = self._get_all_sorted_file_names_and_labels(train=True)
        assert len(train_files) == len(train_labels) and len(train_files) != 0

        test_files, test_labels = self._get_all_sorted_file_names_and_labels(train=False)
        assert len(test_files) == len(test_labels) and len(test_files) != 0

        # Find out normalization statistics:
        preprocessing_functions = self.default_preprocessing_functions()

        ch_names = DataGenerator.wanted_electrodes['EEG']
        if self.use_ekg:
            ch_names = ch_names + DataGenerator.wanted_electrodes['EKG']

        for split_type, split_files in zip(['train', 'test'],
                                           [train_files, test_files]):

            output_data_dir = os.path.join(self.cache_path, split_type, 'data')
            output_info_dir = os.path.join(self.cache_path, split_type, 'info')
            os.makedirs(output_data_dir, exist_ok=True)
            os.makedirs(output_info_dir, exist_ok=True)

            # Could be parallelized in the future
            for i, file in enumerate(split_files):
                try:
                    sensor_types = ('EEG', 'EKG1') if self.use_ekg else ('EEG',)
                    data, info_dict = self._load_file(file, preprocessing_functions, sensor_types)
                except RuntimeError:
                    sensor_types = ('EEG', 'EKG') if self.use_ekg else ('EEG',)
                    data, info_dict = self._load_file(file, preprocessing_functions, sensor_types)

                # Find normalization for the data
                mean = np.mean(data, dtype=np.float32)
                std = np.std(data, dtype=np.float32)
                info_dict['mean'] = float(mean)
                info_dict['std'] = float(std)

                name = '%s_%s_Age_%s_Gender_%s' % (str(info_dict['recording_date']), str(info_dict['sequence_name']),
                                                   str(info_dict['age']), info_dict['gender'])
                output_file_path = os.path.join(output_data_dir, name + '_raw.fif')
                output_info_path = os.path.join(output_info_dir, name + '.p')

                info = mne.create_info(ch_names, sfreq=self.sampling_freq)
                fif_array = mne.io.RawArray(data, info)
                fif_array.save(output_file_path)

                save_dict(info_dict, output_info_path)

                print('Split Type: %s, Progress: %g' % (split_type, (i+1)/len(split_files)))
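For context, a recording cached by prepare() could later be read back roughly as follows. This is a hedged sketch: the concrete file names below are placeholders rather than paths produced by the code above, and the .p info file is assumed to be a pickle.

import pickle
import mne

# Illustrative placeholders for one cached recording under cache_path/train/.
raw = mne.io.read_raw_fif('cache/train/data/example_raw.fif', preload=True)
with open('cache/train/info/example.p', 'rb') as f:
    info_dict = pickle.load(f)
print(info_dict['mean'], info_dict['std'])  # per-recording normalization statistics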
Example #4
def process(dataset_part):
    """Runs the conversion operation.

    Args:
      dataset_part: The dataset part to be converted [Nonsegmented, Segmented].
    """
    if dataset_part == 'Nonsegmented':
        _dir_raw = _DIR_RAW_NONSEGMENTED
        _dir_processed = _DIR_PROCESSED_NONSEGMENTED
        setname = 'Nonsegmented'
    else:
        _dir_raw = _DIR_RAW_SEGMENTED
        _dir_processed = _DIR_PROCESSED_SEGMENTED
        setname = 'Segmented'

    if _EXCLUDED_GRASSES:
        exclude_list = ['Black-grass', 'Common wheat', 'Loose Silky-bent']
    else:
        exclude_list = []

    # extract raw data
    data_filename = _dir_raw
    archive = zipfile.ZipFile(data_filename)
    archive.extractall(_dir_processed)

    # list filenames and classes. Also divides filenames into equally sized shards
    filenames, class_names = _get_filenames_and_classes(
        _dir_processed, [setname], exclude_list)

    # save class dictionary
    class_dict = dict(zip(class_names, range(len(class_names))))
    utils.save_dict(class_dict, _dir_processed, 'class_dict.json')

    # convert images to tf records based on the list of filenames
    for shard_n in range(_NUM_SHARDS):
        utils.show_message('Processing shard %d/%d' %
                           (shard_n + 1, _NUM_SHARDS))
        tf_filename = _get_output_filename(_dir_processed, shard_n)

        with tf.python_io.TFRecordWriter(tf_filename) as tfrecord_writer:
            _convert_to_tfrecord(filenames[shard_n], class_dict,
                                 tfrecord_writer)

    # clean up
    tmp_dir = os.path.join(_dir_processed, setname)
    tf.gfile.DeleteRecursively(tmp_dir)

    print('\nFinished converting the PSD %s dataset!' % setname)
Example #5
    validation_stories, _, _ = read_babi(path_babi_base,
                                         to_read_val,
                                         args.babi_tasks,
                                         only_relevant=args.only_relevant)
    validation_stories = vectorize_babi(validation_stories, dictionary,
                                        args.batch_size_stories, device)

test_stories, _, _ = read_babi(path_babi_base,
                               to_read_test,
                               args.babi_tasks,
                               only_relevant=args.only_relevant)
test_stories = vectorize_babi(test_stories, dictionary,
                              args.batch_size_stories, device)

if not args.load:
    save_dict(dictionary)
else:
    dictionary = load_dict()

dict_size = len(dictionary)
print("Dictionary size: ", dict_size)
print("Done reading babi!")

lstm = LSTM(args.hidden_dim_lstm, args.batch_size_stories, dict_size,
            args.emb_dim, args.lstm_layers, device).to(device)

rn = RelationNetwork(args.hidden_dim_lstm, args.hidden_dims_g,
                     args.output_dim_g, args.hidden_dims_f, dict_size,
                     args.batch_size_stories, device).to(device)

if args.load:
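The snippet above both writes and reads the vocabulary via save_dict and load_dict. A minimal sketch of such a pair, assuming a pickled dictionary at a fixed default path (neither the path nor the format is shown in the example), could be:

import pickle

_DICT_PATH = 'dictionary.p'  # assumed default location

def save_dict(dictionary, path=_DICT_PATH):
    with open(path, 'wb') as f:
        pickle.dump(dictionary, f)

def load_dict(path=_DICT_PATH):
    with open(path, 'rb') as f:
        return pickle.load(f)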
Example #6
def process(dataset_part):
    """Runs the download and conversion operation.

    Args:
      dataset_dir: The dataset directory where the dataset is stored.
    """
    if dataset_part == 'Nonsegmented':
        _dir_raw = _DIR_RAW_NONSEGMENTED
        _dir_processed = _DIR_PROCESSED_NONSEGMENTED
        setname = 'Nonsegmented'
        #training_filename = _get_output_filename(_DIR_PROCESSED_NONSEGMENTED, 'train')
        # testing_filename = _get_output_filename(_DIR_PROCESSED_NONSEGMENTED, 'test')
    else:
        _dir_raw = _DIR_RAW_SEGMENTED
        _dir_processed = _DIR_PROCESSED_SEGMENTED
        setname = 'Segmented'
        #training_filename = _get_output_filename(_DIR_PROCESSED_SEGMENTED, 'train')
        # testing_filename = _get_output_filename(_DIR_PROCESSED_SEGMENTED, 'test')

    #if tf.gfile.Exists(training_filename): #and tf.gfile.Exists(testing_filename):
    #    print('Dataset files already exist. Exiting without re-creating them.')
    #    return


    if _EXCLUDED_GRASSES:
        exclude_list = ['Black-grass', 'Common wheat', 'Loose Silky-bent']
    else:
        exclude_list = []

    # First, process training data:

    data_filename = _dir_raw
    archive = zipfile.ZipFile(data_filename)
    archive.extractall(_dir_processed)
    filenames, class_names = _get_filenames_and_classes(_dir_processed, [setname], exclude_list)

    class_dict = dict(zip(class_names, range(len(class_names))))
    utils.save_dict(class_dict, _dir_processed, 'class_dict.json')

    for shard_n in range(_NUM_SHARDS):
        utils.show_message('Processing shard %d/%d' % (shard_n + 1, _NUM_SHARDS))
        tf_filename = _get_output_filename(_dir_processed, shard_n)

        with tf.python_io.TFRecordWriter(tf_filename) as tfrecord_writer:
            _convert_to_tfrecord(filenames[shard_n], class_dict, tfrecord_writer)

    tmp_dir = os.path.join(_dir_processed, setname)
    tf.gfile.DeleteRecursively(tmp_dir)

    # # First, process test data:
    # with tf.python_io.TFRecordWriter(testing_filename) as tfrecord_writer:
    #     data_filename = os.path.join(_dir_raw)
    #     archive = zipfile.ZipFile(data_filename)
    #     archive.extractall(_dir_processed)
    #     # filenames, class_names = _get_filenames_and_classes(_dir_processed, [setname, 'test'], exclude_list)
    #     class_dict = dict(zip(class_names, range(len(class_names))))

    #     _convert_to_tfrecord(filenames, class_dict, tfrecord_writer)

    #     tmp_dir = os.path.join(_dir_processed, setname)
    #     tf.gfile.DeleteRecursively(tmp_dir)

    print('\nFinished converting the PSD %s dataset!' % setname)
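As a usage sketch, assuming the module-level constants (_DIR_RAW_SEGMENTED, _DIR_PROCESSED_SEGMENTED, and so on) are already configured, both dataset parts could be converted with:

for part in ('Nonsegmented', 'Segmented'):
    process(part)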