def next_batch(self):
    """Return the next slice of every feature as a dict keyed by feature name.

    When ``self.last_batch()`` is true, the dataset is first reshuffled
    (only if ``self.should_shuffle`` is set) and the batcher is reset
    before the slice is taken.  The slice covers indices
    ``[self.index, min(self.index + self.batch_size, self.total_size))``.
    ``self.index`` is then advanced by ``self.batch_size``.
    """
    if self.last_batch():
        if self.should_shuffle:
            self.dataset = shuffle_dict_unison_inplace(self.dataset)
        self.reset()

    # The bounds are the same for every feature; clamp the upper one so the
    # final batch does not run past the end of the data.
    start = self.index
    stop = min(start + self.batch_size, self.total_size)

    data = self.dataset
    # A fresh range is built per feature, exactly as one is per lookup.
    batch = {
        name: data.get(name, range(start, stop))
        for name in data.features
    }

    self.index = start + self.batch_size
    return batch
def load_data(
        hdf5_file_path,
        input_features,
        output_features,
        split_data=True,
        shuffle_training=False
):
    """Load the arrays of the requested features from an HDF5 file.

    For every feature the array is read from the file and stored in a dict.
    Features whose ``'type'`` is ``TEXT`` are read from (and stored under)
    the field name returned by ``text_feature_data_field``; all other
    features use their ``'name'``.  Output features that carry a ``'limit'``
    key additionally have the array stored under their ``'name'`` passed
    through ``collapse_rare_labels``.

    Args:
        hdf5_file_path: Path of the HDF5 file to open read-only.
        input_features: Iterable of feature dicts with at least ``'type'``
            and ``'name'`` keys.
        output_features: Iterable of feature dicts with at least ``'type'``
            and ``'name'`` keys, and optionally ``'limit'``.
        split_data: When false, the whole dataset dict is returned and the
            ``'split'`` field of the file is not read.
        shuffle_training: When true (and ``split_data`` is true), the
            training set is passed through
            ``data_utils.shuffle_dict_unison_inplace`` before returning.

    Returns:
        The dataset dict when ``split_data`` is false; otherwise the tuple
        ``(training_set, test_set, validation_set)`` produced by
        ``split_dataset_tvt``.

    Raises:
        KeyError: If a requested field (or ``'split'``) is missing from the
            file, or a feature dict lacks a required key.
    """
    logger.info('Loading data from: {0}'.format(hdf5_file_path))

    def _data_field(feature):
        # Text features live under a derived field name, not their own name.
        if feature['type'] == TEXT:
            return text_feature_data_field(feature)
        return feature['name']

    dataset = {}
    split = None

    # The context manager releases the file handle even if a field is missing
    # or a read fails half-way; manual close() calls leaked it on errors.
    with h5py.File(hdf5_file_path, 'r') as hdf5_data:
        for input_feature in input_features:
            field = _data_field(input_feature)
            # `[()]` reads the full dataset into memory; it replaces the
            # deprecated `.value` property, which h5py 3.0 removed.
            dataset[field] = hdf5_data[field][()]

        for output_feature in output_features:
            field = _data_field(output_feature)
            dataset[field] = hdf5_data[field][()]
            if 'limit' in output_feature:
                dataset[output_feature['name']] = collapse_rare_labels(
                    dataset[output_feature['name']],
                    output_feature['limit']
                )

        # Only touch the split field when the caller asked for a split, so
        # files without it keep working with split_data=False.
        if split_data:
            split = hdf5_data['split'][()]

    if not split_data:
        return dataset

    training_set, test_set, validation_set = split_dataset_tvt(dataset, split)

    # shuffle up
    if shuffle_training:
        training_set = data_utils.shuffle_dict_unison_inplace(training_set)

    return training_set, test_set, validation_set
def next_batch(self):
    """Return the next slice of every feature as a dict keyed by feature name.

    When ``self.last_batch()`` is true, the dataset is reshuffled (only if
    ``self.should_shuffle`` is set) with a ``RandomState`` seeded by the
    current ``self.epoch``; the batcher is then reset and ``self.epoch`` is
    incremented.  The slice covers indices
    ``[self.index, min(self.index + self.batch_size, self.max_index))``.
    Afterwards ``self.index`` advances by ``self.batch_size`` and
    ``self.step`` by one.
    """
    if self.last_batch():
        if self.should_shuffle:
            # Seeding with the epoch number makes the permutation a function
            # of the epoch alone.
            rng = np.random.RandomState(self.epoch)
            self.dataset = shuffle_dict_unison_inplace(self.dataset, rng)
        self.reset()
        self.epoch += 1

    # Same bounds for every feature; the upper one is clamped to max_index.
    start = self.index
    stop = min(start + self.batch_size, self.max_index)

    data = self.dataset
    # A fresh range is built per feature, exactly as one is per lookup.
    batch = {
        name: data.get(name, range(start, stop))
        for name in data.features
    }

    self.index = start + self.batch_size
    self.step += 1
    return batch