def minibatch(self, dataset, subset, use_datasets, cache_data,
              shift_ratio=-1):
  if shift_ratio < 0:
    shift_ratio = self.shift_ratio
  with tf.name_scope('batch_processing'):
    # Build final results per split.
    images = [[] for _ in range(self.num_splits)]
    labels = [[] for _ in range(self.num_splits)]
    if use_datasets:
      # tf.data path: the dataset pipeline batches and preprocesses the
      # records, and each split pulls its next batch from a shared iterator.
      ds = data_utils.create_dataset(
          self.batch_size, self.num_splits, self.batch_size_per_split,
          self.parse_and_preprocess, dataset, subset, self.train,
          cache_data)
      ds_iterator = data_utils.create_iterator(ds)
      for d in xrange(self.num_splits):
        labels[d], images[d] = ds_iterator.get_next()
    else:
      # RecordInput path: read raw TFRecords, parse and preprocess each
      # record individually, and assign records to splits round-robin.
      record_input = data_flow_ops.RecordInput(
          file_pattern=dataset.tf_record_pattern(subset),
          seed=301,
          parallelism=64,
          buffer_size=10000,
          batch_size=self.batch_size,
          shift_ratio=shift_ratio,
          name='record_input')
      records = record_input.get_yield_op()
      records = tf.split(records, self.batch_size, 0)
      records = [tf.reshape(record, []) for record in records]
      for idx in xrange(self.batch_size):
        value = records[idx]
        (label, image) = self.parse_and_preprocess(value, idx)
        split_index = idx % self.num_splits
        labels[split_index].append(label)
        images[split_index].append(image)

    for split_index in xrange(self.num_splits):
      if not use_datasets:
        # The RecordInput path yields lists of per-example tensors, so
        # stack and concatenate them into per-split batch tensors.
        images[split_index] = tf.parallel_stack(images[split_index])
        labels[split_index] = tf.concat(labels[split_index], 0)
      images[split_index] = tf.reshape(
          images[split_index],
          shape=[self.batch_size_per_split, self.height, self.width,
                 self.depth])
      labels[split_index] = tf.reshape(labels[split_index],
                                       [self.batch_size_per_split])
    return images, labels
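
# A minimal, runnable sketch (illustrative only; not part of the class
# above, and the names are hypothetical) of the round-robin assignment the
# RecordInput path uses: record idx goes to split idx % num_splits, so each
# of the num_splits towers receives batch_size / num_splits examples.
def _round_robin_split_demo(batch_size=8, num_splits=2):
  records = ['rec%d' % i for i in range(batch_size)]  # stand-ins for tensors
  splits = [[] for _ in range(num_splits)]
  for idx in range(batch_size):
    splits[idx % num_splits].append(records[idx])
  # With the defaults this returns:
  #   [['rec0', 'rec2', 'rec4', 'rec6'], ['rec1', 'rec3', 'rec5', 'rec7']]
  return splits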
def minibatch(self, dataset, subset, use_datasets, cache_data,
              shift_ratio=-1):
  try:
    from object_detection.data_decoders import tf_example_decoder  # pylint: disable=g-import-not-at-top
    import ssd_dataloader  # pylint: disable=g-import-not-at-top
  except ImportError:
    raise ImportError('To use the COCO dataset, you must clone the '
                      'repo https://github.com/tensorflow/models and add '
                      'tensorflow/models and tensorflow/models/research to '
                      'the PYTHONPATH, and compile the protobufs by '
                      'following https://github.com/tensorflow/models/blob/'
                      'master/research/object_detection/g3doc/installation.md'
                      '#protobuf-compilation')
  if shift_ratio < 0:
    shift_ratio = self.shift_ratio
  self.example_decoder = tf_example_decoder.TfExampleDecoder()
  with tf.name_scope('batch_processing'):
    images = [[] for _ in range(self.num_splits)]
    labels = [[] for _ in range(self.num_splits)]
    # Feed the SSD input pipeline every TFRecord file matching the subset's
    # glob pattern; the second argument tells the reader whether this is the
    # training subset.
    glob_pattern = dataset.tf_record_pattern(subset)
    filenames = gfile.Glob(glob_pattern)
    ssd_input = ssd_dataloader.SSDInputReader(filenames, subset == 'train')
    ds = ssd_input({
        'batch_size_per_split': self.batch_size_per_split,
        'num_splits': self.num_splits
    })
    ds_iterator = data_utils.create_iterator(ds)
    for d in range(self.num_splits):
      images[d], labels[d] = ds_iterator.get_next()

    for split_index in xrange(self.num_splits):
      images[split_index] = tf.reshape(
          images[split_index],
          shape=[self.batch_size_per_split, self.height, self.width,
                 self.depth])
      labels[split_index] = tf.reshape(
          labels[split_index],
          # Encoded tensor with the object category, the number of bounding
          # boxes, and their locations. The 0th dimension is batch size, and
          # for each item in the batch the tensor looks like this, where
          # (x, y, w, h) are the coordinates and size of a bounding box and
          # c is the class:
          #
          # [[x,      y,      w,      h,      c     ],   ^
          #  [x,      y,      w,      h,      c     ],   |
          #  [...,    ...,    ...,    ...,    ...   ],   NUM_SSD_BOXES+1
          #  [x,      y,      w,      h,      c     ],   |
          #  [nboxes, nboxes, nboxes, nboxes, nboxes]]   v
          #
          # |<---------- 4 coordinates + 1 --------->|
          shape=[self.batch_size_per_split,
                 ssd_constants.NUM_SSD_BOXES + 1, 5])
    return images, labels
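
# A hedged NumPy sketch (illustrative only; not part of the benchmark code)
# of the encoded SSD label layout documented above: num_ssd_boxes rows of
# [x, y, w, h, class] followed by one sentinel row repeating the true box
# count in all five columns. A small num_ssd_boxes is used for readability.
import numpy as np

def _encode_labels_demo(boxes, classes, num_ssd_boxes=4):
  """boxes: list of (x, y, w, h) tuples; classes: ints of the same length."""
  encoded = np.zeros((num_ssd_boxes + 1, 5), dtype=np.float32)
  for i, ((x, y, w, h), c) in enumerate(zip(boxes, classes)):
    encoded[i] = [x, y, w, h, c]
  encoded[-1] = [len(boxes)] * 5  # the last row carries the box count
  return encoded

# _encode_labels_demo([(0.1, 0.2, 0.3, 0.4)], [7]) returns:
#   [[0.1, 0.2, 0.3, 0.4, 7.],
#    [0. , 0. , 0. , 0. , 0.],
#    [0. , 0. , 0. , 0. , 0.],
#    [0. , 0. , 0. , 0. , 0.],
#    [1. , 1. , 1. , 1. , 1. ]]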