Example #1
    def minibatch(self,
                  dataset,
                  subset,
                  use_datasets,
                  cache_data,
                  shift_ratio=-1):
        if shift_ratio < 0:
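            # A negative shift_ratio is a sentinel meaning "use the
            # instance-level default".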
            shift_ratio = self.shift_ratio
        with tf.name_scope('batch_processing'):
            # Build final results per split.
            images = [[] for _ in range(self.num_splits)]
            labels = [[] for _ in range(self.num_splits)]
            if use_datasets:
                ds = data_utils.create_dataset(self.batch_size,
                                               self.num_splits,
                                               self.batch_size_per_split,
                                               self.parse_and_preprocess,
                                               dataset, subset, self.train,
                                               cache_data)
                ds_iterator = data_utils.create_iterator(ds)
                for d in range(self.num_splits):
                    labels[d], images[d] = ds_iterator.get_next()

            else:
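                # data_flow_ops.RecordInput reads the TFRecord files directly
                # with its own parallel reader threads and shuffle buffer,
                # instead of going through tf.data.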
                record_input = data_flow_ops.RecordInput(
                    file_pattern=dataset.tf_record_pattern(subset),
                    seed=301,
                    parallelism=64,
                    buffer_size=10000,
                    batch_size=self.batch_size,
                    shift_ratio=shift_ratio,
                    name='record_input')
                records = record_input.get_yield_op()
                records = tf.split(records, self.batch_size, 0)
                records = [tf.reshape(record, []) for record in records]
                for idx in range(self.batch_size):
                    value = records[idx]
                    (label, image) = self.parse_and_preprocess(value, idx)
                    split_index = idx % self.num_splits
                    labels[split_index].append(label)
                    images[split_index].append(image)

            for split_index in range(self.num_splits):
                if not use_datasets:
                    images[split_index] = tf.parallel_stack(
                        images[split_index])
                    labels[split_index] = tf.concat(labels[split_index], 0)
                images[split_index] = tf.reshape(images[split_index],
                                                 shape=[
                                                     self.batch_size_per_split,
                                                     self.height, self.width,
                                                     self.depth
                                                 ])
                labels[split_index] = tf.reshape(labels[split_index],
                                                 [self.batch_size_per_split])
            return images, labels
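
For reference, here is a minimal standalone sketch of the round-robin sharding used by the non-dataset branch above: records are assigned to splits by idx % num_splits, then each split is assembled with tf.parallel_stack. The shapes, the toy fill-based preprocess, and TF 1.x graph mode are assumptions for illustration, not part of the original code.

    import tensorflow.compat.v1 as tf

    tf.disable_eager_execution()  # tf.parallel_stack needs graph mode

    batch_size, num_splits = 8, 2
    batch_size_per_split = batch_size // num_splits

    # Stand-ins for the decoded records; in the real code these come from
    # RecordInput via tf.split / tf.reshape.
    records = [tf.constant(float(i)) for i in range(batch_size)]

    images = [[] for _ in range(num_splits)]
    for idx in range(batch_size):
        image = tf.fill([4, 4, 3], records[idx])  # toy parse_and_preprocess
        images[idx % num_splits].append(image)    # round-robin split assignment

    # parallel_stack copies each input into the output as it becomes ready,
    # instead of waiting for all inputs like tf.stack does.
    images = [tf.parallel_stack(split) for split in images]

    with tf.Session() as sess:
        print([x.shape for x in sess.run(images)])  # [(4, 4, 4, 3), (4, 4, 4, 3)]
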
Example #2
    def minibatch(self,
                  dataset,
                  subset,
                  use_datasets,
                  cache_data,
                  shift_ratio=-1):
        if shift_ratio < 0:
            shift_ratio = self.shift_ratio
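        # TfExampleDecoder comes from tensorflow/models'
        # object_detection.data_decoders package (see the guarded import in
        # Example #4); it is only stored here, not called in this method.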
        self.example_decoder = tf_example_decoder.TfExampleDecoder()

        with tf.name_scope('batch_processing'):
            images = [[] for _ in range(self.num_splits)]
            labels = [[] for _ in range(self.num_splits)]

            glob_pattern = dataset.tf_record_pattern(subset)
            filenames = gfile.Glob(glob_pattern)
            ssd_input = ssd_dataloader.SSDInputReader(filenames,
                                                      subset == 'train')
            ds = ssd_input({
                'batch_size_per_split': self.batch_size_per_split,
                'num_splits': self.num_splits
            })
            ds_iterator = data_utils.create_iterator(ds)
            for d in range(self.num_splits):
                images[d], labels[d] = ds_iterator.get_next()
            for split_index in range(self.num_splits):
                images[split_index] = tf.reshape(images[split_index],
                                                 shape=[
                                                     self.batch_size_per_split,
                                                     self.height, self.width,
                                                     self.depth
                                                 ])
                labels[split_index] = tf.reshape(
                    labels[split_index],
                    # Encoded tensor with the object category, the number of
                    # bounding boxes, and their locations. The 0th dimension is
                    # the batch size, and for each item in the batch the tensor
                    # looks like this ((x, y, w, h) are the coordinates and size
                    # of a bounding box; c is the class):
                    #
                    # [[x,      y,      w,      h,      c     ],       ^
                    #  [x,      y,      w,      h,      c     ],       |
                    #  [...,    ...,    ...,    ...,    ...   ],  NUM_SSD_BOXES+1
                    #  [x,      y,      w,      h,      c     ],       |
                    #  [nboxes, nboxes, nboxes, nboxes, nboxes]]       v
                    #
                    # |<---------- 4 coordinates + 1 --------->|
                    shape=[
                        self.batch_size_per_split,
                        ssd_constants.NUM_SSD_BOXES + 1, 5
                    ])
            return images, labels
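
The padded label layout documented in the comment above can be unpacked with plain slicing. A minimal sketch, assuming NUM_SSD_BOXES = 8732 (standing in for ssd_constants.NUM_SSD_BOXES); the all-zero sample tensor is only there to check shapes.

    import tensorflow as tf

    NUM_SSD_BOXES = 8732  # stand-in for ssd_constants.NUM_SSD_BOXES

    def split_labels(labels):
        """Unpacks the [batch, NUM_SSD_BOXES + 1, 5] layout described above."""
        boxes = labels[:, :NUM_SSD_BOXES, :4]            # (x, y, w, h) per box
        classes = labels[:, :NUM_SSD_BOXES, 4]           # class column c
        num_boxes = tf.cast(labels[:, -1, 0], tf.int32)  # nboxes row repeats the count
        return boxes, classes, num_boxes

    # All-zero sample, just to check the shapes coming out.
    boxes, classes, num_boxes = split_labels(tf.zeros([2, NUM_SSD_BOXES + 1, 5]))
    print(boxes.shape, classes.shape, num_boxes.shape)  # (2, 8732, 4) (2, 8732) (2,)
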
Example #3

    def minibatch(self,
                  dataset,
                  subset,
                  use_datasets,
                  cache_data,
                  shift_ratio=-1):
        try:
            import ssd_dataloader  # pylint: disable=g-import-not-at-top
        except ImportError:
            raise ImportError(
                'To use the COCO dataset, you must clone the '
                'repo https://github.com/tensorflow/models and add '
                'tensorflow/models and tensorflow/models/research to '
                'the PYTHONPATH, and compile the protobufs by '
                'following https://github.com/tensorflow/models/blob/'
                'master/research/object_detection/g3doc/installation.md'
                '#protobuf-compilation')

        if shift_ratio < 0:
            shift_ratio = self.shift_ratio

        with tf.name_scope('batch_processing'):
            images = [[] for _ in range(self.num_splits)]
            labels = [[] for _ in range(self.num_splits)]

            glob_pattern = dataset.tf_record_pattern(subset)
            filenames = gfile.Glob(glob_pattern)
            ssd_input = ssd_dataloader.SSDInputReader(filenames,
                                                      subset == 'train')
            ds = ssd_input({
                'batch_size_per_split': self.batch_size_per_split,
                'num_splits': self.num_splits
            })
            ds_iterator = data_utils.create_iterator(ds)
            for d in range(self.num_splits):
                images[d], labels[d] = ds_iterator.get_next()
            for split_index in range(self.num_splits):
                images[split_index] = tf.reshape(images[split_index],
                                                 shape=[
                                                     self.batch_size_per_split,
                                                     self.height, self.width,
                                                     self.depth
                                                 ])
            return images, labels
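
SSDInputReader belongs to the benchmark's own code, but the glob-then-tf.data shape of the pipeline it wraps can be sketched independently. Here file_pattern and parse_fn are placeholders, and tf.data.AUTOTUNE assumes a recent TF 2.x; this is an illustrative sketch, not the reader's actual implementation.

    import tensorflow as tf

    def make_dataset(file_pattern, batch_size_per_split, parse_fn):
        # Same role as gfile.Glob(dataset.tf_record_pattern(subset)) above.
        filenames = tf.io.gfile.glob(file_pattern)
        ds = tf.data.TFRecordDataset(filenames)
        ds = ds.map(parse_fn, num_parallel_calls=tf.data.AUTOTUNE)
        ds = ds.batch(batch_size_per_split, drop_remainder=True)
        return ds.prefetch(tf.data.AUTOTUNE)
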
Example #4
    def minibatch(self,
                  dataset,
                  subset,
                  use_datasets,
                  cache_data,
                  shift_ratio=-1):
        try:
            from object_detection.data_decoders import tf_example_decoder  # pylint: disable=g-import-not-at-top
            import ssd_dataloader  # pylint: disable=g-import-not-at-top
        except ImportError:
            raise ImportError(
                'To use the COCO dataset, you must clone the '
                'repo https://github.com/tensorflow/models and add '
                'tensorflow/models and tensorflow/models/research to '
                'the PYTHONPATH, and compile the protobufs by '
                'following https://github.com/tensorflow/models/blob/'
                'master/research/object_detection/g3doc/installation.md'
                '#protobuf-compilation')

        if shift_ratio < 0:
            shift_ratio = self.shift_ratio
        self.example_decoder = tf_example_decoder.TfExampleDecoder()

        with tf.name_scope('batch_processing'):
            images = [[] for _ in range(self.num_splits)]
            labels = [[] for _ in range(self.num_splits)]

            glob_pattern = dataset.tf_record_pattern(subset)
            filenames = gfile.Glob(glob_pattern)
            ssd_input = ssd_dataloader.SSDInputReader(filenames,
                                                      subset == 'train')
            ds = ssd_input({
                'batch_size_per_split': self.batch_size_per_split,
                'num_splits': self.num_splits
            })
            ds_iterator = data_utils.create_iterator(ds)
            for d in range(self.num_splits):
                images[d], labels[d] = ds_iterator.get_next()
            for split_index in range(self.num_splits):
                images[split_index] = tf.reshape(images[split_index],
                                                 shape=[
                                                     self.batch_size_per_split,
                                                     self.height, self.width,
                                                     self.depth
                                                 ])
                labels[split_index] = tf.reshape(
                    labels[split_index],
                    # Encoded tensor with the object category, the number of
                    # bounding boxes, and their locations. The 0th dimension is
                    # the batch size, and for each item in the batch the tensor
                    # looks like this ((x, y, w, h) are the coordinates and size
                    # of a bounding box; c is the class):
                    #
                    # [[x,      y,      w,      h,      c     ],       ^
                    #  [x,      y,      w,      h,      c     ],       |
                    #  [...,    ...,    ...,    ...,    ...   ],  NUM_SSD_BOXES+1
                    #  [x,      y,      w,      h,      c     ],       |
                    #  [nboxes, nboxes, nboxes, nboxes, nboxes]]       v
                    #
                    # |<---------- 4 coordinates + 1 --------->|
                    shape=[
                        self.batch_size_per_split,
                        ssd_constants.NUM_SSD_BOXES + 1, 5
                    ])
            return images, labels
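
All three dataset-based examples pull one batch per split from a single iterator by calling get_next() once per split; in graph mode each call creates its own dequeue op, so one Session.run fetches a distinct batch for every split. A standalone graph-mode sketch with a dummy fixed-shape dataset (the shapes are assumptions for illustration):

    import tensorflow.compat.v1 as tf

    tf.disable_eager_execution()

    num_splits, batch_size_per_split = 2, 4
    height, width, depth = 32, 32, 3

    # Dummy dataset yielding fixed-shape (image, label) batches forever.
    ds = tf.data.Dataset.from_tensors(
        (tf.zeros([batch_size_per_split, height, width, depth]),
         tf.zeros([batch_size_per_split], dtype=tf.int32))).repeat()
    it = tf.data.make_one_shot_iterator(ds)

    images, labels = [], []
    for _ in range(num_splits):
        image, label = it.get_next()  # a distinct dequeue op per split
        images.append(tf.reshape(image,
                                 [batch_size_per_split, height, width, depth]))
        labels.append(tf.reshape(label, [batch_size_per_split]))

    with tf.Session() as sess:
        imgs, lbls = sess.run([images, labels])
        print(imgs[0].shape, lbls[0].shape)  # (4, 32, 32, 3) (4,)
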