Example #1
    def __call__(self, params, input_context=None, batch_size=None):
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'],
                                        params['image_size'])
        anchor_labeler = anchors.AnchorLabeler(input_anchors,
                                               params['num_classes'])
        example_decoder = tf_example_decoder.TfExampleDecoder(
            include_mask='segmentation' in params['heads'],
            regenerate_source_id=params['regenerate_source_id'])

        batch_size = batch_size or params['batch_size']
        seed = params.get('tf_random_seed', None)
        dataset = tf.data.Dataset.list_files(self._file_pattern,
                                             shuffle=self._is_training,
                                             seed=seed)
        if input_context:
            dataset = dataset.shard(input_context.num_input_pipelines,
                                    input_context.input_pipeline_id)
        # Prefetch data from files.
        def _prefetch_dataset(filename):
            if params.get('dataset_type', None) == 'sstable':
                # The sstable path is not available here; fail loudly instead of
                # falling through and returning an unbound `dataset` variable.
                raise NotImplementedError('dataset_type=sstable is not supported.')
            dataset = tf.data.TFRecordDataset(filename).prefetch(1)
            return dataset

        dataset = dataset.interleave(_prefetch_dataset,
                                     num_parallel_calls=tf.data.AUTOTUNE,
                                     deterministic=bool(seed))
        dataset = dataset.with_options(self.dataset_options)
        if self._is_training:
            dataset = dataset.shuffle(64, seed=seed)

        # Parse the fetched records into input tensors for the model function.
        # pylint: disable=g-long-lambda
        if params.get('dataset_type', None) == 'sstable':
            map_fn = lambda key, value: self.dataset_parser(
                value, example_decoder, anchor_labeler, params)
        else:
            map_fn = lambda value: self.dataset_parser(value, example_decoder,
                                                       anchor_labeler, params)
        # pylint: enable=g-long-lambda
        dataset = dataset.map(map_fn, num_parallel_calls=tf.data.AUTOTUNE)
        dataset = dataset.prefetch(batch_size)
        dataset = dataset.batch(batch_size,
                                drop_remainder=params['drop_remainder'])
        dataset = dataset.map(
            lambda *args: self.process_example(params, batch_size, *args))
        dataset = dataset.prefetch(tf.data.AUTOTUNE)
        if self._is_training:
            dataset = dataset.repeat()
        if self._use_fake_data:
            # Turn this dataset into a semi-fake dataset that always loops over
            # the first batch. This reduces variance in performance and is
            # useful in testing.
            dataset = dataset.take(1).cache().repeat()
        return dataset
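A minimal sketch of driving the reader above, assuming the InputReader constructor seen in Example #4 (file pattern plus an is_training flag) and that the detection config dict supplies every key this __call__ reads (batch_size, drop_remainder, heads, regenerate_source_id, and so on); the file pattern and the added batch settings are purely illustrative, not the project's actual values.

import tensorflow as tf

# Hypothetical usage sketch; the file pattern and updated keys are assumptions.
params = hparams_config.get_detection_config('efficientdet-d0').as_dict()
params.update(dict(batch_size=8, drop_remainder=True))  # assumed to be required

reader = dataloader.InputReader('/path/to/train-*.tfrecord', True)  # is_training=True
dataset = reader(params)

# Inspect one batch; the exact (features, labels) structure depends on
# dataset_parser and process_example.
batch = next(iter(dataset.take(1)))
print(tf.nest.map_structure(lambda t: getattr(t, 'shape', t), batch))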
Example #2
    def __init__(self, iou_loss_type, min_level, max_level, num_scales,
                 aspect_ratios, anchor_scale, image_size, **kwargs):
        super().__init__(**kwargs)
        self.iou_loss_type = iou_loss_type
        self.input_anchors = anchors.Anchors(min_level, max_level, num_scales,
                                             aspect_ratios, anchor_scale,
                                             image_size)
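As a rough illustration of wiring this constructor up, the sketch below pulls the anchor hyperparameters from the D0 detection config the same way the other examples do; SomeIouLossLayer and the 'giou' loss-type string are stand-ins, since the snippet does not show the owning class or its valid loss types.

# Hypothetical wiring; `SomeIouLossLayer` stands in for whatever class owns
# this __init__, and 'giou' is just an illustrative loss-type string.
params = hparams_config.get_detection_config('efficientdet-d0').as_dict()
layer = SomeIouLossLayer(
    iou_loss_type='giou',
    min_level=params['min_level'],
    max_level=params['max_level'],
    num_scales=params['num_scales'],
    aspect_ratios=params['aspect_ratios'],
    anchor_scale=params['anchor_scale'],
    image_size=params['image_size'])
# The precomputed anchor boxes are then available to the loss via:
print(layer.input_anchors.boxes.shape)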
Example #3
def tflite_pre_nms(params, cls_outputs, box_outputs):
    """Pre-NMS that is compatible with TFLite's custom NMS op.

  For details, see tensorflow/lite/kernels/detection_postprocess.cc

  Args:
    params: a dict of parameters.
    cls_outputs: a list of tensors for classes, each tensor denotes a level of
      logits with shape [1, H, W, num_class * num_anchors].
    box_outputs: a list of tensors for boxes, each tensor ddenotes a level of
      boxes with shape [1, H, W, 4 * num_anchors]. Each box format is [y_min,
      x_min, y_max, x_man].

  Returns:
    boxes: boxes encoded as {y_center, x_center, height, width}
    scores: scores converted from `cls_outputs` logits using sigmoid
    anchors: normalized anchors encoded as {y_center, x_center, height, width}
  """
    cls_outputs = to_list(cls_outputs)
    box_outputs = to_list(box_outputs)
    cls_outputs, box_outputs = merge_class_box_level_outputs(
        params, cls_outputs, box_outputs)
    eval_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                   params['num_scales'],
                                   params['aspect_ratios'],
                                   params['anchor_scale'],
                                   params['image_size'])

    # TODO(b/175166514): Consider computing Top-K boxes & anchors here. We don't
    # do this currently since the resultant graph does not support TFLite
    # delegates well. `topk_class_boxes` won't work as-is, since the outputs
    # will need to be modified appropriately for TFLite op's consumption.

    # TFLite's object detection APIs require normalized anchors.
    height, width = utils.parse_image_size(params['image_size'])
    normalize_factor = tf.constant([height, width, height, width],
                                   dtype=tf.float32)
    normalized_anchors = eval_anchors.boxes / normalize_factor
    decoded_anchors = anchors.decode_anchors_to_centersize(
        box_outputs, normalized_anchors)

    # convert logits to scores.
    scores = tf.math.sigmoid(cls_outputs)

    return box_outputs, scores, decoded_anchors
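A shape-level smoke test for tflite_pre_nms might look like the sketch below; it assumes the D0 config, that the level-l feature map is the input size divided by 2**l, and that the per-level anchor count is num_scales * len(aspect_ratios). The zero-filled tensors are placeholders, not real network outputs.

import tensorflow as tf

params = hparams_config.get_detection_config('efficientdet-d0').as_dict()
height, width = utils.parse_image_size(params['image_size'])
num_anchors = params['num_scales'] * len(params['aspect_ratios'])  # assumed

cls_outputs, box_outputs = [], []
for level in range(params['min_level'], params['max_level'] + 1):
    feat_h, feat_w = height // 2**level, width // 2**level
    cls_outputs.append(
        tf.zeros([1, feat_h, feat_w, params['num_classes'] * num_anchors]))
    box_outputs.append(tf.zeros([1, feat_h, feat_w, 4 * num_anchors]))

boxes, scores, decoded_anchors = tflite_pre_nms(params, cls_outputs, box_outputs)
print(boxes.shape, scores.shape, decoded_anchors.shape)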
Example #4
    def test_parser(self):
        tf.random.set_seed(111111)
        params = hparams_config.get_detection_config(
            'efficientdet-d0').as_dict()
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'],
                                        params['image_size'])
        anchor_labeler = anchors.AnchorLabeler(input_anchors,
                                               params['num_classes'])
        example_decoder = tf_example_decoder.TfExampleDecoder(
            regenerate_source_id=params['regenerate_source_id'])
        tfrecord_path = test_util.make_fake_tfrecord(self.get_temp_dir())
        dataset = tf.data.TFRecordDataset([tfrecord_path])
        value = next(iter(dataset))
        reader = dataloader.InputReader(tfrecord_path, True)
        result = reader.dataset_parser(value, example_decoder, anchor_labeler,
                                       params)
        self.assertEqual(len(result), 11)
Example #5
def pre_nms(params, cls_outputs, box_outputs, topk=True):
    """Detection post processing before nms.

  It takes the multi-level class and box predictions from network, merge them
  into unified tensors, and compute boxes, scores, and classes.

  Args:
    params: a dict of parameters.
    cls_outputs: a list of tensors for classes, each tensor denotes a level of
      logits with shape [N, H, W, num_class * num_anchors].
    box_outputs: a list of tensors for boxes, each tensor ddenotes a level of
      boxes with shape [N, H, W, 4 * num_anchors].
    topk: if True, select topk before nms (mainly to speed up nms).

  Returns:
    A tuple of (boxes, scores, classes).
  """
    # Get boxes by applying bounding box regression to anchors.
    eval_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                   params['num_scales'],
                                   params['aspect_ratios'],
                                   params['anchor_scale'],
                                   params['image_size'])

    cls_outputs, box_outputs = merge_class_box_level_outputs(
        params, cls_outputs, box_outputs)

    if topk:
        # select topK purely based on scores before NMS, in order to speed up nms.
        cls_outputs, box_outputs, classes, indices = topk_class_boxes(
            params, cls_outputs, box_outputs)
        anchor_boxes = tf.gather(eval_anchors.boxes, indices)
    else:
        anchor_boxes = eval_anchors.boxes
        classes = None

    boxes = anchors.decode_box_outputs(box_outputs, anchor_boxes)
    # convert logits to scores.
    scores = tf.math.sigmoid(cls_outputs)
    return boxes, scores, classes
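The same kind of dummy-input check works for pre_nms, this time with a batch dimension larger than one; with topk=False every anchor position is kept and classes comes back as None. The feature-map stride and anchor-count assumptions are the same as in the sketch after Example #3, and the zero tensors are placeholders only.

import tensorflow as tf

params = hparams_config.get_detection_config('efficientdet-d0').as_dict()
height, width = utils.parse_image_size(params['image_size'])
num_anchors = params['num_scales'] * len(params['aspect_ratios'])  # assumed

batch = 2
cls_outputs, box_outputs = [], []
for level in range(params['min_level'], params['max_level'] + 1):
    feat_h, feat_w = height // 2**level, width // 2**level
    cls_outputs.append(
        tf.zeros([batch, feat_h, feat_w, params['num_classes'] * num_anchors]))
    box_outputs.append(tf.zeros([batch, feat_h, feat_w, 4 * num_anchors]))

boxes, scores, classes = pre_nms(params, cls_outputs, box_outputs, topk=False)
print(boxes.shape, scores.shape, classes)  # classes is None when topk=False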