  def test_fails_with_nested_input(self):
    def fn(input_tensor):
      return input_tensor
    input_tensor1 = tf.constant([1])
    input_tensor2 = tf.constant([2])
    with self.assertRaisesRegexp(
        ValueError, '`elems` must be a Tensor or list of Tensors.'):
      shape_utils.static_or_dynamic_map_fn(
          fn, [input_tensor1, [input_tensor2]], dtype=tf.float32)
  def test_with_multiple_dynamic_shapes(self):
    def fn(elems):
      input_tensor, scalar_index_tensor = elems
      return tf.reshape(tf.slice(input_tensor, scalar_index_tensor, [1]), [])

    input_tensor = tf.placeholder(tf.float32, shape=(None, 3))
    scalar_index_tensor = tf.placeholder(tf.int32, shape=(None, 1))
    map_fn_output = shape_utils.static_or_dynamic_map_fn(
        fn, [input_tensor, scalar_index_tensor], dtype=tf.float32)

    op_names = [op.name for op in tf.get_default_graph().get_operations()]
    self.assertTrue(any(['map' == op_name[:3] for op_name in op_names]))

    with self.test_session() as sess:
      result1 = sess.run(
          map_fn_output, feed_dict={
              input_tensor: [[1, 2, 3], [4, 5, -1], [0, 6, 9]],
              scalar_index_tensor: [[0], [2], [1]],
          })
      result2 = sess.run(
          map_fn_output, feed_dict={
              input_tensor: [[-1, 1, 0], [3, 9, 30]],
              scalar_index_tensor: [[1], [0]]
          })
      self.assertAllEqual(result1, [1, -1, 6])
      self.assertAllEqual(result2, [1, 3])
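The two tests above exercise both code paths: with fully static shapes the helper unrolls the loop (so no 'map' ops appear in the graph), while an unknown leading dimension falls back to tf.map_fn. A minimal, simplified sketch of that dispatch (not the actual object_detection implementation) might look like this:

import tensorflow as tf  # TF1-style graph mode, as in the examples above

def static_or_dynamic_map_fn_sketch(fn, elems, dtype=None):
  """Simplified sketch: unroll when the leading dim is static, else tf.map_fn."""
  elems_list = elems if isinstance(elems, list) else [elems]
  leading_dims = [e.shape[0].value for e in elems_list]
  if all(dim is not None for dim in leading_dims):
    # Static leading dimension: unstack, apply fn per element, restack.
    # This is why the static-shape tests assert that no 'map' ops exist.
    unstacked = [tf.unstack(e) for e in elems_list]
    if isinstance(elems, list):
      outputs = [fn(list(args)) for args in zip(*unstacked)]
    else:
      outputs = [fn(arg) for arg in unstacked[0]]
    return tf.stack(outputs)
  # Dynamic leading dimension: fall back to tf.map_fn, which adds 'map/...' ops.
  return tf.map_fn(fn, elems, dtype=dtype)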
Example 3
  def graph_fn():
    input_tensor = tf.constant([[1, 2, 3], [4, 5, -1], [0, 6, 9]],
                               dtype=tf.float32)
    scalar_index_tensor = tf.constant([[0], [2], [1]], dtype=tf.int32)
    map_fn_output = shape_utils.static_or_dynamic_map_fn(
        fn, [input_tensor, scalar_index_tensor], dtype=tf.float32)
    return map_fn_output
Example 4
def normalized_to_image_coordinates(normalized_boxes, image_shape,
                                    parallel_iterations=32):
  """Converts a batch of boxes from normal to image coordinates.

  Args:
    normalized_boxes: a float32 tensor of shape [None, num_boxes, 4] in
      normalized coordinates.
    image_shape: a float32 tensor of shape [4] containing the image shape.
    parallel_iterations: parallelism for the map_fn op.

  Returns:
    absolute_boxes: a float32 tensor of shape [None, num_boxes, 4] containing the
      boxes in image coordinates.
  """
  def _to_absolute_coordinates(normalized_boxes):
    return box_list_ops.to_absolute_coordinates(
        box_list.BoxList(normalized_boxes),
        image_shape[1], image_shape[2], check_range=False).get()

  absolute_boxes = shape_utils.static_or_dynamic_map_fn(
      _to_absolute_coordinates,
      elems=(normalized_boxes),
      dtype=tf.float32,
      parallel_iterations=parallel_iterations,
      back_prop=True)
  return absolute_boxes
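A rough usage sketch for the function above (hypothetical values; assumes the TF1 object_detection imports used in the example):

normalized_boxes = tf.constant([[[0.0, 0.0, 0.5, 0.5],
                                 [0.25, 0.25, 1.0, 1.0]]], dtype=tf.float32)
image_shape = tf.constant([1, 200, 100, 3], dtype=tf.float32)  # [batch, height, width, channels]
absolute_boxes = normalized_to_image_coordinates(normalized_boxes, image_shape)
# With height 200 and width 100, the first box [0, 0, 0.5, 0.5] scales to
# [0, 0, 100, 50] and the second to [50, 25, 200, 100].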
Example 5
def normalized_to_image_coordinates(normalized_boxes, image_shape,
                                    parallel_iterations=32):
  """Converts a batch of boxes from normal to image coordinates.

  Args:
    normalized_boxes: a tensor of shape [None, num_boxes, 4] in
      normalized coordinates. The dtype of this tensor must support tf.multiply.
    image_shape: a tensor of shape [4] containing the image shape, with same
      dtype as `normalized_boxes`.
    parallel_iterations: parallelism for the map_fn op.

  Returns:
    absolute_boxes: a tensor of shape [None, num_boxes, 4] containing
      the boxes in image coordinates, with same
      dtype as `normalized_boxes`.
  """
  x_scale = tf.cast(image_shape[2], normalized_boxes.dtype)
  y_scale = tf.cast(image_shape[1], normalized_boxes.dtype)
  def _to_absolute_coordinates(normalized_boxes):
    y_min, x_min, y_max, x_max = tf.split(
        value=normalized_boxes, num_or_size_splits=4, axis=1)
    y_min = y_scale * y_min
    y_max = y_scale * y_max
    x_min = x_scale * x_min
    x_max = x_scale * x_max
    scaled_boxes = tf.concat([y_min, x_min, y_max, x_max], 1)
    return scaled_boxes

  absolute_boxes = shape_utils.static_or_dynamic_map_fn(
      _to_absolute_coordinates,
      elems=(normalized_boxes),
      dtype=normalized_boxes.dtype,
      parallel_iterations=parallel_iterations,
      back_prop=True)
  return absolute_boxes
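Because this variant only relies on tf.split, elementwise multiplication, and tf.concat, it also accepts non-float32 boxes; a small hedged sketch (hypothetical values):

half_boxes = tf.constant([[[0.0, 0.0, 0.5, 0.5]]], dtype=tf.float16)
image_shape = tf.constant([1, 200, 100, 3], dtype=tf.float16)
half_absolute = normalized_to_image_coordinates(half_boxes, image_shape)
# Both the boxes and the resulting half_absolute keep dtype float16.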
Example 6
def normalized_to_image_coordinates(normalized_boxes,
                                    image_shape,
                                    parallel_iterations=32):
    """Converts a batch of boxes from normal to image coordinates.

  Args:
    normalized_boxes: a float32 tensor of shape [None, num_boxes, 4] in
      normalized coordinates.
    image_shape: a float32 tensor of shape [4] containing the image shape.
    parallel_iterations: parallelism for the map_fn op.

  Returns:
    absolute_boxes: a float32 tensor of shape [None, num_boxes, 4] containing the
      boxes in image coordinates.
  """
    def _to_absolute_coordinates(normalized_boxes):
        return box_list_ops.to_absolute_coordinates(
            box_list.BoxList(normalized_boxes),
            image_shape[1],
            image_shape[2],
            check_range=False).get()

    absolute_boxes = shape_utils.static_or_dynamic_map_fn(
        _to_absolute_coordinates,
        elems=(normalized_boxes),
        dtype=tf.float32,
        parallel_iterations=parallel_iterations,
        back_prop=True)
    return absolute_boxes
Example 7
def normalized_to_image_coordinates(normalized_boxes, image_shape,
                                    parallel_iterations=32):
  """Converts a batch of boxes from normal to image coordinates.

  Args:
    normalized_boxes: a float32 tensor of shape [None, num_boxes, 4] in
      normalized coordinates.
    image_shape: a float32 tensor of shape [4] containing the image shape.
    parallel_iterations: parallelism for the map_fn op.

  Returns:
    absolute_boxes: a float32 tensor of shape [None, num_boxes, 4] containing
      the boxes in image coordinates.
  """
  x_scale = tf.cast(image_shape[2], tf.float32)
  y_scale = tf.cast(image_shape[1], tf.float32)
  def _to_absolute_coordinates(normalized_boxes):
    y_min, x_min, y_max, x_max = tf.split(
        value=normalized_boxes, num_or_size_splits=4, axis=1)
    y_min = y_scale * y_min
    y_max = y_scale * y_max
    x_min = x_scale * x_min
    x_max = x_scale * x_max
    scaled_boxes = tf.concat([y_min, x_min, y_max, x_max], 1)
    return scaled_boxes

  absolute_boxes = shape_utils.static_or_dynamic_map_fn(
      _to_absolute_coordinates,
      elems=(normalized_boxes),
      dtype=tf.float32,
      parallel_iterations=parallel_iterations,
      back_prop=True)
  return absolute_boxes
Example 8
def _tf_example_input_placeholder(input_shape=None):
    """Returns input that accepts a batch of strings with tf examples.

  Args:
    input_shape: the shape to resize the output decoded images to (optional).

  Returns:
    a tuple of input placeholder and the output decoded images.
  """
    batch_tf_example_placeholder = tf.placeholder(tf.string,
                                                  shape=[None],
                                                  name='tf_example')

    def decode(tf_example_string_tensor):
        tensor_dict = tf_example_decoder.TfExampleDecoder().decode(
            tf_example_string_tensor)
        image_tensor = tensor_dict[fields.InputDataFields.image]
        if input_shape is not None:
            # tf.image.resize returns float32; cast back to the declared uint8.
            image_tensor = tf.cast(
                tf.image.resize(image_tensor, input_shape[1:3]), tf.uint8)
        return image_tensor

    return (batch_tf_example_placeholder,
            shape_utils.static_or_dynamic_map_fn(
                decode,
                elems=batch_tf_example_placeholder,
                dtype=tf.uint8,
                parallel_iterations=32,
                back_prop=False))
  def test_with_static_shape(self):
    def fn(input_tensor):
      return tf.reduce_sum(input_tensor)
    input_tensor = tf.constant([[1, 2], [3, 1], [0, 4]], dtype=tf.float32)
    map_fn_output = shape_utils.static_or_dynamic_map_fn(fn, input_tensor)

    op_names = [op.name for op in tf.get_default_graph().get_operations()]
    self.assertTrue(all(['map' != op_name[:3] for op_name in op_names]))

    with self.test_session() as sess:
      result = sess.run(map_fn_output)
      self.assertAllEqual(result, [3, 4, 4])
  def test_with_multiple_static_shapes(self):
    def fn(elems):
      input_tensor, scalar_index_tensor = elems
      return tf.reshape(tf.slice(input_tensor, scalar_index_tensor, [1]), [])

    input_tensor = tf.constant([[1, 2, 3], [4, 5, -1], [0, 6, 9]],
                               dtype=tf.float32)
    scalar_index_tensor = tf.constant([[0], [2], [1]], dtype=tf.int32)
    map_fn_output = shape_utils.static_or_dynamic_map_fn(
        fn, [input_tensor, scalar_index_tensor], dtype=tf.float32)

    op_names = [op.name for op in tf.get_default_graph().get_operations()]
    self.assertTrue(all(['map' != op_name[:3] for op_name in op_names]))

    with self.test_session() as sess:
      result = sess.run(map_fn_output)
      self.assertAllEqual(result, [1, -1, 6])
Example 11
def batch_position_sensitive_crop_regions(images,
                                          boxes,
                                          crop_size,
                                          num_spatial_bins,
                                          global_pool,
                                          parallel_iterations=64):
  """Position sensitive crop with batches of images and boxes.

  This op is exactly like `position_sensitive_crop_regions` below but operates
  on batches of images and boxes. See `position_sensitive_crop_regions` function
  below for the operation applied per batch element.

  Args:
    images: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
      `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
      A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
      Both `image_height` and `image_width` need to be positive.
    boxes: A `Tensor` of type `float32`.
      A 3-D tensor of shape `[batch, num_boxes, 4]`. Each box is specified in
      normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value
      of `y` is mapped to the image coordinate at `y * (image_height - 1)`, so
      that the `[0, 1]` interval of normalized image height is mapped to
      `[0, image_height - 1]` in image height coordinates. We do allow y1 > y2,
      in which case the sampled crop is an up-down flipped version of the
      original image. The width dimension is treated similarly.
    crop_size: See `position_sensitive_crop_regions` below.
    num_spatial_bins: See `position_sensitive_crop_regions` below.
    global_pool: See `position_sensitive_crop_regions` below.
    parallel_iterations: Number of batch items to process in parallel.

  Returns:
    A float32 tensor containing the per-image outputs of
    `position_sensitive_crop_regions`, stacked along the batch dimension.
  """
  def _position_sensitive_crop_fn(inputs):
    images, boxes = inputs
    return position_sensitive_crop_regions(
        images,
        boxes,
        crop_size=crop_size,
        num_spatial_bins=num_spatial_bins,
        global_pool=global_pool)

  return shape_utils.static_or_dynamic_map_fn(
      _position_sensitive_crop_fn,
      elems=[images, boxes],
      dtype=tf.float32,
      parallel_iterations=parallel_iterations)
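A hedged usage sketch for the batched crop above (hypothetical shapes; position_sensitive_crop_regions requires the channel depth and crop size to be divisible by the number of spatial bins):

images = tf.random_uniform([2, 40, 40, 18])  # depth 18 = 3 * 3 bins * 2 channels per bin
boxes = tf.random_uniform([2, 5, 4])         # normalized [y1, x1, y2, x2] per box
ps_features = batch_position_sensitive_crop_regions(
    images, boxes,
    crop_size=[12, 12],          # divisible by the 3 spatial bins per dimension
    num_spatial_bins=[3, 3],
    global_pool=True)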
def batch_position_sensitive_crop_regions(images,
                                          boxes,
                                          crop_size,
                                          num_spatial_bins,
                                          global_pool,
                                          parallel_iterations=64):
    """Position sensitive crop with batches of images and boxes.

  This op is exactly like `position_sensitive_crop_regions` below but operates
  on batches of images and boxes. See `position_sensitive_crop_regions` function
  below for the operation applied per batch element.

  Args:
    images: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
      `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
      A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
      Both `image_height` and `image_width` need to be positive.
    boxes: A `Tensor` of type `float32`.
      A 3-D tensor of shape `[batch, num_boxes, 4]`. Each box is specified in
      normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value
      of `y` is mapped to the image coordinate at `y * (image_height - 1)`, so
      that the `[0, 1]` interval of normalized image height is mapped to
      `[0, image_height - 1]` in image height coordinates. We do allow y1 > y2,
      in which case the sampled crop is an up-down flipped version of the
      original image. The width dimension is treated similarly.
    crop_size: See `position_sensitive_crop_regions` below.
    num_spatial_bins: See `position_sensitive_crop_regions` below.
    global_pool: See `position_sensitive_crop_regions` below.
    parallel_iterations: Number of batch items to process in parallel.

  Returns:
    A float32 tensor containing the per-image outputs of
    `position_sensitive_crop_regions`, stacked along the batch dimension.
  """
    def _position_sensitive_crop_fn(inputs):
        images, boxes = inputs
        return position_sensitive_crop_regions(
            images,
            boxes,
            crop_size=crop_size,
            num_spatial_bins=num_spatial_bins,
            global_pool=global_pool)

    return shape_utils.static_or_dynamic_map_fn(
        _position_sensitive_crop_fn,
        elems=[images, boxes],
        dtype=tf.float32,
        parallel_iterations=parallel_iterations)
Example 13
def giou(boxes1, boxes2):
  """Computes generalized IOU between two tensors.

  Each box should be represented as [ymin, xmin, ymax, xmax].

  Args:
    boxes1: a tensor with shape [num_boxes, 4]
    boxes2: a tensor with shape [num_boxes, 4]

  Returns:
    a tensor of shape [num_boxes] containing GIoUs

  """
  def _two_boxes_giou(boxes):
    """Compute giou between two boxes."""
    boxes1, boxes2 = boxes

    pred_ymin, pred_xmin, pred_ymax, pred_xmax = tf.unstack(boxes1)
    gt_ymin, gt_xmin, gt_ymax, gt_xmax = tf.unstack(boxes2)

    gt_area = (gt_ymax - gt_ymin) * (gt_xmax - gt_xmin)
    pred_area = (pred_ymax - pred_ymin) * (pred_xmax - pred_xmin)

    x1_i = tf.maximum(pred_xmin, gt_xmin)
    x2_i = tf.minimum(pred_xmax, gt_xmax)
    y1_i = tf.maximum(pred_ymin, gt_ymin)
    y2_i = tf.minimum(pred_ymax, gt_ymax)
    intersection_area = tf.maximum(0.0, y2_i - y1_i) * tf.maximum(0.0,
                                                                  x2_i - x1_i)

    x1_c = tf.minimum(pred_xmin, gt_xmin)
    x2_c = tf.maximum(pred_xmax, gt_xmax)
    y1_c = tf.minimum(pred_ymin, gt_ymin)
    y2_c = tf.maximum(pred_ymax, gt_ymax)
    hull_area = (y2_c - y1_c) * (x2_c - x1_c)

    union_area = gt_area + pred_area - intersection_area
    iou = tf.where(
        tf.equal(union_area, 0.0), 0.0, intersection_area / union_area)
    giou_ = iou - tf.where(hull_area > 0.0,
                           (hull_area - union_area) / hull_area, iou)

    return giou_

  return shape_utils.static_or_dynamic_map_fn(_two_boxes_giou, [boxes1, boxes2])
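A small worked usage sketch for giou (hypothetical boxes):

boxes1 = tf.constant([[0.0, 0.0, 2.0, 2.0]])  # [ymin, xmin, ymax, xmax]
boxes2 = tf.constant([[1.0, 1.0, 3.0, 3.0]])
gious = giou(boxes1, boxes2)
# intersection = 1, union = 4 + 4 - 1 = 7, enclosing hull = 9
# IoU = 1/7 ≈ 0.143 and GIoU = 1/7 - (9 - 7)/9 ≈ -0.079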
  def test_with_dynamic_shape(self):
    def fn(input_tensor):
      return tf.reduce_sum(input_tensor)
    input_tensor = tf.placeholder(tf.float32, shape=(None, 2))
    map_fn_output = shape_utils.static_or_dynamic_map_fn(fn, input_tensor)

    op_names = [op.name for op in tf.get_default_graph().get_operations()]
    self.assertTrue(any(['map' == op_name[:3] for op_name in op_names]))

    with self.test_session() as sess:
      result1 = sess.run(
          map_fn_output, feed_dict={
              input_tensor: [[1, 2], [3, 1], [0, 4]]})
      result2 = sess.run(
          map_fn_output, feed_dict={
              input_tensor: [[-1, 1], [0, 9]]})
      self.assertAllEqual(result1, [3, 4, 4])
      self.assertAllEqual(result2, [0, 9])
Example 15
def _tf_example_input_placeholder():
  """Returns input that accepts a batch of strings with tf examples.

  Returns:
    a tuple of input placeholder and the output decoded images.
  """
  batch_tf_example_placeholder = tf.placeholder(
      tf.string, shape=[None], name='tf_example')
  def decode(tf_example_string_tensor):
    tensor_dict = tf_example_decoder.TfExampleDecoder().decode(
        tf_example_string_tensor)
    image_tensor = tensor_dict[fields.InputDataFields.image]
    return image_tensor
  return (batch_tf_example_placeholder,
          shape_utils.static_or_dynamic_map_fn(
              decode,
              elems=batch_tf_example_placeholder,
              dtype=tf.uint8,
              parallel_iterations=32,
              back_prop=False))
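A hedged end-to-end sketch of feeding this placeholder (assumes `encoded_jpeg` holds the raw bytes of a JPEG image and a TF1 session; the feature keys follow the standard tf.Example image format expected by TfExampleDecoder):

placeholder, decoded_images = _tf_example_input_placeholder()
example = tf.train.Example(features=tf.train.Features(feature={
    'image/encoded': tf.train.Feature(
        bytes_list=tf.train.BytesList(value=[encoded_jpeg])),
    'image/format': tf.train.Feature(
        bytes_list=tf.train.BytesList(value=[b'jpeg'])),
}))
with tf.Session() as sess:
  images = sess.run(decoded_images,
                    feed_dict={placeholder: [example.SerializeToString()]})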
Example 16
  def preprocess(self, inputs):
    """Feature-extractor specific preprocessing.

    SSD meta architecture uses a default clip_window of [0, 0, 1, 1] during
    post-processing. On calling `preprocess` method, clip_window gets updated
    based on `true_image_shapes` returned by `image_resizer_fn`.

    Args:
      inputs: a [batch, height_in, width_in, channels] float tensor representing
        a batch of images with values between 0 and 255.0.

    Returns:
      preprocessed_inputs: a [batch, height_out, width_out, channels] float
        tensor representing a batch of images.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros.

    Raises:
      ValueError: if inputs tensor does not have type tf.float32
    """
    if inputs.dtype is not tf.float32:
      raise ValueError('`preprocess` expects a tf.float32 tensor')
    with tf.device('/cpu:0'):  # Put preprocessing on CPU (ADDED BY GUSTAVZ)
        with tf.name_scope('Preprocessor'):
          # TODO(jonathanhuang): revisit whether to always use batch size as
          # the number of parallel iterations vs allow for dynamic batching.
          outputs = shape_utils.static_or_dynamic_map_fn(
              self._image_resizer_fn,
              elems=inputs,
              dtype=[tf.float32, tf.int32])
          resized_inputs = outputs[0]
          true_image_shapes = outputs[1]

          return (self._feature_extractor.preprocess(resized_inputs),
                  true_image_shapes)
Example 17
  def preprocess(self, inputs):
    """Feature-extractor specific preprocessing.

    SSD meta architecture uses a default clip_window of [0, 0, 1, 1] during
    post-processing. On calling `preprocess` method, clip_window gets updated
    based on `true_image_shapes` returned by `image_resizer_fn`.

    Args:
      inputs: a [batch, height_in, width_in, channels] float tensor representing
        a batch of images with values between 0 and 255.0.

    Returns:
      preprocessed_inputs: a [batch, height_out, width_out, channels] float
        tensor representing a batch of images.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros.

    Raises:
      ValueError: if inputs tensor does not have type tf.float32
    """
    if inputs.dtype is not tf.float32:
      raise ValueError('`preprocess` expects a tf.float32 tensor')
    with tf.name_scope('Preprocessor'):
      # TODO: revisit whether to always use batch size as
      # the number of parallel iterations vs allow for dynamic batching.
      outputs = shape_utils.static_or_dynamic_map_fn(
          self._image_resizer_fn,
          elems=inputs,
          dtype=[tf.float32, tf.int32])
      resized_inputs = outputs[0]
      true_image_shapes = outputs[1]

      return (self._feature_extractor.preprocess(resized_inputs),
              true_image_shapes)
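For context, the `_image_resizer_fn` mapped above is a per-image function that returns both the resized image and its true (unpadded) shape, which is why the map is declared with dtype=[tf.float32, tf.int32]. An illustrative stand-in (not the actual object_detection resizer):

def _example_image_resizer_fn(image):
  # Resize a single [height, width, channels] image to a fixed size and report
  # the shape actually occupied by image content (no padding in this sketch).
  resized = tf.image.resize_images(image, [300, 300])
  true_shape = tf.stack([300, 300, tf.shape(image)[2]])
  return [resized, true_shape]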
Example 18
def batch_multiclass_non_max_suppression(boxes,
                                         boxes_3d,
                                         scores,
                                         score_thresh,
                                         iou_thresh,
                                         max_size_per_class,
                                         max_total_size=0,
                                         clip_window=None,
                                         change_coordinate_frame=False,
                                         num_valid_boxes=None,
                                         additional_fields=None,
                                         scope=None,
                                         use_static_shapes=False,
                                         parallel_iterations=32,
                                         use_class_agnostic_nms=False,
                                         max_classes_per_detection=1):
    """Multi-class version of non maximum suppression that operates on a batch.

  This op is similar to `multiclass_non_max_suppression` but operates on a batch
  of boxes and scores. See documentation for `multiclass_non_max_suppression`
  for details.

  Args:
    boxes: A [batch_size, num_anchors, q, 4] float32 tensor containing
      detections. If `q` is 1 then same boxes are used for all classes
      otherwise, if `q` is equal to number of classes, class-specific boxes are
      used.
    scores: A [batch_size, num_anchors, num_classes] float32 tensor containing
      the scores for each of the `num_anchors` detections. The scores have to be
      non-negative when use_static_shapes is set True.
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
      with previously selected boxes are removed).
    max_size_per_class: maximum number of retained boxes per class.
    max_total_size: maximum number of boxes retained over all classes. By
      default returns all boxes retained after capping boxes per class.
    clip_window: A float32 tensor of shape [batch_size, 4]  where each entry is
      of the form [y_min, x_min, y_max, x_max] representing the window to clip
      boxes to before performing non-max suppression. This argument can also be
      a tensor of shape [4] in which case, the same clip window is applied to
      all images in the batch. If clip_window is None, all boxes are used to
      perform non-max suppression.
    change_coordinate_frame: Whether to normalize coordinates after clipping
      relative to clip_window (this can only be set to True if a clip_window is
      provided)
    num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape
      [batch_size] representing the number of valid boxes to be considered for
      each image in the batch.  This parameter allows for ignoring zero
      paddings.
    boxes_3d: a [batch_size, num_anchors, q, 6] float32 tensor containing the
      3-D box parameters associated with each entry in `boxes`, following the
      same `q` convention as `boxes`.
    additional_fields: (optional) If not None, a dictionary that maps keys to
      tensors whose dimensions are [batch_size, num_anchors, ...].
    scope: tf scope name.
    use_static_shapes: If true, the output nmsed boxes are padded to be of
      length `max_size_per_class` and it doesn't clip boxes to max_total_size.
      Defaults to false.
    parallel_iterations: (optional) number of batch items to process in
      parallel.
    use_class_agnostic_nms: If true, this uses class-agnostic non max
      suppression
    max_classes_per_detection: Maximum number of retained classes per detection
      box in class-agnostic NMS.

  Returns:
    'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor
      containing the non-max suppressed boxes.
    'nmsed_scores': A [batch_size, max_detections] float32 tensor containing
      the scores for the boxes.
    'nmsed_classes': A [batch_size, max_detections] float32 tensor
      containing the class for boxes.
    'nmsed_boxes_3d': A [batch_size, max_detections, 6] float32 tensor
      containing the non-max suppressed 3-D boxes.
    'nmsed_additional_fields': (optional) a dictionary of
      [batch_size, max_detections, ...] float32 tensors corresponding to the
      tensors specified in the input `additional_fields`. This is not returned
      if input `additional_fields` is None.
    'num_detections': A [batch_size] int32 tensor indicating the number of
      valid detections per batch item. Only the top num_detections[i] entries in
      nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the
      entries are zero paddings.

  Raises:
    ValueError: if `q` in boxes.shape is not 1 or not equal to number of
      classes as inferred from scores.shape.
  """
    q = shape_utils.get_dim_as_int(boxes.shape[2])
    num_classes = shape_utils.get_dim_as_int(scores.shape[2])
    if q != 1 and q != num_classes:
        raise ValueError('third dimension of boxes must be either 1 or equal '
                         'to the third dimension of scores')
    if change_coordinate_frame and clip_window is None:
        raise ValueError(
            'if change_coordinate_frame is True, then a clip_window '
            'must be specified.')

    # Create ordered dictionary using the sorted keys from
    # additional fields to ensure getting the same key value assignment
    # in _single_image_nms_fn(). The dictionary is thus a sorted version of
    # additional_fields.
    if additional_fields is None:
        ordered_additional_fields = {}
    else:
        ordered_additional_fields = collections.OrderedDict(
            sorted(additional_fields.items(), key=lambda item: item[0]))
    del additional_fields
    with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
        boxes_shape = boxes.shape
        batch_size = shape_utils.get_dim_as_int(boxes_shape[0])
        num_anchors = shape_utils.get_dim_as_int(boxes_shape[1])

        if batch_size is None:
            batch_size = tf.shape(boxes)[0]
        if num_anchors is None:
            num_anchors = tf.shape(boxes)[1]

        # If num valid boxes aren't provided, create one and mark all boxes as
        # valid.
        if num_valid_boxes is None:
            num_valid_boxes = tf.ones([batch_size],
                                      dtype=tf.int32) * num_anchors

        if clip_window is None:
            clip_window = tf.stack([
                tf.reduce_min(boxes[:, :, :, 0]),
                tf.reduce_min(boxes[:, :, :, 1]),
                tf.reduce_max(boxes[:, :, :, 2]),
                tf.reduce_max(boxes[:, :, :, 3])
            ])
        if clip_window.shape.ndims == 1:
            clip_window = tf.tile(tf.expand_dims(clip_window, 0),
                                  [batch_size, 1])

        def _single_image_nms_fn(args):
            """Runs NMS on a single image and returns padded output.

      Args:
        args: A list of tensors consisting of the following:
          per_image_boxes - A [num_anchors, q, 4] float32 tensor containing
            detections. If `q` is 1 then same boxes are used for all classes
            otherwise, if `q` is equal to number of classes, class-specific
            boxes are used.
          per_image_scores - A [num_anchors, num_classes] float32 tensor
            containing the scores for each of the `num_anchors` detections.
          per_image_masks - A [num_anchors, q, mask_height, mask_width] float32
            tensor containing box masks. `q` can be either number of classes
            or 1 depending on whether a separate mask is predicted per class.
          per_image_clip_window - A 1D float32 tensor of the form
            [ymin, xmin, ymax, xmax] representing the window to clip the boxes
            to.
          per_image_additional_fields - (optional) A variable number of float32
            tensors each with size [num_anchors, ...].
          per_image_num_valid_boxes - A tensor of type `int32`. A 1-D tensor of
            shape [batch_size] representing the number of valid boxes to be
            considered for each image in the batch.  This parameter allows for
            ignoring zero paddings.

      Returns:
        'nmsed_boxes': A [max_detections, 4] float32 tensor containing the
          non-max suppressed boxes.
        'nmsed_scores': A [max_detections] float32 tensor containing the scores
          for the boxes.
        'nmsed_classes': A [max_detections] float32 tensor containing the class
          for boxes.
        'nmsed_masks': (optional) a [max_detections, mask_height, mask_width]
          float32 tensor containing masks for each selected box. This is set to
          None if input `masks` is None.
        'nmsed_additional_fields':  (optional) A variable number of float32
          tensors each with size [max_detections, ...] corresponding to the
          input `per_image_additional_fields`.
        'num_detections': A [batch_size] int32 tensor indicating the number of
          valid detections per batch item. Only the top num_detections[i]
          entries in nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The
          rest of the entries are zero paddings.
      """
            per_image_boxes = args[0]
            per_image_boxes_3d = args[1]
            per_image_scores = args[2]
            per_image_clip_window = args[3]
            # Make sure that the order of elements passed in args is aligned with
            # the iteration order of ordered_additional_fields
            per_image_additional_fields = {
                key: value
                for key, value in zip(ordered_additional_fields, args[4:-1])
            }
            per_image_num_valid_boxes = args[-1]
            if use_static_shapes:
                total_proposals = tf.shape(per_image_scores)
                per_image_scores = tf.where(
                    tf.less(tf.range(total_proposals[0]),
                            per_image_num_valid_boxes), per_image_scores,
                    tf.fill(total_proposals,
                            np.finfo('float32').min))
            else:
                per_image_boxes = tf.reshape(
                    tf.slice(per_image_boxes, 3 * [0],
                             tf.stack([per_image_num_valid_boxes, -1, -1])),
                    [-1, q, 4])
                per_image_boxes_3d = tf.reshape(
                    tf.slice(per_image_boxes_3d, 3 * [0],
                             tf.stack([per_image_num_valid_boxes, -1, -1])),
                    [-1, q, 6])
                per_image_scores = tf.reshape(
                    tf.slice(per_image_scores, [0, 0],
                             tf.stack([per_image_num_valid_boxes, -1])),
                    [-1, num_classes])
                if per_image_additional_fields is not None:
                    for key, tensor in per_image_additional_fields.items():
                        additional_field_shape = tensor.get_shape()
                        additional_field_dim = len(additional_field_shape)
                        per_image_additional_fields[key] = tf.reshape(
                            tf.slice(
                                per_image_additional_fields[key],
                                additional_field_dim * [0],
                                tf.stack([per_image_num_valid_boxes] +
                                         (additional_field_dim - 1) * [-1])),
                            [-1] + [
                                shape_utils.get_dim_as_int(dim)
                                for dim in additional_field_shape[1:]
                            ])
            if use_class_agnostic_nms:
                nmsed_boxlist, num_valid_nms_boxes = class_agnostic_non_max_suppression(
                    per_image_boxes,
                    per_image_boxes_3d,
                    per_image_scores,
                    score_thresh,
                    iou_thresh,
                    max_classes_per_detection,
                    max_total_size,
                    clip_window=per_image_clip_window,
                    change_coordinate_frame=change_coordinate_frame,
                    pad_to_max_output_size=use_static_shapes,
                    additional_fields=per_image_additional_fields)
            else:
                nmsed_boxlist, num_valid_nms_boxes = multiclass_non_max_suppression(
                    per_image_boxes,
                    per_image_boxes_3d,
                    per_image_scores,
                    score_thresh,
                    iou_thresh,
                    max_size_per_class,
                    max_total_size,
                    clip_window=per_image_clip_window,
                    change_coordinate_frame=change_coordinate_frame,
                    pad_to_max_output_size=use_static_shapes,
                    additional_fields=per_image_additional_fields)

            if not use_static_shapes:
                nmsed_boxlist = box_list_ops.pad_or_clip_box_list(
                    nmsed_boxlist, max_total_size)
            num_detections = num_valid_nms_boxes
            nmsed_boxes = nmsed_boxlist.get()
            nmsed_boxes_3d = nmsed_boxlist.get_field(
                fields.BoxListFields.boxes_3d)
            nmsed_scores = nmsed_boxlist.get_field(fields.BoxListFields.scores)
            nmsed_classes = nmsed_boxlist.get_field(
                fields.BoxListFields.classes)
            nmsed_additional_fields = []
            # Sorting is needed here to ensure that the values stored in
            # nmsed_additional_fields are always kept in the same order
            # across different execution runs.
            for key in sorted(per_image_additional_fields.keys()):
                nmsed_additional_fields.append(nmsed_boxlist.get_field(key))
            return (
                [nmsed_boxes, nmsed_boxes_3d, nmsed_scores, nmsed_classes] +
                nmsed_additional_fields + [num_detections])

        num_additional_fields = 0
        if ordered_additional_fields:
            num_additional_fields = len(ordered_additional_fields)
        num_nmsed_outputs = 4 + num_additional_fields

        batch_outputs = shape_utils.static_or_dynamic_map_fn(
            _single_image_nms_fn,
            elems=([boxes, boxes_3d, scores, clip_window] +
                   list(ordered_additional_fields.values()) +
                   [num_valid_boxes]),
            dtype=(num_nmsed_outputs * [tf.float32] + [tf.int32]),
            parallel_iterations=parallel_iterations)

        batch_nmsed_boxes = batch_outputs[0]
        batch_nmsed_boxes_3d = batch_outputs[1]
        batch_nmsed_scores = batch_outputs[2]
        batch_nmsed_classes = batch_outputs[3]
        batch_nmsed_values = batch_outputs[4:-1]

        batch_nmsed_additional_fields = {}
        if num_additional_fields > 0:
            # Sort the keys to ensure arranging elements in same order as
            # in _single_image_nms_fn.
            batch_nmsed_keys = list(ordered_additional_fields.keys())
            for i in range(len(batch_nmsed_keys)):
                batch_nmsed_additional_fields[
                    batch_nmsed_keys[i]] = batch_nmsed_values[i]

        batch_num_detections = batch_outputs[-1]

        if not ordered_additional_fields:
            batch_nmsed_additional_fields = None

        return (batch_nmsed_boxes, batch_nmsed_boxes_3d, batch_nmsed_scores,
                batch_nmsed_classes, batch_nmsed_additional_fields,
                batch_num_detections)
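A hedged usage sketch of the 3-D-aware batched NMS above (hypothetical shapes and thresholds):

boxes = tf.random_uniform([2, 100, 1, 4])   # q = 1: shared boxes for all classes
boxes_3d = tf.random_uniform([2, 100, 1, 6])
scores = tf.random_uniform([2, 100, 5])     # 5 classes
(nmsed_boxes, nmsed_boxes_3d, nmsed_scores, nmsed_classes,
 nmsed_additional_fields, num_detections) = batch_multiclass_non_max_suppression(
     boxes, boxes_3d, scores,
     score_thresh=0.1,
     iou_thresh=0.5,
     max_size_per_class=10,
     max_total_size=20)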
Example 19
  def graph_fn():
    input_tensor = tf.constant([[1, 2], [3, 1], [0, 4]], dtype=tf.float32)
    return shape_utils.static_or_dynamic_map_fn(fn, input_tensor)
Example 20
    def loss(self, prediction_dict, true_image_shapes, scope=None):
        """Compute scalar loss tensors with respect to provided groundtruth.

    Calling this function requires that groundtruth tensors have been
    provided via the provide_groundtruth function.

    Args:
      prediction_dict: a dictionary holding prediction tensors with
        1) box_encodings: 3-D float tensor of shape [batch_size, num_anchors,
          box_code_dimension] containing predicted boxes.
        2) class_predictions_with_background: 3-D float tensor of shape
          [batch_size, num_anchors, num_classes+1] containing class predictions
          (logits) for each of the anchors. Note that this tensor *includes*
          background class predictions.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros.
      scope: Optional scope name.

    Returns:
      a dictionary mapping loss keys (`localization_loss` and
        `classification_loss`) to scalar tensors representing corresponding loss
        values.
    """
        with tf.name_scope(scope, 'Loss', prediction_dict.values()):
            keypoints = None
            if self.groundtruth_has_field(fields.BoxListFields.keypoints):
                keypoints = self.groundtruth_lists(
                    fields.BoxListFields.keypoints)
            weights = None
            if self.groundtruth_has_field(fields.BoxListFields.weights):
                weights = self.groundtruth_lists(fields.BoxListFields.weights)
            (batch_cls_targets, batch_cls_weights, batch_reg_targets,
             batch_reg_weights, match_list) = self._assign_targets(
                 self.groundtruth_lists(fields.BoxListFields.boxes),
                 self.groundtruth_lists(fields.BoxListFields.classes),
                 keypoints, weights)
            if self._add_summaries:
                self._summarize_target_assignment(
                    self.groundtruth_lists(fields.BoxListFields.boxes),
                    match_list)

            if self._random_example_sampler:
                batch_sampled_indicator = tf.to_float(
                    shape_utils.static_or_dynamic_map_fn(
                        self._minibatch_subsample_fn,
                        [batch_cls_targets, batch_cls_weights],
                        dtype=tf.bool,
                        parallel_iterations=self._parallel_iterations,
                        back_prop=True))
                batch_reg_weights = tf.multiply(batch_sampled_indicator,
                                                batch_reg_weights)
                batch_cls_weights = tf.multiply(batch_sampled_indicator,
                                                batch_cls_weights)

            location_losses = self._localization_loss(
                prediction_dict['box_encodings'],
                batch_reg_targets,
                ignore_nan_targets=True,
                weights=batch_reg_weights)
            cls_losses = ops.reduce_sum_trailing_dimensions(
                self._classification_loss(
                    prediction_dict['class_predictions_with_background'],
                    batch_cls_targets,
                    weights=batch_cls_weights),
                ndims=2)

            if self._hard_example_miner:
                (localization_loss,
                 classification_loss) = self._apply_hard_mining(
                     location_losses, cls_losses, prediction_dict, match_list)
                if self._add_summaries:
                    self._hard_example_miner.summarize()
            else:
                if self._add_summaries:
                    class_ids = tf.argmax(batch_cls_targets, axis=2)
                    flattened_class_ids = tf.reshape(class_ids, [-1])
                    flattened_classification_losses = tf.reshape(
                        cls_losses, [-1])
                    self._summarize_anchor_classification_loss(
                        flattened_class_ids, flattened_classification_losses)
                localization_loss = tf.reduce_sum(location_losses)
                classification_loss = tf.reduce_sum(cls_losses)

            # Optionally normalize by number of positive matches
            normalizer = tf.constant(1.0, dtype=tf.float32)
            if self._normalize_loss_by_num_matches:
                normalizer = tf.maximum(
                    tf.to_float(tf.reduce_sum(batch_reg_weights)), 1.0)

            localization_loss_normalizer = normalizer
            if self._normalize_loc_loss_by_codesize:
                localization_loss_normalizer *= self._box_coder.code_size
            localization_loss = tf.multiply((self._localization_loss_weight /
                                             localization_loss_normalizer),
                                            localization_loss,
                                            name='localization_loss')
            classification_loss = tf.multiply(
                (self._classification_loss_weight / normalizer),
                classification_loss,
                name='classification_loss')

            loss_dict = {
                str(localization_loss.op.name): localization_loss,
                str(classification_loss.op.name): classification_loss
            }
        return loss_dict
Example 21
def result_dict_for_batched_example(images,
                                    keys,
                                    detections,
                                    groundtruth=None,
                                    class_agnostic=False,
                                    scale_to_absolute=False,
                                    original_image_spatial_shapes=None,
                                    true_image_shapes=None,
                                    max_gt_boxes=None):
    """Merges all detection and groundtruth information for a single example.

  Note that evaluation tools require classes that are 1-indexed, and so this
  function performs the offset. If `class_agnostic` is True, all output classes
  have label 1.

  Args:
    images: A single 4D uint8 image tensor of shape [batch_size, H, W, C].
    keys: A [batch_size] string tensor with image identifier.
    detections: A dictionary of detections, returned from
      DetectionModel.postprocess().
    groundtruth: (Optional) Dictionary of groundtruth items, with fields:
      'groundtruth_boxes': [batch_size, max_number_of_boxes, 4] float32 tensor
        of boxes, in normalized coordinates.
      'groundtruth_classes':  [batch_size, max_number_of_boxes] int64 tensor of
        1-indexed classes.
      'groundtruth_area': [batch_size, max_number_of_boxes] float32 tensor of
        bbox area. (Optional)
      'groundtruth_is_crowd':[batch_size, max_number_of_boxes] int64
        tensor. (Optional)
      'groundtruth_difficult': [batch_size, max_number_of_boxes] int64
        tensor. (Optional)
      'groundtruth_group_of': [batch_size, max_number_of_boxes] int64
        tensor. (Optional)
      'groundtruth_instance_masks': 4D int64 tensor of instance
        masks (Optional).
    class_agnostic: Boolean indicating whether the detections are class-agnostic
      (i.e. binary). Default False.
    scale_to_absolute: Boolean indicating whether boxes and keypoints should be
      scaled to absolute coordinates. Note that for IoU based evaluations, it
      does not matter whether boxes are expressed in absolute or relative
      coordinates. Default False.
    original_image_spatial_shapes: A 2D int32 tensor of shape [batch_size, 2]
      used to resize the image. When set to None, the image size is retained.
    true_image_shapes: A 2D int32 tensor of shape [batch_size, 3]
      containing the size of the unpadded original_image.
    max_gt_boxes: [batch_size] tensor representing the maximum number of
      groundtruth boxes to pad.

  Returns:
    A dictionary with:
    'original_image': A [batch_size, H, W, C] uint8 image tensor.
    'original_image_spatial_shape': A [batch_size, 2] tensor containing the
      original image sizes.
    'true_image_shape': A [batch_size, 3] tensor containing the size of
      the unpadded original_image.
    'key': A [batch_size] string tensor with image identifier.
    'detection_boxes': [batch_size, max_detections, 4] float32 tensor of boxes,
      in normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`.
    'detection_scores': [batch_size, max_detections] float32 tensor of scores.
    'detection_classes': [batch_size, max_detections] int64 tensor of 1-indexed
      classes.
    'detection_masks': [batch_size, max_detections, H, W] float32 tensor of
      binarized masks, reframed to full image masks.
    'num_detections': [batch_size] int64 tensor containing number of valid
      detections.
    'groundtruth_boxes': [batch_size, num_boxes, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`. (Optional)
    'groundtruth_classes': [batch_size, num_boxes] int64 tensor of 1-indexed
      classes. (Optional)
    'groundtruth_area': [batch_size, num_boxes] float32 tensor of bbox
      area. (Optional)
    'groundtruth_is_crowd': [batch_size, num_boxes] int64 tensor. (Optional)
    'groundtruth_difficult': [batch_size, num_boxes] int64 tensor. (Optional)
    'groundtruth_group_of': [batch_size, num_boxes] int64 tensor. (Optional)
    'groundtruth_instance_masks': 4D int64 tensor of instance masks
      (Optional).
    'num_groundtruth_boxes': [batch_size] tensor containing the maximum number
      of groundtruth boxes per image.

  Raises:
    ValueError: if original_image_spatial_shapes is not a 2D int32 tensor of
      shape [batch_size, 2].
    ValueError: if true_image_shapes is not a 2D int32 tensor of shape
      [batch_size, 3].
  """
    label_id_offset = 1  # Applying label id offset (b/63711816)

    input_data_fields = fields.InputDataFields
    if original_image_spatial_shapes is None:
        original_image_spatial_shapes = tf.tile(
            tf.expand_dims(tf.shape(images)[1:3], axis=0),
            multiples=[tf.shape(images)[0], 1])
    else:
        if (len(original_image_spatial_shapes.shape) != 2
                or original_image_spatial_shapes.shape[1] != 2):
            raise ValueError(
                '`original_image_spatial_shape` should be a 2D tensor of shape '
                '[batch_size, 2].')

    if true_image_shapes is None:
        true_image_shapes = tf.tile(tf.expand_dims(tf.shape(images)[1:4],
                                                   axis=0),
                                    multiples=[tf.shape(images)[0], 1])
    else:
        if (len(true_image_shapes.shape) != 2
                or true_image_shapes.shape[1] != 3):
            raise ValueError('`true_image_shapes` should be a 2D tensor of '
                             'shape [batch_size, 3].')

    output_dict = {
        input_data_fields.original_image:
        images,
        input_data_fields.key:
        keys,
        input_data_fields.original_image_spatial_shape:
        (original_image_spatial_shapes),
        input_data_fields.true_image_shape:
        true_image_shapes
    }

    detection_fields = fields.DetectionResultFields
    detection_boxes = detections[detection_fields.detection_boxes]
    detection_scores = detections[detection_fields.detection_scores]
    num_detections = tf.cast(detections[detection_fields.num_detections],
                             dtype=tf.int32)

    if class_agnostic:
        detection_classes = tf.ones_like(detection_scores, dtype=tf.int64)
    else:
        detection_classes = (
            tf.to_int64(detections[detection_fields.detection_classes]) +
            label_id_offset)

    if scale_to_absolute:
        output_dict[detection_fields.detection_boxes] = (
            shape_utils.static_or_dynamic_map_fn(
                _scale_box_to_absolute,
                elems=[detection_boxes, original_image_spatial_shapes],
                dtype=tf.float32))
    else:
        output_dict[detection_fields.detection_boxes] = detection_boxes
    output_dict[detection_fields.detection_classes] = detection_classes
    output_dict[detection_fields.detection_scores] = detection_scores
    output_dict[detection_fields.num_detections] = num_detections

    if detection_fields.detection_masks in detections:
        detection_masks = detections[detection_fields.detection_masks]
        # TODO(rathodv): This should be done in model's postprocess
        # function ideally.
        output_dict[detection_fields.detection_masks] = (
            shape_utils.static_or_dynamic_map_fn(
                _resize_detection_masks,
                elems=[
                    detection_boxes, detection_masks,
                    original_image_spatial_shapes
                ],
                dtype=tf.uint8))

    if detection_fields.detection_keypoints in detections:
        detection_keypoints = detections[detection_fields.detection_keypoints]
        output_dict[detection_fields.detection_keypoints] = detection_keypoints
        if scale_to_absolute:
            output_dict[detection_fields.detection_keypoints] = (
                shape_utils.static_or_dynamic_map_fn(
                    _scale_keypoint_to_absolute,
                    elems=[detection_keypoints, original_image_spatial_shapes],
                    dtype=tf.float32))

    if groundtruth:
        if max_gt_boxes is None:
            if input_data_fields.num_groundtruth_boxes in groundtruth:
                max_gt_boxes = groundtruth[
                    input_data_fields.num_groundtruth_boxes]
            else:
                raise ValueError(
                    'max_gt_boxes must be provided when processing batched examples.'
                )

        if input_data_fields.groundtruth_instance_masks in groundtruth:
            masks = groundtruth[input_data_fields.groundtruth_instance_masks]
            groundtruth[input_data_fields.groundtruth_instance_masks] = (
                shape_utils.static_or_dynamic_map_fn(
                    _resize_groundtruth_masks,
                    elems=[masks, original_image_spatial_shapes],
                    dtype=tf.uint8))

        output_dict.update(groundtruth)
        if scale_to_absolute:
            groundtruth_boxes = groundtruth[
                input_data_fields.groundtruth_boxes]
            output_dict[input_data_fields.groundtruth_boxes] = (
                shape_utils.static_or_dynamic_map_fn(
                    _scale_box_to_absolute,
                    elems=[groundtruth_boxes, original_image_spatial_shapes],
                    dtype=tf.float32))

        # For class-agnostic models, groundtruth classes all become 1.
        if class_agnostic:
            groundtruth_classes = groundtruth[
                input_data_fields.groundtruth_classes]
            groundtruth_classes = tf.ones_like(groundtruth_classes,
                                               dtype=tf.int64)
            output_dict[
                input_data_fields.groundtruth_classes] = groundtruth_classes

        output_dict[input_data_fields.num_groundtruth_boxes] = max_gt_boxes

    return output_dict
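A hedged usage sketch (placeholder tensors standing in for real model outputs; the dictionary keys follow the standard detection field names):

images = tf.zeros([1, 300, 300, 3], dtype=tf.uint8)
keys = tf.constant(['image_0'])
detections = {
    'detection_boxes': tf.zeros([1, 10, 4], dtype=tf.float32),
    'detection_scores': tf.zeros([1, 10], dtype=tf.float32),
    'detection_classes': tf.zeros([1, 10], dtype=tf.float32),
    'num_detections': tf.constant([10], dtype=tf.float32),
}
eval_dict = result_dict_for_batched_example(
    images, keys, detections, scale_to_absolute=True)
# eval_dict['detection_boxes'] is now in absolute pixel coordinates.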
Example 22
def batch_multiclass_non_max_suppression(boxes,
                                         scores,
                                         score_thresh,
                                         iou_thresh,
                                         max_size_per_class,
                                         max_total_size=0,
                                         clip_window=None,
                                         change_coordinate_frame=False,
                                         num_valid_boxes=None,
                                         masks=None,
                                         additional_fields=None,
                                         scope=None,
                                         parallel_iterations=32):
  """Multi-class version of non maximum suppression that operates on a batch.

  This op is similar to `multiclass_non_max_suppression` but operates on a batch
  of boxes and scores. See documentation for `multiclass_non_max_suppression`
  for details.

  Args:
    boxes: A [batch_size, num_anchors, q, 4] float32 tensor containing
      detections. If `q` is 1 then same boxes are used for all classes
        otherwise, if `q` is equal to number of classes, class-specific boxes
        are used.
    scores: A [batch_size, num_anchors, num_classes] float32 tensor containing
      the scores for each of the `num_anchors` detections.
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
      with previously selected boxes are removed).
    max_size_per_class: maximum number of retained boxes per class.
    max_total_size: maximum number of boxes retained over all classes. By
      default returns all boxes retained after capping boxes per class.
    clip_window: A float32 tensor of shape [batch_size, 4]  where each entry is
      of the form [y_min, x_min, y_max, x_max] representing the window to clip
      boxes to before performing non-max suppression. This argument can also be
      a tensor of shape [4] in which case, the same clip window is applied to
      all images in the batch. If clip_window is None, all boxes are used to
      perform non-max suppression.
    change_coordinate_frame: Whether to normalize coordinates after clipping
      relative to clip_window (this can only be set to True if a clip_window
      is provided)
    num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape
      [batch_size] representing the number of valid boxes to be considered
      for each image in the batch.  This parameter allows for ignoring zero
      paddings.
    masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width]
      float32 tensor containing box masks. `q` can be either number of classes
      or 1 depending on whether a separate mask is predicted per class.
    additional_fields: (optional) If not None, a dictionary that maps keys to
      tensors whose dimensions are [batch_size, num_anchors, ...].
    scope: tf scope name.
    parallel_iterations: (optional) number of batch items to process in
      parallel.

  Returns:
    'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor
      containing the non-max suppressed boxes.
    'nmsed_scores': A [batch_size, max_detections] float32 tensor containing
      the scores for the boxes.
    'nmsed_classes': A [batch_size, max_detections] float32 tensor
      containing the class for boxes.
    'nmsed_masks': (optional) a
      [batch_size, max_detections, mask_height, mask_width] float32 tensor
      containing masks for each selected box. This is set to None if input
      `masks` is None.
    'nmsed_additional_fields': (optional) a dictionary of
      [batch_size, max_detections, ...] float32 tensors corresponding to the
      tensors specified in the input `additional_fields`. This is not returned
      if input `additional_fields` is None.
    'num_detections': A [batch_size] int32 tensor indicating the number of
      valid detections per batch item. Only the top num_detections[i] entries in
      nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the
      entries are zero paddings.

  Raises:
    ValueError: if `q` in boxes.shape is not 1 or not equal to number of
      classes as inferred from scores.shape.
  """
  q = boxes.shape[2].value
  num_classes = scores.shape[2].value
  if q != 1 and q != num_classes:
    raise ValueError('third dimension of boxes must be either 1 or equal '
                     'to the third dimension of scores')
  if change_coordinate_frame and clip_window is None:
    raise ValueError('if change_coordinate_frame is True, then a clip_window '
                     'must be specified.')
  original_masks = masks
  original_additional_fields = additional_fields
  with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
    boxes_shape = boxes.shape
    batch_size = boxes_shape[0].value
    num_anchors = boxes_shape[1].value

    if batch_size is None:
      batch_size = tf.shape(boxes)[0]
    if num_anchors is None:
      num_anchors = tf.shape(boxes)[1]

    # If num valid boxes aren't provided, create one and mark all boxes as
    # valid.
    if num_valid_boxes is None:
      num_valid_boxes = tf.ones([batch_size], dtype=tf.int32) * num_anchors

    # If masks aren't provided, create dummy masks so we can only have one copy
    # of _single_image_nms_fn and discard the dummy masks after map_fn.
    if masks is None:
      masks_shape = tf.stack([batch_size, num_anchors, 1, 0, 0])
      masks = tf.zeros(masks_shape)

    if clip_window is None:
      clip_window = tf.stack([
          tf.reduce_min(boxes[:, :, :, 0]),
          tf.reduce_min(boxes[:, :, :, 1]),
          tf.reduce_max(boxes[:, :, :, 2]),
          tf.reduce_max(boxes[:, :, :, 3])
      ])
    if clip_window.shape.ndims == 1:
      clip_window = tf.tile(tf.expand_dims(clip_window, 0), [batch_size, 1])

    if additional_fields is None:
      additional_fields = {}

    def _single_image_nms_fn(args):
      """Runs NMS on a single image and returns padded output.

      Args:
        args: A list of tensors consisting of the following:
          per_image_boxes - A [num_anchors, q, 4] float32 tensor containing
            detections. If `q` is 1 then same boxes are used for all classes
            otherwise, if `q` is equal to number of classes, class-specific
            boxes are used.
          per_image_scores - A [num_anchors, num_classes] float32 tensor
            containing the scores for each of the `num_anchors` detections.
          per_image_masks - A [num_anchors, q, mask_height, mask_width] float32
            tensor containing box masks. `q` can be either number of classes
            or 1 depending on whether a separate mask is predicted per class.
          per_image_clip_window - A 1D float32 tensor of the form
            [ymin, xmin, ymax, xmax] representing the window to clip the boxes
            to.
          per_image_additional_fields - (optional) A variable number of float32
            tensors each with size [num_anchors, ...].
          per_image_num_valid_boxes - A scalar tensor of type `int32`
            representing the number of valid (non zero padded) boxes in this
            image. This parameter allows for ignoring zero paddings.

      Returns:
        'nmsed_boxes': A [max_detections, 4] float32 tensor containing the
          non-max suppressed boxes.
        'nmsed_scores': A [max_detections] float32 tensor containing the scores
          for the boxes.
        'nmsed_classes': A [max_detections] float32 tensor containing the class
          for boxes.
        'nmsed_masks': (optional) a [max_detections, mask_height, mask_width]
          float32 tensor containing masks for each selected box. This is set to
          None if input `masks` is None.
        'nmsed_additional_fields':  (optional) A variable number of float32
          tensors each with size [max_detections, ...] corresponding to the
          input `per_image_additional_fields`.
        'num_detections': A scalar int32 tensor indicating the number of valid
          detections in this image. Only the top num_detections entries in
          nmsed_boxes, nmsed_scores and nmsed_classes are valid; the rest of
          the entries are zero paddings.
      """
      per_image_boxes = args[0]
      per_image_scores = args[1]
      per_image_masks = args[2]
      per_image_clip_window = args[3]
      per_image_additional_fields = {
          key: value
          for key, value in zip(additional_fields, args[4:-1])
      }
      per_image_num_valid_boxes = args[-1]
      per_image_boxes = tf.reshape(
          tf.slice(per_image_boxes, 3 * [0],
                   tf.stack([per_image_num_valid_boxes, -1, -1])), [-1, q, 4])
      per_image_scores = tf.reshape(
          tf.slice(per_image_scores, [0, 0],
                   tf.stack([per_image_num_valid_boxes, -1])),
          [-1, num_classes])
      per_image_masks = tf.reshape(
          tf.slice(per_image_masks, 4 * [0],
                   tf.stack([per_image_num_valid_boxes, -1, -1, -1])),
          [-1, q, per_image_masks.shape[2].value,
           per_image_masks.shape[3].value])
      if per_image_additional_fields is not None:
        for key, tensor in per_image_additional_fields.items():
          additional_field_shape = tensor.get_shape()
          additional_field_dim = len(additional_field_shape)
          per_image_additional_fields[key] = tf.reshape(
              tf.slice(per_image_additional_fields[key],
                       additional_field_dim * [0],
                       tf.stack([per_image_num_valid_boxes] +
                                (additional_field_dim - 1) * [-1])),
              [-1] + [dim.value for dim in additional_field_shape[1:]])
      nmsed_boxlist = multiclass_non_max_suppression(
          per_image_boxes,
          per_image_scores,
          score_thresh,
          iou_thresh,
          max_size_per_class,
          max_total_size,
          clip_window=per_image_clip_window,
          change_coordinate_frame=change_coordinate_frame,
          masks=per_image_masks,
          additional_fields=per_image_additional_fields)
      padded_boxlist = box_list_ops.pad_or_clip_box_list(nmsed_boxlist,
                                                         max_total_size)
      num_detections = nmsed_boxlist.num_boxes()
      nmsed_boxes = padded_boxlist.get()
      nmsed_scores = padded_boxlist.get_field(fields.BoxListFields.scores)
      nmsed_classes = padded_boxlist.get_field(fields.BoxListFields.classes)
      nmsed_masks = padded_boxlist.get_field(fields.BoxListFields.masks)
      nmsed_additional_fields = [
          padded_boxlist.get_field(key) for key in per_image_additional_fields
      ]
      return ([nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks] +
              nmsed_additional_fields + [num_detections])

    num_additional_fields = 0
    if additional_fields is not None:
      num_additional_fields = len(additional_fields)
    num_nmsed_outputs = 4 + num_additional_fields

    batch_outputs = shape_utils.static_or_dynamic_map_fn(
        _single_image_nms_fn,
        elems=([boxes, scores, masks, clip_window] +
               list(additional_fields.values()) + [num_valid_boxes]),
        dtype=(num_nmsed_outputs * [tf.float32] + [tf.int32]),
        parallel_iterations=parallel_iterations)

    batch_nmsed_boxes = batch_outputs[0]
    batch_nmsed_scores = batch_outputs[1]
    batch_nmsed_classes = batch_outputs[2]
    batch_nmsed_masks = batch_outputs[3]
    batch_nmsed_additional_fields = {
        key: value
        for key, value in zip(additional_fields, batch_outputs[4:-1])
    }
    batch_num_detections = batch_outputs[-1]

    if original_masks is None:
      batch_nmsed_masks = None

    if original_additional_fields is None:
      batch_nmsed_additional_fields = None

    return (batch_nmsed_boxes, batch_nmsed_scores, batch_nmsed_classes,
            batch_nmsed_masks, batch_nmsed_additional_fields,
            batch_num_detections)
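# A minimal usage sketch for the batched NMS op above, assuming it lives in
# object_detection.core.post_processing as in the TF1 Object Detection API;
# the shapes, values and thresholds below are illustrative only.
import tensorflow as tf
from object_detection.core import post_processing

boxes = tf.random_uniform([2, 100, 1, 4], dtype=tf.float32)    # q == 1: shared boxes
scores = tf.random_uniform([2, 100, 3], dtype=tf.float32)      # 3 classes
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
 nmsed_additional_fields, num_detections) = (
     post_processing.batch_multiclass_non_max_suppression(
         boxes,
         scores,
         score_thresh=0.5,
         iou_thresh=0.6,
         max_size_per_class=10,
         max_total_size=20))
# nmsed_masks and nmsed_additional_fields come back as None here because no
# masks or additional fields were passed in.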
Example n. 23
def batch_multiclass_non_max_suppression(boxes,
                                         scores,
                                         score_thresh,
                                         iou_thresh,
                                         max_size_per_class,
                                         max_total_size=0,
                                         clip_window=None,
                                         change_coordinate_frame=False,
                                         num_valid_boxes=None,
                                         masks=None,
                                         additional_fields=None,
                                         scope=None,
                                         parallel_iterations=32):
  """Multi-class version of non maximum suppression that operates on a batch.

  This op is similar to `multiclass_non_max_suppression` but operates on a batch
  of boxes and scores. See documentation for `multiclass_non_max_suppression`
  for details.

  Args:
    boxes: A [batch_size, num_anchors, q, 4] float32 tensor containing
      detections. If `q` is 1 then same boxes are used for all classes
        otherwise, if `q` is equal to number of classes, class-specific boxes
        are used.
    scores: A [batch_size, num_anchors, num_classes] float32 tensor containing
      the scores for each of the `num_anchors` detections.
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
      with previously selected boxes are removed).
    max_size_per_class: maximum number of retained boxes per class.
    max_total_size: maximum number of boxes retained over all classes. By
      default returns all boxes retained after capping boxes per class.
    clip_window: A float32 tensor of shape [batch_size, 4]  where each entry is
      of the form [y_min, x_min, y_max, x_max] representing the window to clip
      boxes to before performing non-max suppression. This argument can also be
      a tensor of shape [4] in which case, the same clip window is applied to
      all images in the batch. If clip_window is None, all boxes are used to
      perform non-max suppression.
    change_coordinate_frame: Whether to normalize coordinates after clipping
      relative to clip_window (this can only be set to True if a clip_window
      is provided)
    num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape
      [batch_size] representing the number of valid boxes to be considered
      for each image in the batch.  This parameter allows for ignoring zero
      paddings.
    masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width]
      float32 tensor containing box masks. `q` can be either number of classes
      or 1 depending on whether a separate mask is predicted per class.
    additional_fields: (optional) If not None, a dictionary that maps keys to
      tensors whose dimensions are [batch_size, num_anchors, ...].
    scope: tf scope name.
    parallel_iterations: (optional) number of batch items to process in
      parallel.

  Returns:
    'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor
      containing the non-max suppressed boxes.
    'nmsed_scores': A [batch_size, max_detections] float32 tensor containing
      the scores for the boxes.
    'nmsed_classes': A [batch_size, max_detections] float32 tensor
      containing the class for boxes.
    'nmsed_masks': (optional) a
      [batch_size, max_detections, mask_height, mask_width] float32 tensor
      containing masks for each selected box. This is set to None if input
      `masks` is None.
    'nmsed_additional_fields': (optional) a dictionary of
      [batch_size, max_detections, ...] float32 tensors corresponding to the
      tensors specified in the input `additional_fields`. This is not returned
      if input `additional_fields` is None.
    'num_detections': A [batch_size] int32 tensor indicating the number of
      valid detections per batch item. Only the top num_detections[i] entries in
      nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the
      entries are zero paddings.

  Raises:
    ValueError: if `q` in boxes.shape is not 1 or not equal to number of
      classes as inferred from scores.shape.
  """
  q = boxes.shape[2].value
  num_classes = scores.shape[2].value
  if q != 1 and q != num_classes:
    raise ValueError('third dimension of boxes must be either 1 or equal '
                     'to the third dimension of scores')
  if change_coordinate_frame and clip_window is None:
    raise ValueError('if change_coordinate_frame is True, then a clip_window'
                     'must be specified.')
  original_masks = masks
  original_additional_fields = additional_fields
  with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
    boxes_shape = boxes.shape
    batch_size = boxes_shape[0].value
    num_anchors = boxes_shape[1].value

    if batch_size is None:
      batch_size = tf.shape(boxes)[0]
    if num_anchors is None:
      num_anchors = tf.shape(boxes)[1]

    # If num valid boxes aren't provided, create one and mark all boxes as
    # valid.
    if num_valid_boxes is None:
      num_valid_boxes = tf.ones([batch_size], dtype=tf.int32) * num_anchors

    # If masks aren't provided, create dummy masks so we can only have one copy
    # of _single_image_nms_fn and discard the dummy masks after map_fn.
    if masks is None:
      masks_shape = tf.stack([batch_size, num_anchors, 1, 0, 0])
      masks = tf.zeros(masks_shape)

    if clip_window is None:
      clip_window = tf.stack([
          tf.reduce_min(boxes[:, :, :, 0]),
          tf.reduce_min(boxes[:, :, :, 1]),
          tf.reduce_max(boxes[:, :, :, 2]),
          tf.reduce_max(boxes[:, :, :, 3])
      ])
    if clip_window.shape.ndims == 1:
      clip_window = tf.tile(tf.expand_dims(clip_window, 0), [batch_size, 1])

    if additional_fields is None:
      additional_fields = {}

    def _single_image_nms_fn(args):
      """Runs NMS on a single image and returns padded output.

      Args:
        args: A list of tensors consisting of the following:
          per_image_boxes - A [num_anchors, q, 4] float32 tensor containing
            detections. If `q` is 1 then same boxes are used for all classes
            otherwise, if `q` is equal to number of classes, class-specific
            boxes are used.
          per_image_scores - A [num_anchors, num_classes] float32 tensor
            containing the scores for each of the `num_anchors` detections.
          per_image_masks - A [num_anchors, q, mask_height, mask_width] float32
            tensor containing box masks. `q` can be either number of classes
            or 1 depending on whether a separate mask is predicted per class.
          per_image_clip_window - A 1D float32 tensor of the form
            [ymin, xmin, ymax, xmax] representing the window to clip the boxes
            to.
          per_image_additional_fields - (optional) A variable number of float32
            tensors each with size [num_anchors, ...].
          per_image_num_valid_boxes - A scalar tensor of type `int32`
            representing the number of valid (non zero padded) boxes in this
            image. This parameter allows for ignoring zero paddings.

      Returns:
        'nmsed_boxes': A [max_detections, 4] float32 tensor containing the
          non-max suppressed boxes.
        'nmsed_scores': A [max_detections] float32 tensor containing the scores
          for the boxes.
        'nmsed_classes': A [max_detections] float32 tensor containing the class
          for boxes.
        'nmsed_masks': (optional) a [max_detections, mask_height, mask_width]
          float32 tensor containing masks for each selected box. This is set to
          None if input `masks` is None.
        'nmsed_additional_fields':  (optional) A variable number of float32
          tensors each with size [max_detections, ...] corresponding to the
          input `per_image_additional_fields`.
        'num_detections': A scalar int32 tensor indicating the number of valid
          detections in this image. Only the top num_detections entries in
          nmsed_boxes, nmsed_scores and nmsed_classes are valid; the rest of
          the entries are zero paddings.
      """
      per_image_boxes = args[0]
      per_image_scores = args[1]
      per_image_masks = args[2]
      per_image_clip_window = args[3]
      per_image_additional_fields = {
          key: value
          for key, value in zip(additional_fields, args[4:-1])
      }
      per_image_num_valid_boxes = args[-1]
      per_image_boxes = tf.reshape(
          tf.slice(per_image_boxes, 3 * [0],
                   tf.stack([per_image_num_valid_boxes, -1, -1])), [-1, q, 4])
      per_image_scores = tf.reshape(
          tf.slice(per_image_scores, [0, 0],
                   tf.stack([per_image_num_valid_boxes, -1])),
          [-1, num_classes])
      per_image_masks = tf.reshape(
          tf.slice(per_image_masks, 4 * [0],
                   tf.stack([per_image_num_valid_boxes, -1, -1, -1])),
          [-1, q, per_image_masks.shape[2].value,
           per_image_masks.shape[3].value])
      if per_image_additional_fields is not None:
        for key, tensor in per_image_additional_fields.items():
          additional_field_shape = tensor.get_shape()
          additional_field_dim = len(additional_field_shape)
          per_image_additional_fields[key] = tf.reshape(
              tf.slice(per_image_additional_fields[key],
                       additional_field_dim * [0],
                       tf.stack([per_image_num_valid_boxes] +
                                (additional_field_dim - 1) * [-1])),
              [-1] + [dim.value for dim in additional_field_shape[1:]])
      nmsed_boxlist = multiclass_non_max_suppression(
          per_image_boxes,
          per_image_scores,
          score_thresh,
          iou_thresh,
          max_size_per_class,
          max_total_size,
          clip_window=per_image_clip_window,
          change_coordinate_frame=change_coordinate_frame,
          masks=per_image_masks,
          additional_fields=per_image_additional_fields)
      padded_boxlist = box_list_ops.pad_or_clip_box_list(nmsed_boxlist,
                                                         max_total_size)
      num_detections = nmsed_boxlist.num_boxes()
      nmsed_boxes = padded_boxlist.get()
      nmsed_scores = padded_boxlist.get_field(fields.BoxListFields.scores)
      nmsed_classes = padded_boxlist.get_field(fields.BoxListFields.classes)
      nmsed_masks = padded_boxlist.get_field(fields.BoxListFields.masks)
      nmsed_additional_fields = [
          padded_boxlist.get_field(key) for key in per_image_additional_fields
      ]
      return ([nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks] +
              nmsed_additional_fields + [num_detections])

    num_additional_fields = 0
    if additional_fields is not None:
      num_additional_fields = len(additional_fields)
    num_nmsed_outputs = 4 + num_additional_fields

    batch_outputs = shape_utils.static_or_dynamic_map_fn(
        _single_image_nms_fn,
        elems=([boxes, scores, masks, clip_window] +
               list(additional_fields.values()) + [num_valid_boxes]),
        dtype=(num_nmsed_outputs * [tf.float32] + [tf.int32]),
        parallel_iterations=parallel_iterations)

    batch_nmsed_boxes = batch_outputs[0]
    batch_nmsed_scores = batch_outputs[1]
    batch_nmsed_classes = batch_outputs[2]
    batch_nmsed_masks = batch_outputs[3]
    batch_nmsed_additional_fields = {
        key: value
        for key, value in zip(additional_fields, batch_outputs[4:-1])
    }
    batch_num_detections = batch_outputs[-1]

    if original_masks is None:
      batch_nmsed_masks = None

    if original_additional_fields is None:
      batch_nmsed_additional_fields = None

    return (batch_nmsed_boxes, batch_nmsed_scores, batch_nmsed_classes,
            batch_nmsed_masks, batch_nmsed_additional_fields,
            batch_num_detections)
Example n. 24
def filter_bbox(proposal_boxes, filter_boxes, proposal_scores, proposal_classes=None, filter_threshold=0.5,
                min_number=1, max_number=None):
    """Returns proposal_boxes_result and validation.

        Args:
            proposal_boxes: A float tensor with shape
                [batch_size, num_proposals, 4] representing the (potentially zero
                padded) proposal boxes for all images in the batch.
                the format is such as [x1,y2,x2,y2]
            proposal_scores: A float tensor with shape
                [batch_size, num_proposals, num_class] representing the (potentially zero
                added) proposal boxes for all images in the batch.
            filter_boxes: A float tensor with shape
                [batch_size, num_filter, 4] representing the filter proposal boxes (potentially zero
                padded) proposal boxes in the batch.
            filter_threshold: A float number for threshold in iou'

        Returns:
            proposal_boxes_result: A float tensor with shape
                [batch_size, num_proposals, 4] representing the (potentially zero
                padded) proposal boxes for all images in the batch.
            validation: A float tensor with shape
                [batch_size,] representing the vaildative proposal boxes for the batch.
    """

    proposal_boxes_shape = shape_utils.combined_static_and_dynamic_shape(proposal_boxes)
    proposal_areas = tf.multiply(tf.maximum(0.0, proposal_boxes[..., 2] - proposal_boxes[..., 0]),
                                 tf.maximum(0.0, proposal_boxes[..., 3] - proposal_boxes[..., 1]))
    proposal_boxes = tf.expand_dims(proposal_boxes, axis=2)
    filter_boxes = tf.expand_dims(filter_boxes, axis=1)
    x1 = tf.maximum(proposal_boxes[..., 0], filter_boxes[..., 0])
    y1 = tf.maximum(proposal_boxes[..., 1], filter_boxes[..., 1])
    x2 = tf.minimum(proposal_boxes[..., 2], filter_boxes[..., 2])
    y2 = tf.minimum(proposal_boxes[..., 3], filter_boxes[..., 3])
    proposal_boxes = tf.squeeze(proposal_boxes, axis=2)
    filter_boxes = tf.squeeze(filter_boxes, axis=1)
    proposal_bg_areas = tf.multiply(tf.maximum(0.0, x2 - x1), tf.maximum(0.0, y2 - y1))
    proposal_bg_ious = tf.reduce_sum(proposal_bg_areas, axis=2)

    # Guard against division by zero for degenerate (zero-area) proposals.
    safe_proposal_areas = tf.where(tf.equal(proposal_areas, 0),
                                   tf.ones_like(proposal_areas),
                                   proposal_areas)
    proposal_bg_areas_iou = tf.div(proposal_bg_ious, safe_proposal_areas)
    keeps = tf.greater_equal(proposal_bg_areas_iou, filter_threshold)
    validation = tf.reduce_sum(tf.to_int32(keeps), axis=1)
    max_numbers = tf.reduce_max(validation)
    max_numbers = tf.maximum(max_numbers, min_number)

    def _per_batch_gather_padding(args):
        """Returns proposal_boxes_result, proposal_score_result and validation.

        Args:
            args[0]: boxes. A float tensor with shape
                        [num_proposals, 4] representing the (potentially zero
                        padded) proposal boxes for all images in the batch.
                        the format is such as [x1,y2,x2,y2]
            args[1]: scores. A float tensor with shape
                        [num_proposals, num_class] representing the (potentially zero
                        added) proposal boxes for all images in the batch.
            args[2]: keep. A bool tensor with shape
                        [num_proposals, 1] representing the coordination needing to
                        keep with true.
            args[3]: max_number. A int tensor with shape
                        [,] representing the kept proposal's number with padding zeros

            Returns:
                    proposal_boxes_result: A float tensor with shape
                        [max_number, 4] representing the (potentially zero
                        padded) proposal boxes for all images in the batch.
                    proposal_score_result: A float tensor with shape
                        [max_number, num_class] representing the (potentially zero
                        padded) proposal boxes for all images in the batch.
            """
        boxes = args[0]
        scores = args[1]
        keep = args[2]
        if proposal_classes is not None:
            classes = args[3]
        result_indice = tf.where(keep)
        result_indice = tf.squeeze(result_indice, axis=-1)
        boxes_result = tf.gather(boxes, result_indice)
        boxes_result = shape_utils.pad_or_clip_tensor(
            boxes_result, max_numbers)
        score_result = tf.gather(scores, result_indice)
        score_result = shape_utils.pad_or_clip_tensor(
            score_result, max_numbers)
        result = [boxes_result, score_result]
        if proposal_classes is not None:
            class_result = tf.gather(classes, result_indice)
            class_result = shape_utils.pad_or_clip_tensor(
                class_result, max_numbers)
            result.append(class_result)
        return result

    if proposal_classes is not None:
        proposal_boxes_result, proposal_score_result, proposal_class_result = shape_utils.static_or_dynamic_map_fn(
            _per_batch_gather_padding,
            [proposal_boxes,
             proposal_scores,
             keeps, proposal_classes], dtype=[tf.float32, tf.float32, tf.float32])

        # test using
        # return proposal_boxes_result, validation, proposal_bg_areas, proposal_bg_ious, proposal_bg_areas_iou
        return proposal_boxes_result, proposal_score_result, proposal_class_result, validation, max_numbers  # change ???
    else:
        proposal_boxes_result, proposal_score_result = shape_utils.static_or_dynamic_map_fn(_per_batch_gather_padding,
                                                                                            [proposal_boxes,
                                                                                             proposal_scores,
                                                                                             keeps])

        # test using
        # return proposal_boxes_result, validation, proposal_bg_areas, proposal_bg_ious, proposal_bg_areas_iou
        return proposal_boxes_result, proposal_score_result, validation, max_numbers  # change ???
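# A small, self-contained sketch of how filter_bbox above might be called; the
# box and score values are illustrative and the default filter_threshold=0.5
# is used.
import tensorflow as tf

proposal_boxes = tf.constant([[[0., 0., 10., 10.],
                               [20., 20., 30., 30.]]])      # [1, 2, 4]
proposal_scores = tf.constant([[[0.9, 0.1],
                                [0.2, 0.8]]])               # [1, 2, 2]
filter_boxes = tf.constant([[[0., 0., 12., 12.]]])          # [1, 1, 4]

boxes_out, scores_out, validation, max_numbers = filter_bbox(
    proposal_boxes, filter_boxes, proposal_scores)
# Only the first proposal overlaps the filter box enough to be kept, so
# validation evaluates to [1] and the outputs are padded to max_numbers rows.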
Example n. 25
 def graph_fn(input_tensor):
   return shape_utils.static_or_dynamic_map_fn(fn, input_tensor)
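# The fragment above comes from a test and relies on an `fn` defined in the
# enclosing scope. A hypothetical completion (names and values are
# assumptions, and graph_fn is treated as if defined at module level with the
# original test file's `tf` and `shape_utils` imports):
def fn(row):
  return tf.reduce_sum(row)

input_tensor = tf.constant([[1., 2., 3.], [4., 5., 6.]])
output = graph_fn(input_tensor)   # evaluates to [6., 15.]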
Example n. 26
def prune_outside_window(dp_num_points,
                         dp_part_ids,
                         dp_surface_coords,
                         window,
                         scope=None):
    """Prunes DensePose points that fall outside a given window.

  This function replaces points that fall outside the given window with zeros.
  See also clip_to_window which clips any DensePose points that fall outside the
  given window.

  Note that this operation uses dynamic shapes, and therefore is not currently
  suitable for TPU.

  Args:
    dp_num_points: a tensor of shape [num_instances] that indicates how many
      (non-padded) DensePose points there are per instance.
    dp_part_ids: a tensor of shape [num_instances, num_points] with DensePose
      part ids. These part_ids are 0-indexed, where the first non-background
      part has index 0.
    dp_surface_coords: a tensor of shape [num_instances, num_points, 4] with
      DensePose surface coordinates in (y, x, v, u) format.
    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
      window outside of which the op should prune the points.
    scope: name scope.

  Returns:
    new_dp_num_points: a tensor of shape [num_instances] that indicates how many
      (non-padded) DensePose points there are per instance after pruning.
    new_dp_part_ids: a tensor of shape [num_instances, num_points] with
      DensePose part ids. These part_ids are 0-indexed, where the first
      non-background part has index 0.
    new_dp_surface_coords: a tensor of shape [num_instances, num_points, 4] with
      DensePose surface coordinates after pruning.
  """
    with tf.name_scope(scope, 'DensePosePruneOutsideWindow'):
        y, x, _, _ = tf.unstack(dp_surface_coords, axis=-1)
        win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)

        num_instances, num_points = shape_utils.combined_static_and_dynamic_shape(
            dp_part_ids)
        dp_num_points_tiled = tf.tile(dp_num_points[:, tf.newaxis],
                                      multiples=[1, num_points])
        range_tiled = tf.tile(tf.range(num_points)[tf.newaxis, :],
                              multiples=[num_instances, 1])
        valid_initial = range_tiled < dp_num_points_tiled
        valid_in_window = tf.logical_and(
            tf.logical_and(y >= win_y_min, y <= win_y_max),
            tf.logical_and(x >= win_x_min, x <= win_x_max))
        valid_indices = tf.logical_and(valid_initial, valid_in_window)

        new_dp_num_points = tf.math.reduce_sum(tf.cast(valid_indices,
                                                       tf.int32),
                                               axis=1)
        max_num_points = tf.math.reduce_max(new_dp_num_points)

        def gather_and_reshuffle(elems):
            dp_part_ids, dp_surface_coords, valid_indices = elems
            locs = tf.where(valid_indices)[:, 0]
            valid_part_ids = tf.gather(dp_part_ids, locs, axis=0)
            valid_part_ids_padded = shape_utils.pad_or_clip_nd(
                valid_part_ids, output_shape=[max_num_points])
            valid_surface_coords = tf.gather(dp_surface_coords, locs, axis=0)
            valid_surface_coords_padded = shape_utils.pad_or_clip_nd(
                valid_surface_coords, output_shape=[max_num_points, 4])
            return [valid_part_ids_padded, valid_surface_coords_padded]

        new_dp_part_ids, new_dp_surface_coords = (
            shape_utils.static_or_dynamic_map_fn(
                gather_and_reshuffle,
                elems=[dp_part_ids, dp_surface_coords, valid_indices],
                dtype=[tf.int32, tf.float32],
                back_prop=False))
        return new_dp_num_points, new_dp_part_ids, new_dp_surface_coords
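# A minimal sketch of calling the DensePose pruning op above; the coordinates
# and window are illustrative. The second point falls outside the window
# [0.0, 0.0, 0.5, 0.5] and is pruned.
import tensorflow as tf

dp_num_points = tf.constant([2])                             # one instance, 2 points
dp_part_ids = tf.constant([[3, 7]])                          # [1, 2]
dp_surface_coords = tf.constant([[[0.2, 0.2, 0.5, 0.5],      # inside window
                                  [0.8, 0.9, 0.5, 0.5]]])    # outside window
window = tf.constant([0.0, 0.0, 0.5, 0.5])

new_num, new_part_ids, new_coords = prune_outside_window(
    dp_num_points, dp_part_ids, dp_surface_coords, window)
# new_num evaluates to [1]; surviving points are re-packed to the front and the
# outputs are re-padded to the new maximum number of points.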
Example n. 27
def result_dict_for_batched_example(images,
                                    keys,
                                    detections,
                                    groundtruth=None,
                                    class_agnostic=False,
                                    scale_to_absolute=False,
                                    original_image_spatial_shapes=None,
                                    true_image_shapes=None,
                                    max_gt_boxes=None):
  """Merges all detection and groundtruth information for a single example.

  Note that evaluation tools require classes that are 1-indexed, and so this
  function performs the offset. If `class_agnostic` is True, all output classes
  have label 1.

  Args:
    images: A single 4D uint8 image tensor of shape [batch_size, H, W, C].
    keys: A [batch_size] string tensor with image identifier.
    detections: A dictionary of detections, returned from
      DetectionModel.postprocess().
    groundtruth: (Optional) Dictionary of groundtruth items, with fields:
      'groundtruth_boxes': [batch_size, max_number_of_boxes, 4] float32 tensor
        of boxes, in normalized coordinates.
      'groundtruth_classes':  [batch_size, max_number_of_boxes] int64 tensor of
        1-indexed classes.
      'groundtruth_area': [batch_size, max_number_of_boxes] float32 tensor of
        bbox area. (Optional)
      'groundtruth_is_crowd':[batch_size, max_number_of_boxes] int64
        tensor. (Optional)
      'groundtruth_difficult': [batch_size, max_number_of_boxes] int64
        tensor. (Optional)
      'groundtruth_group_of': [batch_size, max_number_of_boxes] int64
        tensor. (Optional)
      'groundtruth_instance_masks': 4D int64 tensor of instance
        masks (Optional).
    class_agnostic: Boolean indicating whether the detections are class-agnostic
      (i.e. binary). Default False.
    scale_to_absolute: Boolean indicating whether boxes and keypoints should be
      scaled to absolute coordinates. Note that for IoU based evaluations, it
      does not matter whether boxes are expressed in absolute or relative
      coordinates. Default False.
    original_image_spatial_shapes: A 2D int32 tensor of shape [batch_size, 2]
      used to resize the image. When set to None, the image size is retained.
    true_image_shapes: A 2D int32 tensor of shape [batch_size, 3]
      containing the size of the unpadded original_image.
    max_gt_boxes: [batch_size] tensor representing the maximum number of
      groundtruth boxes to pad.

  Returns:
    A dictionary with:
    'original_image': A [batch_size, H, W, C] uint8 image tensor.
    'original_image_spatial_shape': A [batch_size, 2] tensor containing the
      original image sizes.
    'true_image_shape': A [batch_size, 3] tensor containing the size of
      the unpadded original_image.
    'key': A [batch_size] string tensor with image identifier.
    'detection_boxes': [batch_size, max_detections, 4] float32 tensor of boxes,
      in normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`.
    'detection_scores': [batch_size, max_detections] float32 tensor of scores.
    'detection_classes': [batch_size, max_detections] int64 tensor of 1-indexed
      classes.
    'detection_masks': [batch_size, max_detections, H, W] float32 tensor of
      binarized masks, reframed to full image masks.
    'num_detections': [batch_size] int64 tensor containing number of valid
      detections.
    'groundtruth_boxes': [batch_size, num_boxes, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`. (Optional)
    'groundtruth_classes': [batch_size, num_boxes] int64 tensor of 1-indexed
      classes. (Optional)
    'groundtruth_area': [batch_size, num_boxes] float32 tensor of bbox
      area. (Optional)
    'groundtruth_is_crowd': [batch_size, num_boxes] int64 tensor. (Optional)
    'groundtruth_difficult': [batch_size, num_boxes] int64 tensor. (Optional)
    'groundtruth_group_of': [batch_size, num_boxes] int64 tensor. (Optional)
    'groundtruth_instance_masks': 4D int64 tensor of instance masks
      (Optional).
    'num_groundtruth_boxes': [batch_size] tensor containing the maximum number
      of groundtruth boxes per image.

  Raises:
    ValueError: if original_image_spatial_shapes is not a 2D int32 tensor of
      shape [batch_size, 2].
    ValueError: if true_image_shapes is not a 2D int32 tensor of shape
      [batch_size, 3].
  """
  label_id_offset = 1  # Applying label id offset (b/63711816)

  input_data_fields = fields.InputDataFields
  if original_image_spatial_shapes is None:
    original_image_spatial_shapes = tf.tile(
        tf.expand_dims(tf.shape(images)[1:3], axis=0),
        multiples=[tf.shape(images)[0], 1])
  else:
    if (len(original_image_spatial_shapes.shape) != 2 or
        original_image_spatial_shapes.shape[1] != 2):
      raise ValueError(
          '`original_image_spatial_shape` should be a 2D tensor of shape '
          '[batch_size, 2].')

  if true_image_shapes is None:
    true_image_shapes = tf.tile(
        tf.expand_dims(tf.shape(images)[1:4], axis=0),
        multiples=[tf.shape(images)[0], 1])
  else:
    if (len(true_image_shapes.shape) != 2
        or true_image_shapes.shape[1] != 3):
      raise ValueError('`true_image_shapes` should be a 2D tensor of '
                       'shape [batch_size, 3].')

  output_dict = {
      input_data_fields.original_image:
          images,
      input_data_fields.key:
          keys,
      input_data_fields.original_image_spatial_shape: (
          original_image_spatial_shapes),
      input_data_fields.true_image_shape:
          true_image_shapes
  }

  detection_fields = fields.DetectionResultFields
  detection_boxes = detections[detection_fields.detection_boxes]
  detection_scores = detections[detection_fields.detection_scores]
  num_detections = tf.to_int32(detections[detection_fields.num_detections])

  if class_agnostic:
    detection_classes = tf.ones_like(detection_scores, dtype=tf.int64)
  else:
    detection_classes = (
        tf.to_int64(detections[detection_fields.detection_classes]) +
        label_id_offset)

  if scale_to_absolute:
    output_dict[detection_fields.detection_boxes] = (
        shape_utils.static_or_dynamic_map_fn(
            _scale_box_to_absolute,
            elems=[detection_boxes, original_image_spatial_shapes],
            dtype=tf.float32))
  else:
    output_dict[detection_fields.detection_boxes] = detection_boxes
  output_dict[detection_fields.detection_classes] = detection_classes
  output_dict[detection_fields.detection_scores] = detection_scores
  output_dict[detection_fields.num_detections] = num_detections

  if detection_fields.detection_masks in detections:
    detection_masks = detections[detection_fields.detection_masks]
    # TODO(rathodv): This should be done in model's postprocess
    # function ideally.
    output_dict[detection_fields.detection_masks] = (
        shape_utils.static_or_dynamic_map_fn(
            _resize_detection_masks,
            elems=[detection_boxes, detection_masks,
                   original_image_spatial_shapes],
            dtype=tf.uint8))

  if detection_fields.detection_keypoints in detections:
    detection_keypoints = detections[detection_fields.detection_keypoints]
    output_dict[detection_fields.detection_keypoints] = detection_keypoints
    if scale_to_absolute:
      output_dict[detection_fields.detection_keypoints] = (
          shape_utils.static_or_dynamic_map_fn(
              _scale_keypoint_to_absolute,
              elems=[detection_keypoints, original_image_spatial_shapes],
              dtype=tf.float32))

  if groundtruth:
    if max_gt_boxes is None:
      if input_data_fields.num_groundtruth_boxes in groundtruth:
        max_gt_boxes = groundtruth[input_data_fields.num_groundtruth_boxes]
      else:
        raise ValueError(
            'max_gt_boxes must be provided when processing batched examples.')

    if input_data_fields.groundtruth_instance_masks in groundtruth:
      masks = groundtruth[input_data_fields.groundtruth_instance_masks]
      groundtruth[input_data_fields.groundtruth_instance_masks] = (
          shape_utils.static_or_dynamic_map_fn(
              _resize_groundtruth_masks,
              elems=[masks, original_image_spatial_shapes],
              dtype=tf.uint8))

    output_dict.update(groundtruth)
    if scale_to_absolute:
      groundtruth_boxes = groundtruth[input_data_fields.groundtruth_boxes]
      output_dict[input_data_fields.groundtruth_boxes] = (
          shape_utils.static_or_dynamic_map_fn(
              _scale_box_to_absolute,
              elems=[groundtruth_boxes, original_image_spatial_shapes],
              dtype=tf.float32))

    # For class-agnostic models, groundtruth classes all become 1.
    if class_agnostic:
      groundtruth_classes = groundtruth[input_data_fields.groundtruth_classes]
      groundtruth_classes = tf.ones_like(groundtruth_classes, dtype=tf.int64)
      output_dict[input_data_fields.groundtruth_classes] = groundtruth_classes

    output_dict[input_data_fields.num_groundtruth_boxes] = max_gt_boxes

  return output_dict
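# A rough sketch of wiring model outputs into result_dict_for_batched_example;
# the detection values are placeholders for whatever the model produced, and
# the standard_fields import path is assumed to match the TF1 Object Detection
# API layout.
import tensorflow as tf
from object_detection.core import standard_fields as fields

detection_fields = fields.DetectionResultFields
images = tf.zeros([1, 300, 300, 3], dtype=tf.uint8)
keys = tf.constant(['image_0'])
detections = {
    detection_fields.detection_boxes: tf.zeros([1, 100, 4], tf.float32),
    detection_fields.detection_scores: tf.zeros([1, 100], tf.float32),
    detection_fields.detection_classes: tf.zeros([1, 100], tf.float32),
    detection_fields.num_detections: tf.constant([100.0]),
}
eval_dict = result_dict_for_batched_example(images, keys, detections)
# eval_dict now carries the original image, key, shapes and the (1-indexed)
# detection tensors keyed by the standard field names.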
Example n. 28
  def loss(self, prediction_dict, true_image_shapes, scope=None):
    """Compute scalar loss tensors with respect to provided groundtruth.

    Calling this function requires that groundtruth tensors have been
    provided via the provide_groundtruth function.

    Args:
      prediction_dict: a dictionary holding prediction tensors with
        1) box_encodings: 3-D float tensor of shape [batch_size, num_anchors,
          box_code_dimension] containing predicted boxes.
        2) class_predictions_with_background: 3-D float tensor of shape
          [batch_size, num_anchors, num_classes+1] containing class predictions
          (logits) for each of the anchors. Note that this tensor *includes*
          background class predictions.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros.
      scope: Optional scope name.

    Returns:
      a dictionary mapping loss keys (`localization_loss` and
        `classification_loss`) to scalar tensors representing corresponding loss
        values.
    """
    with tf.name_scope(scope, 'Loss', prediction_dict.values()):
      keypoints = None
      if self.groundtruth_has_field(fields.BoxListFields.keypoints):
        keypoints = self.groundtruth_lists(fields.BoxListFields.keypoints)
      weights = None
      if self.groundtruth_has_field(fields.BoxListFields.weights):
        weights = self.groundtruth_lists(fields.BoxListFields.weights)
      (batch_cls_targets, batch_cls_weights, batch_reg_targets,
       batch_reg_weights, match_list) = self._assign_targets(
           self.groundtruth_lists(fields.BoxListFields.boxes),
           self.groundtruth_lists(fields.BoxListFields.classes),
           keypoints, weights)
      if self._add_summaries:
        self._summarize_target_assignment(
            self.groundtruth_lists(fields.BoxListFields.boxes), match_list)

      if self._random_example_sampler:
        batch_sampled_indicator = tf.to_float(
            shape_utils.static_or_dynamic_map_fn(
                self._minibatch_subsample_fn,
                [batch_cls_targets, batch_cls_weights],
                dtype=tf.bool,
                parallel_iterations=self._parallel_iterations,
                back_prop=True))
        batch_reg_weights = tf.multiply(batch_sampled_indicator,
                                        batch_reg_weights)
        batch_cls_weights = tf.multiply(batch_sampled_indicator,
                                        batch_cls_weights)

      location_losses = self._localization_loss(
          prediction_dict['box_encodings'],
          batch_reg_targets,
          ignore_nan_targets=True,
          weights=batch_reg_weights)
      cls_losses = ops.reduce_sum_trailing_dimensions(
          self._classification_loss(
              prediction_dict['class_predictions_with_background'],
              batch_cls_targets,
              weights=batch_cls_weights),
          ndims=2)

      if self._hard_example_miner:
        (localization_loss, classification_loss) = self._apply_hard_mining(
            location_losses, cls_losses, prediction_dict, match_list)
        if self._add_summaries:
          self._hard_example_miner.summarize()
      else:
        if self._add_summaries:
          class_ids = tf.argmax(batch_cls_targets, axis=2)
          flattened_class_ids = tf.reshape(class_ids, [-1])
          flattened_classification_losses = tf.reshape(cls_losses, [-1])
          self._summarize_anchor_classification_loss(
              flattened_class_ids, flattened_classification_losses)
        localization_loss = tf.reduce_sum(location_losses)
        classification_loss = tf.reduce_sum(cls_losses)

      # Optionally normalize by number of positive matches
      normalizer = tf.constant(1.0, dtype=tf.float32)
      if self._normalize_loss_by_num_matches:
        normalizer = tf.maximum(tf.to_float(tf.reduce_sum(batch_reg_weights)),
                                1.0)

      localization_loss_normalizer = normalizer
      if self._normalize_loc_loss_by_codesize:
        localization_loss_normalizer *= self._box_coder.code_size
      localization_loss = tf.multiply((self._localization_loss_weight /
                                       localization_loss_normalizer),
                                      localization_loss,
                                      name='localization_loss')
      classification_loss = tf.multiply((self._classification_loss_weight /
                                         normalizer), classification_loss,
                                        name='classification_loss')

      loss_dict = {
          str(localization_loss.op.name): localization_loss,
          str(classification_loss.op.name): classification_loss
      }
    return loss_dict
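# The random-example sampling branch above maps a per-image subsample function
# over [batch_cls_targets, batch_cls_weights] with static_or_dynamic_map_fn.
# The stand-in below is NOT the model's actual _minibatch_subsample_fn (which
# balances positives and negatives); it is only a self-contained sketch of the
# same mapping pattern, with assumed names and shapes.
import tensorflow as tf
from object_detection.utils import shape_utils

def _keep_positives_only(args):
  """Toy per-image sampler: keep anchors whose class weight is positive."""
  cls_targets, cls_weights = args
  del cls_targets  # A real sampler would also look at the targets.
  return tf.greater(cls_weights, 0.0)

batch_cls_targets = tf.zeros([4, 8], tf.float32)
batch_cls_weights = tf.random_uniform([4, 8]) - 0.5
batch_sampled_indicator = tf.to_float(
    shape_utils.static_or_dynamic_map_fn(
        _keep_positives_only,
        [batch_cls_targets, batch_cls_weights],
        dtype=tf.bool))
# batch_sampled_indicator can then be multiplied into the regression and
# classification weights, exactly as in the loss method above.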
Example n. 29
 def graph_fn(input_tensor, scalar_index_tensor):
   map_fn_output = shape_utils.static_or_dynamic_map_fn(
       fn, [input_tensor, scalar_index_tensor], dtype=tf.float32)
   return map_fn_output
Example n. 30
  def loss(self, prediction_dict, true_image_shapes, scope=None):
    """Compute scalar loss tensors with respect to provided groundtruth.

    Calling this function requires that groundtruth tensors have been
    provided via the provide_groundtruth function.

    Args:
      prediction_dict: a dictionary holding prediction tensors with
        1) box_encodings: 3-D float tensor of shape [batch_size, num_anchors,
          box_code_dimension] containing predicted boxes.
        2) class_predictions_with_background: 3-D float tensor of shape
          [batch_size, num_anchors, num_classes+1] containing class predictions
          (logits) for each of the anchors. Note that this tensor *includes*
          background class predictions.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros.
      scope: Optional scope name.

    Returns:
      a dictionary mapping loss keys (`localization_loss` and
        `classification_loss`) to scalar tensors representing corresponding loss
        values.
    """
    with tf.name_scope(scope, 'Loss', prediction_dict.values()):
      keypoints = None
      if self.groundtruth_has_field(fields.BoxListFields.keypoints):
        keypoints = self.groundtruth_lists(fields.BoxListFields.keypoints)
      weights = None
      if self.groundtruth_has_field(fields.BoxListFields.weights):
        weights = self.groundtruth_lists(fields.BoxListFields.weights)
      (batch_cls_targets, batch_cls_weights, batch_reg_targets,
       batch_reg_weights, match_list) = self._assign_targets(
           self.groundtruth_lists(fields.BoxListFields.boxes),
           self.groundtruth_lists(fields.BoxListFields.classes),
           keypoints, weights)
      if self._add_summaries:
        self._summarize_target_assignment(
            self.groundtruth_lists(fields.BoxListFields.boxes), match_list)

      if self._bbox_ignore_background_mask:
        # Filter using a single mask provided in ground truth.
        batch_size, num_anchors, box_code_dimension = \
            prediction_dict['box_encodings'].get_shape()
        batch_ignore_masks = self.groundtruth_lists(fields.BoxListFields.masks)
        batch_sampled_indicator = []
        for i in range(batch_size):
            # The single provided groundtruth mask is the ignore mask.
            ignore_mask = tf.cast(batch_ignore_masks[i][0], tf.float32)
            height, width = ignore_mask.get_shape().as_list()
            ignore_mask = tf.reshape(ignore_mask, [1, height, width, 1])
            bboxes = prediction_dict['box_encodings'][i]
            bboxes = tf.minimum(tf.maximum(bboxes, 0.), 1.)
            # mask = tf.tile([[height, width, height, width]], [num_anchors, 1])
            bboxes = tf.cast(bboxes * tf.cast([height, width, height, width], tf.float32), tf.int32)
            # Average the ignore mask inside each box; the mask was reshaped to
            # [1, height, width, 1] above, so slice out the batch and channel
            # dimensions before the spatial crop.
            bboxes = tf.map_fn(
                lambda x: tf.reduce_mean(ignore_mask[0, x[0]:x[2], x[1]:x[3], 0]),
                bboxes, dtype=tf.float32)
            sampled_indicator = bboxes < self._bbox_ignore_background_mask.overlap_threshold
            # bbox_idx = [0] * num_anchors 
            # ignore_intersections = tf.image.crop_and_resize(
            #     ignore_mask, bboxes, bbox_idx, (100, 100), method='nearest')
            # sampled_indicator = \
            #     tf.reduce_mean(ignore_intersections, axis=[1, 2, 3]) < \
            #     self._bbox_ignore_background_mask.overlap_threshold
            sampled_indicator = tf.cast(sampled_indicator, tf.float32)
            batch_sampled_indicator.append(sampled_indicator)
        batch_sampled_indicator = tf.stack(batch_sampled_indicator, axis=0)
        batch_reg_weights = tf.multiply(batch_sampled_indicator,
                                        batch_reg_weights)
        batch_cls_weights = tf.multiply(batch_sampled_indicator,
                                        batch_cls_weights)

      if self._random_example_sampler:
        batch_sampled_indicator = tf.to_float(
            shape_utils.static_or_dynamic_map_fn(
                self._minibatch_subsample_fn,
                [batch_cls_targets, batch_cls_weights],
                dtype=tf.bool,
                parallel_iterations=self._parallel_iterations,
                back_prop=True))
        batch_reg_weights = tf.multiply(batch_sampled_indicator,
                                        batch_reg_weights)
        batch_cls_weights = tf.multiply(batch_sampled_indicator,
                                        batch_cls_weights)

      losses_mask = None
      if self.groundtruth_has_field(fields.InputDataFields.is_annotated):
        losses_mask = tf.stack(self.groundtruth_lists(
            fields.InputDataFields.is_annotated))
      location_losses = self._localization_loss(
          prediction_dict['box_encodings'],
          batch_reg_targets,
          ignore_nan_targets=True,
          weights=batch_reg_weights,
          losses_mask=losses_mask)

      cls_losses = self._classification_loss(
          prediction_dict['class_predictions_with_background'],
          batch_cls_targets,
          weights=batch_cls_weights,
          losses_mask=losses_mask)

      if self._expected_classification_loss_under_sampling:
        if cls_losses.get_shape().ndims == 3:
          batch_size, num_anchors, num_classes = cls_losses.get_shape()
          cls_losses = tf.reshape(cls_losses, [batch_size, -1])
          batch_cls_targets = tf.reshape(
              batch_cls_targets, [batch_size, num_anchors * num_classes, -1])
          batch_cls_targets = tf.concat(
              [1 - batch_cls_targets, batch_cls_targets], axis=-1)

        cls_losses = self._expected_classification_loss_under_sampling(
            batch_cls_targets, cls_losses)

        classification_loss = tf.reduce_sum(cls_losses)
        localization_loss = tf.reduce_sum(location_losses)
      elif self._hard_example_miner:
        cls_losses = ops.reduce_sum_trailing_dimensions(cls_losses, ndims=2)
        (localization_loss, classification_loss) = self._apply_hard_mining(
            location_losses, cls_losses, prediction_dict, match_list)
        if self._add_summaries:
          self._hard_example_miner.summarize()
      else:
        cls_losses = ops.reduce_sum_trailing_dimensions(cls_losses, ndims=2)
        localization_loss = tf.reduce_sum(location_losses)
        classification_loss = tf.reduce_sum(cls_losses)

      # Optionally normalize by number of positive matches
      normalizer = tf.constant(1.0, dtype=tf.float32)
      if self._normalize_loss_by_num_matches:
        normalizer = tf.maximum(tf.to_float(tf.reduce_sum(batch_reg_weights)),
                                1.0)

      localization_loss_normalizer = normalizer
      if self._normalize_loc_loss_by_codesize:
        localization_loss_normalizer *= self._box_coder.code_size
      localization_loss = tf.multiply((self._localization_loss_weight /
                                       localization_loss_normalizer),
                                      localization_loss,
                                      name='localization_loss')
      classification_loss = tf.multiply((self._classification_loss_weight /
                                         normalizer), classification_loss,
                                        name='classification_loss')

      loss_dict = {
          str(localization_loss.op.name): localization_loss,
          str(classification_loss.op.name): classification_loss
      }
    return loss_dict
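# The _bbox_ignore_background_mask branch above averages an ignore mask inside
# each predicted box to decide which anchors to down-weight. Below is an
# isolated sketch of that per-box computation; the mask, the pixel boxes and
# the 0.5 threshold are illustrative assumptions, not the model's internals.
import tensorflow as tf

ignore_mask = tf.pad(tf.ones([10, 10]), [[0, 90], [0, 90]])   # top-left 10x10 ignored
boxes_px = tf.constant([[0, 0, 10, 10],
                        [50, 50, 80, 80]], dtype=tf.int32)    # [y1, x1, y2, x2]

overlap = tf.map_fn(
    lambda b: tf.reduce_mean(ignore_mask[b[0]:b[2], b[1]:b[3]]),
    boxes_px, dtype=tf.float32)
keep = tf.cast(overlap < 0.5, tf.float32)   # 1.0 for boxes mostly outside the mask
# overlap evaluates to [1.0, 0.0], so keep evaluates to [0.0, 1.0].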