Exemple #1
0
  def testSortedNonMaxSuppressionOnTPUSpeed(self):
    """Checks padded NMS output shapes on a large random batch under TPUStrategy.

    Despite the "Speed" suffix, nothing is timed here: the test only asserts
    the shapes of the tensors returned by the NMS call.
    """
    batch_size, num_candidates, max_kept = 2, 12000, 2000

    # Random inputs; boxes are drawn before scores, so the RNG stream is
    # consumed in a fixed order.
    boxes = tf.constant(
        np.random.rand(batch_size, num_candidates, 4).astype(np.float32))
    scores = tf.constant(
        np.random.rand(batch_size, num_candidates).astype(np.float32))
    iou_threshold = tf.constant(np.array(0.7, dtype=np.float32))

    # The NMS call is issued inside the TPU strategy scope.
    tpu_strategy = tf.distribute.experimental.TPUStrategy()
    with tpu_strategy.scope():
      nms_scores, nms_boxes = nms.sorted_non_max_suppression_padded(
          boxes=boxes,
          scores=scores,
          max_output_size=max_kept,
          iou_threshold=iou_threshold)

    # Outputs are expected to be padded to exactly `max_kept` entries.
    self.assertEqual(nms_scores.numpy().shape, (batch_size, max_kept))
    self.assertEqual(nms_boxes.numpy().shape, (batch_size, max_kept, 4))
Exemple #2
0
  def testSortedNonMaxSuppressionOnCPU(self):
    """Checks padded NMS on the fixture data without any distribution strategy.

    Reads `self.boxes_data`, `self.scores_data`, `self.iou_threshold` and
    `self.max_output_size`, which are set up outside this method. Verifies
    the shape of the returned boxes and the values of the returned scores.
    """
    boxes = tf.constant(np.array(self.boxes_data, dtype=np.float32))
    scores = tf.constant(np.array(self.scores_data, dtype=np.float32))
    iou_threshold = tf.constant(
        np.array(self.iou_threshold, dtype=np.float32))

    # No strategy scope here, so the op runs on the default device.
    kept_scores, kept_boxes = nms.sorted_non_max_suppression_padded(
        boxes=boxes,
        scores=scores,
        max_output_size=self.max_output_size,
        iou_threshold=iou_threshold)

    # Four surviving scores per image, followed by zero padding.
    expected_scores = [
        [0.9, 0.6, 0.4, 0.3, 0., 0.],
        [0.8, 0.7, 0.5, 0.3, 0., 0.],
    ]
    self.assertEqual(kept_boxes.numpy().shape, (2, self.max_output_size, 4))
    self.assertAllClose(kept_scores.numpy(), expected_scores)
def _generate_detections_v2(boxes: tf.Tensor,
                            scores: tf.Tensor,
                            pre_nms_top_k: int = 5000,
                            pre_nms_score_threshold: float = 0.05,
                            nms_iou_threshold: float = 0.5,
                            max_num_detections: int = 100):
  """Runs padded NMS class by class and merges the results across classes.

  The class dimension is unrolled in a Python loop; each class is processed
  for the whole batch at once by `nms.sorted_non_max_suppression_padded`.
  Per-class outputs are concatenated and a final top-k over the scores keeps
  `max_num_detections` entries per image.

  Args:
    boxes: A `tf.Tensor` with shape `[batch_size, N, num_classes, 4]` or
      `[batch_size, N, 1, 4]` holding box predictions. When the class
      dimension is smaller than the number of score classes, the last box
      slice is reused for the remaining classes.
    scores: A `tf.Tensor` with shape `[batch_size, N, num_classes]` holding
      per-class scores. `N` and `num_classes` are read from the static shape.
    pre_nms_top_k: An `int` number of top-scoring candidates kept per class
      before NMS (capped at `N`).
    pre_nms_score_threshold: A `float` score threshold passed to
      `box_ops.filter_boxes_by_scores` before NMS.
    nms_iou_threshold: A `float` IoU threshold passed to the NMS routine.
    max_num_detections: An `int` number of detections kept per class by NMS
      and, after merging, per image.

  Returns:
    nmsed_boxes: A `tf.Tensor` of shape `[batch_size, max_num_detections, 4]`
      with the boxes of the selected detections.
    nmsed_scores: A `tf.Tensor` of shape `[batch_size, max_num_detections]`
      with the selected scores, sorted in descending order.
    nmsed_classes: A `tf.Tensor` of shape `[batch_size, max_num_detections]`
      with the class index of each selected detection.
    valid_detections: An `int32` `tf.Tensor` of shape `[batch_size]` counting
      the selected entries whose score is greater than -1.
  """
  per_class_boxes = []
  per_class_scores = []
  per_class_ids = []
  with tf.name_scope('generate_detections'):
    batch_size, _, num_classes_for_box, _ = boxes.get_shape().as_list()
    if batch_size is None:
      # Static batch size unknown: fall back to the dynamic one.
      batch_size = tf.shape(boxes)[0]
    _, total_anchors, num_classes = scores.get_shape().as_list()

    # Keep only the best-scoring candidates of every class ahead of NMS.
    top_scores, top_indices = _select_top_k_scores(
        scores, min(total_anchors, pre_nms_top_k))

    for class_id in range(num_classes):
      # Class-agnostic boxes have a single slice shared by all classes.
      box_slice = min(num_classes_for_box - 1, class_id)
      class_boxes = boxes[:, :, box_slice, :]
      class_scores = top_scores[:, :, class_id]
      # Reorder the boxes so they line up with the top-k scores.
      class_boxes = tf.gather(
          class_boxes, top_indices[:, :, class_id], batch_dims=1, axis=1)

      # Apply the pre-NMS score threshold.
      class_boxes, class_scores = box_ops.filter_boxes_by_scores(
          class_boxes, class_scores,
          min_score_threshold=pre_nms_score_threshold)

      kept_scores, kept_boxes = nms.sorted_non_max_suppression_padded(
          tf.cast(class_scores, tf.float32),
          tf.cast(class_boxes, tf.float32),
          max_num_detections,
          iou_threshold=nms_iou_threshold)
      kept_ids = tf.fill([batch_size, max_num_detections], class_id)

      per_class_boxes.append(kept_boxes)
      per_class_scores.append(kept_scores)
      per_class_ids.append(kept_ids)

  # Merge all classes along the detection axis, then keep the overall best.
  merged_boxes = tf.concat(per_class_boxes, axis=1)
  merged_scores = tf.concat(per_class_scores, axis=1)
  merged_ids = tf.concat(per_class_ids, axis=1)
  nmsed_scores, keep = tf.nn.top_k(
      merged_scores, k=max_num_detections, sorted=True)
  nmsed_boxes = tf.gather(merged_boxes, keep, batch_dims=1, axis=1)
  nmsed_classes = tf.gather(merged_ids, keep, batch_dims=1)

  # Entries with a score above -1 are counted as valid detections.
  is_valid = tf.greater(nmsed_scores, -1)
  valid_detections = tf.reduce_sum(
      input_tensor=tf.cast(is_valid, tf.int32), axis=1)
  return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
def _multilevel_propose_rois(raw_boxes,
                             raw_scores,
                             anchor_boxes,
                             image_shape,
                             pre_nms_top_k=2000,
                             pre_nms_score_threshold=0.0,
                             pre_nms_min_size_threshold=0.0,
                             nms_iou_threshold=0.7,
                             num_proposals=1000,
                             use_batched_nms=False,
                             decode_boxes=True,
                             clip_boxes=True,
                             apply_sigmoid_to_score=True):
    """Selects RoI proposals from candidates spread over several FPN levels.

    Processing steps:
      1. For every level, visited in sorted key order:
        a. Flatten scores, boxes and anchors to one row per candidate.
        b. Optionally apply a sigmoid to the scores.
        c. Optionally decode the boxes against the anchors.
        d. Optionally clip the boxes to the image.
        e. Optionally filter out boxes below the minimum size.
        f. Apply pre-NMS score thresholding / top-k, then NMS; or, when NMS
           is disabled, just take the top-k boxes.
      2. Concatenate the per-level results.
      3. Apply one overall top-k to obtain the final proposals.

    Args:
      raw_boxes: A `dict` keyed by FPN level whose values are box tensors of
        shape [batch_size, feature_h, feature_w, num_anchors * 4].
      raw_scores: A `dict` keyed by FPN level whose values are logit tensors
        of shape [batch_size, feature_h, feature_w, num_anchors]. The last
        three dimensions are read from the static shape.
      anchor_boxes: A `dict` keyed by FPN level whose values are anchor box
        tensors that can be reshaped to
        [batch_size, feature_h * feature_w * num_anchors, 4]. Note that the
        entry for each level is read and reshaped even when `decode_boxes`
        is False.
      image_shape: A `tf.Tensor` of shape [batch_size, 2] whose last
        dimension is the [height, width] of the scaled image.
      pre_nms_top_k: An `int` number of top scoring proposals *per level* to
        keep before NMS. Default: 2000.
      pre_nms_score_threshold: A `float` minimal box score to keep before
        NMS. Without batched NMS it is applied only when greater than 0;
        with batched NMS it is forwarded as `score_threshold`. Default: 0.
      pre_nms_min_size_threshold: A `float` minimal box size passed to
        `box_ops.filter_boxes`; applied only when greater than 0.
        Default: 0.
      nms_iou_threshold: A `float` IoU threshold used for NMS. NMS is skipped
        unless this is greater than 0. Default: 0.7.
      num_proposals: An `int` number of top scoring proposals *in total* to
        keep after NMS. Default: 1000.
      use_batched_nms: A `bool`; if True, NMS is done with
        `tf.image.combined_non_max_suppression` instead of
        `nms.sorted_non_max_suppression_padded`. Default: False.
      decode_boxes: A `bool` indicating whether `raw_boxes` are decoded using
        `anchor_boxes`; if False, `raw_boxes` are used as they are.
        Default: True.
      clip_boxes: A `bool` indicating whether boxes are clipped to
        `image_shape` before NMS. Default: True.
      apply_sigmoid_to_score: A `bool` indicating whether a sigmoid is
        applied to `raw_scores` before NMS. Default: True.

    Returns:
      selected_rois: A `tf.Tensor` with the box coordinates of the selected
        proposals, at most `num_proposals` per image.
      selected_roi_scores: A `tf.Tensor` with the scores of the selected
        proposals. NOTE(review): the concatenated scores are unpacked as a
        rank-2 tensor before the final top-k, so this is presumably
        [batch_size, k] rather than [batch_size, k, 1] -- confirm against
        `box_ops.top_k_boxes`.
    """
    per_level_rois = []
    per_level_scores = []
    with tf.name_scope('multilevel_propose_rois'):
        # Extra axis so the image shape broadcasts against [batch, N, 4].
        image_shape = tf.expand_dims(image_shape, axis=1)

        for level in sorted(raw_scores.keys()):
            with tf.name_scope('level_%s' % level):
                level_logits = raw_scores[level]
                _, feature_h, feature_w, num_anchors_per_location = (
                    level_logits.get_shape().as_list())
                num_boxes = feature_h * feature_w * num_anchors_per_location

                # One row per candidate box on this level.
                level_scores = tf.reshape(level_logits, [-1, num_boxes])
                level_boxes = tf.reshape(raw_boxes[level],
                                         [-1, num_boxes, 4])
                level_anchors = tf.cast(
                    tf.reshape(anchor_boxes[level], [-1, num_boxes, 4]),
                    dtype=level_scores.dtype)

                if apply_sigmoid_to_score:
                    level_scores = tf.sigmoid(level_scores)
                if decode_boxes:
                    level_boxes = box_ops.decode_boxes(
                        level_boxes, level_anchors)
                if clip_boxes:
                    level_boxes = box_ops.clip_boxes(
                        level_boxes, image_shape)
                if pre_nms_min_size_threshold > 0.0:
                    level_boxes, level_scores = box_ops.filter_boxes(
                        level_boxes, level_scores, image_shape,
                        pre_nms_min_size_threshold)

                # A level cannot contribute more boxes than it holds.
                pre_nms_k = min(num_boxes, pre_nms_top_k)
                post_nms_k = min(num_boxes, num_proposals)

                nms_enabled = nms_iou_threshold > 0.0
                if nms_enabled and use_batched_nms:
                    # Score thresholding and the per-class cap are handled
                    # by the combined NMS op itself.
                    level_rois, level_roi_scores, _, _ = (
                        tf.image.combined_non_max_suppression(
                            tf.expand_dims(level_boxes, axis=2),
                            tf.expand_dims(level_scores, axis=-1),
                            max_output_size_per_class=pre_nms_k,
                            max_total_size=post_nms_k,
                            iou_threshold=nms_iou_threshold,
                            score_threshold=pre_nms_score_threshold,
                            pad_per_class=False,
                            clip_boxes=False))
                elif nms_enabled:
                    if pre_nms_score_threshold > 0.0:
                        level_boxes, level_scores = (
                            box_ops.filter_boxes_by_scores(
                                level_boxes, level_scores,
                                pre_nms_score_threshold))
                    level_boxes, level_scores = box_ops.top_k_boxes(
                        level_boxes, level_scores, k=pre_nms_k)
                    # The padded NMS returns scores first, then boxes.
                    level_roi_scores, level_rois = (
                        nms.sorted_non_max_suppression_padded(
                            level_scores,
                            level_boxes,
                            max_output_size=post_nms_k,
                            iou_threshold=nms_iou_threshold))
                else:
                    # NMS disabled: keep the best-scoring boxes only.
                    level_rois, level_roi_scores = box_ops.top_k_boxes(
                        level_boxes, level_scores, k=post_nms_k)

                per_level_rois.append(level_rois)
                per_level_scores.append(level_roi_scores)

        all_rois = tf.concat(per_level_rois, axis=1)
        all_roi_scores = tf.concat(per_level_scores, axis=1)

        with tf.name_scope('top_k_rois'):
            _, num_valid_rois = all_roi_scores.get_shape().as_list()
            overall_top_k = min(num_valid_rois, num_proposals)
            selected_rois, selected_roi_scores = box_ops.top_k_boxes(
                all_rois, all_roi_scores, k=overall_top_k)

        return selected_rois, selected_roi_scores