def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx):
  """Suppresses boxes in box_slice that overlap boxes in tile inner_idx."""
  batch_size = tf.shape(boxes)[0]
  # Slice out the earlier tile whose (already processed) boxes may suppress
  # boxes in the current tile.
  new_slice = tf.slice(boxes, [0, inner_idx * NMS_TILE_SIZE, 0],
                       [batch_size, NMS_TILE_SIZE, 4])
  iou = box_utils.bbox_overlap(new_slice, box_slice)
  # Keep a box only if its IoU with every box in the earlier tile stays below
  # the threshold; suppressed boxes are zeroed out.
  ret_slice = tf.expand_dims(
      tf.cast(tf.reduce_all(iou < iou_threshold, [1]), box_slice.dtype),
      2) * box_slice
  return boxes, ret_slice, iou_threshold, inner_idx + 1
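# Both suppression helpers rely on box_utils.bbox_overlap, which this snippet
# does not show. A minimal sketch of what it is assumed to compute: pairwise
# IoU between two box sets in (ymin, xmin, ymax, xmax) order. The real helper
# also returns -1 for padded boxes whose coordinates are all -1s (see
# box_matching below); this sketch omits that detail.
import tensorflow as tf

def bbox_overlap_sketch(boxes_a, boxes_b):
  """Returns IoU of shape [batch, N, M] for [batch, N, 4] and [batch, M, 4]."""
  a_ymin, a_xmin, a_ymax, a_xmax = tf.split(boxes_a, 4, axis=2)
  b_ymin, b_xmin, b_ymax, b_xmax = tf.split(boxes_b, 4, axis=2)
  # Broadcast [batch, N, 1] against [batch, 1, M] to get pairwise overlaps.
  i_ymin = tf.maximum(a_ymin, tf.transpose(b_ymin, [0, 2, 1]))
  i_xmin = tf.maximum(a_xmin, tf.transpose(b_xmin, [0, 2, 1]))
  i_ymax = tf.minimum(a_ymax, tf.transpose(b_ymax, [0, 2, 1]))
  i_xmax = tf.minimum(a_xmax, tf.transpose(b_xmax, [0, 2, 1]))
  i_area = (tf.maximum(i_ymax - i_ymin, 0.) *
            tf.maximum(i_xmax - i_xmin, 0.))
  a_area = (a_ymax - a_ymin) * (a_xmax - a_xmin)
  b_area = (b_ymax - b_ymin) * (b_xmax - b_xmin)
  union = a_area + tf.transpose(b_area, [0, 2, 1]) - i_area
  return tf.math.divide_no_nan(i_area, union)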
def _suppression_loop_body(boxes, iou_threshold, output_size, idx):
    """Processes boxes in the range [idx*NMS_TILE_SIZE, (idx+1)*NMS_TILE_SIZE).

    Args:
      boxes: a tensor with a shape of [batch_size, anchors, 4].
      iou_threshold: a float representing the threshold for deciding whether
        boxes overlap too much with respect to IoU.
      output_size: an int32 tensor of size [batch_size] representing the number
        of selected boxes for each batch.
      idx: an integer scalar representing the induction variable.

    Returns:
      boxes: updated boxes.
      iou_threshold: pass down iou_threshold to the next iteration.
      output_size: the updated output_size.
      idx: the updated induction variable.
    """
    num_tiles = tf.shape(boxes)[1] // NMS_TILE_SIZE
    batch_size = tf.shape(boxes)[0]

    # Iterates over tiles that can possibly suppress the current tile.
    box_slice = tf.slice(boxes, [0, idx * NMS_TILE_SIZE, 0],
                         [batch_size, NMS_TILE_SIZE, 4])
    _, box_slice, _, _ = tf.while_loop(
        lambda _boxes, _box_slice, _threshold, inner_idx: inner_idx < idx,
        _cross_suppression, [boxes, box_slice, iou_threshold,
                             tf.constant(0)])

    # Iterates over the current tile to compute self-suppression.
    iou = box_utils.bbox_overlap(box_slice, box_slice)
    mask = tf.expand_dims(
        tf.reshape(tf.range(NMS_TILE_SIZE), [1, -1]) > tf.reshape(
            tf.range(NMS_TILE_SIZE), [-1, 1]), 0)
    iou *= tf.cast(tf.logical_and(mask, iou >= iou_threshold), iou.dtype)
    suppressed_iou, _, _ = tf.while_loop(
        lambda _iou, loop_condition, _iou_sum: loop_condition,
        _self_suppression,
        [iou, tf.constant(True),
         tf.reduce_sum(iou, [1, 2])])
    suppressed_box = tf.reduce_sum(suppressed_iou, 1) > 0
    box_slice *= tf.expand_dims(1.0 - tf.cast(suppressed_box, box_slice.dtype),
                                2)

    # Uses box_slice to update the input boxes.
    mask = tf.reshape(tf.cast(tf.equal(tf.range(num_tiles), idx), boxes.dtype),
                      [1, -1, 1, 1])
    boxes = tf.tile(tf.expand_dims(
        box_slice, [1]), [1, num_tiles, 1, 1]) * mask + tf.reshape(
            boxes, [batch_size, num_tiles, NMS_TILE_SIZE, 4]) * (1 - mask)
    boxes = tf.reshape(boxes, [batch_size, -1, 4])

    # Updates output_size.
    output_size += tf.reduce_sum(
        tf.cast(tf.reduce_any(box_slice > 0, [2]), tf.int32), [1])
    return boxes, iou_threshold, output_size, idx + 1
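# A sketch of the outer loop that might drive _suppression_loop_body, loosely
# following the tiled NMS in the TensorFlow TPU detection codebase. The
# function name, max_output_size, and the assumption that `boxes` is already
# padded to a multiple of NMS_TILE_SIZE are all illustrative, not part of the
# original snippet. (_self_suppression, like box_utils.bbox_overlap, is
# referenced above but not shown here.)
def sketch_tiled_nms(boxes, iou_threshold, max_output_size):
  """boxes: [batch_size, anchors, 4] with anchors % NMS_TILE_SIZE == 0."""
  batch_size = tf.shape(boxes)[0]
  num_boxes = tf.shape(boxes)[1]

  def _loop_cond(unused_boxes, unused_threshold, output_size, idx):
    # Stop once every batch element has enough selected boxes, or once all
    # tiles have been processed.
    return tf.logical_and(
        tf.reduce_min(output_size) < max_output_size,
        idx < num_boxes // NMS_TILE_SIZE)

  selected_boxes, _, output_size, _ = tf.while_loop(
      _loop_cond, _suppression_loop_body,
      [boxes, iou_threshold,
       tf.zeros([batch_size], tf.int32), tf.constant(0)])
  # Suppressed boxes are zeroed out in selected_boxes; output_size holds the
  # per-batch count of surviving boxes.
  return selected_boxes, output_size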
    def build_outputs(self, inputs, mode):
        is_training = mode == mode_keys.TRAIN
        model_outputs = {}

        image = inputs['image']
        _, image_height, image_width, _ = image.get_shape().as_list()
        backbone_features = self._backbone_fn(image, is_training)
        fpn_features = self._fpn_fn(backbone_features, is_training)

        # RPN heads: add a centerness branch when enabled.
        if self._include_centerness:
            rpn_score_outputs, rpn_box_outputs, rpn_center_outputs = (
                self._rpn_head_fn(fpn_features, is_training))
            model_outputs.update({
                'rpn_center_outputs':
                tf.nest.map_structure(lambda x: tf.cast(x, tf.float32),
                                      rpn_center_outputs),
            })
            object_scores = rpn_center_outputs
        else:
            rpn_score_outputs, rpn_box_outputs = self._rpn_head_fn(
                fpn_features, is_training)
            object_scores = None
        model_outputs.update({
            'rpn_score_outputs':
            tf.nest.map_structure(lambda x: tf.cast(x, tf.float32),
                                  rpn_score_outputs),
            'rpn_box_outputs':
            tf.nest.map_structure(lambda x: tf.cast(x, tf.float32),
                                  rpn_box_outputs),
        })
        input_anchor = anchor.Anchor(self._params.architecture.min_level,
                                     self._params.architecture.max_level,
                                     self._params.anchor.num_scales,
                                     self._params.anchor.aspect_ratios,
                                     self._params.anchor.anchor_size,
                                     (image_height, image_width))
        rpn_rois, rpn_roi_scores = self._generate_rois_fn(
            rpn_box_outputs,
            rpn_score_outputs,
            input_anchor.multilevel_boxes,
            inputs['image_info'][:, 1, :],
            is_training,
            is_box_lrtb=self._include_centerness,
            object_scores=object_scores,
        )
        if (not self._include_frcnn_class and not self._include_frcnn_box
                and not self._include_mask):
            # For direct RPN detection, use dummy box regression outputs
            # (dy, dx, dh, dw) = (0, 0, 0, 0) so proposals pass through as-is.
            box_outputs = tf.zeros_like(rpn_rois)
            box_outputs = tf.concat([box_outputs, box_outputs], -1)
            boxes, scores, classes, valid_detections = self._generate_detections_fn(
                box_outputs,
                rpn_roi_scores,
                rpn_rois,
                inputs['image_info'][:, 1:2, :],
                is_single_fg_score=True,  # No background class: skip softmax.
                keep_nms=True)
            model_outputs.update({
                'num_detections': valid_detections,
                'detection_boxes': boxes,
                'detection_classes': classes,
                'detection_scores': scores,
            })
            return model_outputs

        # ---- OLN-Proposal finishes here. ----

        if is_training:
            rpn_rois = tf.stop_gradient(rpn_rois)
            rpn_roi_scores = tf.stop_gradient(rpn_roi_scores)

            # Sample proposals.
            (rpn_rois, rpn_roi_scores, matched_gt_boxes, matched_gt_classes,
             matched_gt_indices) = (self._sample_rois_fn(
                 rpn_rois, rpn_roi_scores, inputs['gt_boxes'],
                 inputs['gt_classes']))
            # Create bounding box training targets.
            box_targets = box_utils.encode_boxes(
                matched_gt_boxes, rpn_rois, weights=[10.0, 10.0, 5.0, 5.0])
            # If the target is background, the box target is set to all 0s.
            box_targets = tf.where(
                tf.tile(
                    tf.expand_dims(tf.equal(matched_gt_classes, 0), axis=-1),
                    [1, 1, 4]), tf.zeros_like(box_targets), box_targets)
            model_outputs.update({
                'class_targets': matched_gt_classes,
                'box_targets': box_targets,
            })
            # Create box-IoU targets.
            box_ious = box_utils.bbox_overlap(rpn_rois, inputs['gt_boxes'])
            matched_box_ious = tf.reduce_max(box_ious, 2)
            model_outputs.update({
                'box_iou_targets': matched_box_ious,
            })

        roi_features = spatial_transform_ops.multilevel_crop_and_resize(
            fpn_features, rpn_rois, output_size=7)

        if not self._include_box_score:
            class_outputs, box_outputs = self._frcnn_head_fn(
                roi_features, is_training)
        else:
            class_outputs, box_outputs, score_outputs = self._frcnn_head_fn(
                roi_features, is_training)
            model_outputs.update({
                'box_score_outputs':
                tf.nest.map_structure(lambda x: tf.cast(x, tf.float32),
                                      score_outputs),
            })
        model_outputs.update({
            'class_outputs':
            tf.nest.map_structure(lambda x: tf.cast(x, tf.float32),
                                  class_outputs),
            'box_outputs':
            tf.nest.map_structure(lambda x: tf.cast(x, tf.float32),
                                  box_outputs),
        })

        # Emit this output in train mode as well, so the checkpoint remains
        # loadable in predict mode; skipping it during training would leave
        # the heads out of order and break checkpoint loading.
        if not self._include_frcnn_box:
            box_outputs = tf.zeros_like(box_outputs)  # dummy zeros.

        if self._include_box_score:
            score_outputs = tf.cast(tf.squeeze(score_outputs, -1),
                                    rpn_roi_scores.dtype)

            # box_score = (rpn_centerness * box_iou)^(1/2), a geometric mean.
            # Train: rpn_roi_scores is [b, 1000], score_outputs is [b, 512];
            # test: rpn_roi_scores and score_outputs are both [b, 1000].
            box_scores = tf.pow(rpn_roi_scores * tf.sigmoid(score_outputs),
                                1 / 2.)

        if not self._include_frcnn_class:
            # Class-agnostic scoring: reuse box_scores from above (this branch
            # assumes self._include_box_score is enabled).
            boxes, scores, classes, valid_detections = self._generate_detections_fn(
                box_outputs,
                box_scores,
                rpn_rois,
                inputs['image_info'][:, 1:2, :],
                is_single_fg_score=True,
                keep_nms=True,
            )
        else:
            boxes, scores, classes, valid_detections = self._generate_detections_fn(
                box_outputs,
                class_outputs,
                rpn_rois,
                inputs['image_info'][:, 1:2, :],
                keep_nms=True,
            )
        model_outputs.update({
            'num_detections': valid_detections,
            'detection_boxes': boxes,
            'detection_classes': classes,
            'detection_scores': scores,
        })

        # ---- OLN-Box finishes here. ----

        if not self._include_mask:
            return model_outputs

        if is_training:
            rpn_rois, classes, mask_targets = self._sample_masks_fn(
                rpn_rois, matched_gt_boxes, matched_gt_classes,
                matched_gt_indices, inputs['gt_masks'])
            mask_targets = tf.stop_gradient(mask_targets)

            classes = tf.cast(classes, dtype=tf.int32)

            model_outputs.update({
                'mask_targets': mask_targets,
                'sampled_class_targets': classes,
            })
        else:
            rpn_rois = boxes
            classes = tf.cast(classes, dtype=tf.int32)

        mask_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
            fpn_features, rpn_rois, output_size=14)

        mask_outputs = self._mrcnn_head_fn(mask_roi_features, classes,
                                           is_training)

        if is_training:
            model_outputs.update({
                'mask_outputs':
                tf.nest.map_structure(lambda x: tf.cast(x, tf.float32),
                                      mask_outputs),
            })
        else:
            model_outputs.update(
                {'detection_masks': tf.nn.sigmoid(mask_outputs)})

        return model_outputs
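# The image_info slices above (inputs['image_info'][:, 1, :] and [:, 1:2, :])
# assume the four-row [batch, 4, 2] image_info layout common to these
# detection codebases; that layout is an assumption from context, not stated
# in this snippet. A hypothetical example for one image resized from 800x1199
# to 640x640:
example_image_info = tf.constant(
    [[[800., 1199.],    # row 0: original (height, width)
      [640., 640.],     # row 1: scaled (height, width)
      [0.80, 0.534],    # row 2: (y_scale, x_scale)
      [0., 0.]]])       # row 3: (y_offset, x_offset)
example_scaled_hw = example_image_info[:, 1, :]       # [batch, 2]
example_scaled_hw_3d = example_image_info[:, 1:2, :]  # [batch, 1, 2]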
def box_matching(boxes, gt_boxes, gt_classes):
    """Matches boxes to groundtruth boxes.

    Given the proposal boxes and the groundtruth boxes and classes, performs
    the groundtruth matching by taking the argmax of the IoU between boxes and
    groundtruth boxes.

    Args:
      boxes: a tensor of shape of [batch_size, N, 4] representing the box
        coordinates to be matched to groundtruth boxes.
      gt_boxes: a tensor of shape of [batch_size, MAX_INSTANCES, 4]
        representing the groundtruth box coordinates. It is padded with -1s to
        indicate the invalid boxes.
      gt_classes: [batch_size, MAX_INSTANCES] representing the groundtruth box
        classes. It is padded with -1s to indicate the invalid classes.

    Returns:
      matched_gt_boxes: a tensor of shape of [batch_size, N, 4], representing
        the matched groundtruth box coordinates for each input box. If the box
        does not overlap with any groundtruth boxes, the matched boxes of it
        will be set to all 0s.
      matched_gt_classes: a tensor of shape of [batch_size, N], representing
        the matched groundtruth classes for each input box. If the box does
        not overlap with any groundtruth boxes, the matched box classes of it
        will be set to 0, which corresponds to the background class.
      matched_gt_indices: a tensor of shape of [batch_size, N], representing
        the indices of the matched groundtruth boxes in the original gt_boxes
        tensor. If the box does not overlap with any groundtruth boxes, the
        index of the matched groundtruth will be set to -1.
      matched_iou: a tensor of shape of [batch_size, N], representing the IoU
        between the box and its matched groundtruth box. The matched IoU is
        the maximum IoU of the box and all the groundtruth boxes.
      iou: a tensor of shape of [batch_size, N, K], representing the IoU matrix
        between boxes and the groundtruth boxes. The IoU between a box and the
        invalid groundtruth boxes whose coordinates are [-1, -1, -1, -1] is -1.
    """
    # Compute IoU between boxes and gt_boxes.
    # iou <- [batch_size, N, K]
    iou = box_utils.bbox_overlap(boxes, gt_boxes)

    # max_iou <- [batch_size, N]
    # 0.0 -> no overlap with any gt; -1.0 -> matched only -1-padded invalid gt.
    matched_iou = tf.reduce_max(iou, axis=-1)

    # background_box_mask <- bool, [batch_size, N]
    background_box_mask = tf.less_equal(matched_iou, 0.0)

    argmax_iou_indices = tf.argmax(iou, axis=-1, output_type=tf.int32)

    argmax_iou_indices_shape = tf.shape(argmax_iou_indices)
    batch_indices = (
        tf.expand_dims(tf.range(argmax_iou_indices_shape[0]), axis=-1) *
        tf.ones([1, argmax_iou_indices_shape[-1]], dtype=tf.int32))
    gather_nd_indices = tf.stack([batch_indices, argmax_iou_indices], axis=-1)

    matched_gt_boxes = tf.gather_nd(gt_boxes, gather_nd_indices)
    matched_gt_boxes = tf.where(
        tf.tile(tf.expand_dims(background_box_mask, axis=-1), [1, 1, 4]),
        tf.zeros_like(matched_gt_boxes, dtype=matched_gt_boxes.dtype),
        matched_gt_boxes)

    matched_gt_classes = tf.gather_nd(gt_classes, gather_nd_indices)
    matched_gt_classes = tf.where(background_box_mask,
                                  tf.zeros_like(matched_gt_classes),
                                  matched_gt_classes)

    matched_gt_indices = tf.where(background_box_mask,
                                  -tf.ones_like(argmax_iou_indices),
                                  argmax_iou_indices)

    return (matched_gt_boxes, matched_gt_classes, matched_gt_indices,
            matched_iou, iou)
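# Hypothetical toy usage of box_matching: two proposals matched against one
# valid groundtruth box plus one -1-padded (invalid) box.
example_boxes = tf.constant([[[0., 0., 10., 10.],
                              [20., 20., 30., 30.]]])     # [1, 2, 4]
example_gt_boxes = tf.constant([[[0., 0., 10., 10.],
                                 [-1., -1., -1., -1.]]])  # [1, 2, 4]
example_gt_classes = tf.constant([[3, -1]])               # [1, 2]
(matched_gt_boxes, matched_gt_classes, matched_gt_indices,
 matched_iou, iou) = box_matching(example_boxes, example_gt_boxes,
                                  example_gt_classes)
# Proposal 0 matches gt 0 exactly: IoU 1.0, class 3, index 0. Proposal 1
# overlaps nothing, so it falls back to background: class 0, index -1, and a
# matched box of all zeros.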