def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx): batch_size = tf.shape(boxes)[0] new_slice = tf.slice(boxes, [0, inner_idx * NMS_TILE_SIZE, 0], [batch_size, NMS_TILE_SIZE, 4]) iou = box_utils.bbox_overlap(new_slice, box_slice) ret_slice = tf.expand_dims( tf.cast(tf.reduce_all(iou < iou_threshold, [1]), box_slice.dtype), 2) * box_slice return boxes, ret_slice, iou_threshold, inner_idx + 1
def _suppression_loop_body(boxes, iou_threshold, output_size, idx): """Process boxes in the range [idx*NMS_TILE_SIZE, (idx+1)*NMS_TILE_SIZE). Args: boxes: a tensor with a shape of [batch_size, anchors, 4]. iou_threshold: a float representing the threshold for deciding whether boxes overlap too much with respect to IOU. output_size: an int32 tensor of size [batch_size]. Representing the number of selected boxes for each batch. idx: an integer scalar representing induction variable. Returns: boxes: updated boxes. iou_threshold: pass down iou_threshold to the next iteration. output_size: the updated output_size. idx: the updated induction variable. """ num_tiles = tf.shape(boxes)[1] // NMS_TILE_SIZE batch_size = tf.shape(boxes)[0] # Iterates over tiles that can possibly suppress the current tile. box_slice = tf.slice(boxes, [0, idx * NMS_TILE_SIZE, 0], [batch_size, NMS_TILE_SIZE, 4]) _, box_slice, _, _ = tf.while_loop( lambda _boxes, _box_slice, _threshold, inner_idx: inner_idx < idx, _cross_suppression, [boxes, box_slice, iou_threshold, tf.constant(0)]) # Iterates over the current tile to compute self-suppression. iou = box_utils.bbox_overlap(box_slice, box_slice) mask = tf.expand_dims( tf.reshape(tf.range(NMS_TILE_SIZE), [1, -1]) > tf.reshape( tf.range(NMS_TILE_SIZE), [-1, 1]), 0) iou *= tf.cast(tf.logical_and(mask, iou >= iou_threshold), iou.dtype) suppressed_iou, _, _ = tf.while_loop( lambda _iou, loop_condition, _iou_sum: loop_condition, _self_suppression, [iou, tf.constant(True), tf.reduce_sum(iou, [1, 2])]) suppressed_box = tf.reduce_sum(suppressed_iou, 1) > 0 box_slice *= tf.expand_dims(1.0 - tf.cast(suppressed_box, box_slice.dtype), 2) # Uses box_slice to update the input boxes. mask = tf.reshape(tf.cast(tf.equal(tf.range(num_tiles), idx), boxes.dtype), [1, -1, 1, 1]) boxes = tf.tile(tf.expand_dims( box_slice, [1]), [1, num_tiles, 1, 1]) * mask + tf.reshape( boxes, [batch_size, num_tiles, NMS_TILE_SIZE, 4]) * (1 - mask) boxes = tf.reshape(boxes, [batch_size, -1, 4]) # Updates output_size. output_size += tf.reduce_sum( tf.cast(tf.reduce_any(box_slice > 0, [2]), tf.int32), [1]) return boxes, iou_threshold, output_size, idx + 1
def build_outputs(self, inputs, mode): is_training = mode == mode_keys.TRAIN model_outputs = {} image = inputs['image'] _, image_height, image_width, _ = image.get_shape().as_list() backbone_features = self._backbone_fn(image, is_training) fpn_features = self._fpn_fn(backbone_features, is_training) # rpn_centerness. if self._include_centerness: rpn_score_outputs, rpn_box_outputs, rpn_center_outputs = ( self._rpn_head_fn(fpn_features, is_training)) model_outputs.update({ 'rpn_center_outputs': tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), rpn_center_outputs), }) object_scores = rpn_center_outputs else: rpn_score_outputs, rpn_box_outputs = self._rpn_head_fn( fpn_features, is_training) object_scores = None model_outputs.update({ 'rpn_score_outputs': tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), rpn_score_outputs), 'rpn_box_outputs': tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), rpn_box_outputs), }) input_anchor = anchor.Anchor(self._params.architecture.min_level, self._params.architecture.max_level, self._params.anchor.num_scales, self._params.anchor.aspect_ratios, self._params.anchor.anchor_size, (image_height, image_width)) rpn_rois, rpn_roi_scores = self._generate_rois_fn( rpn_box_outputs, rpn_score_outputs, input_anchor.multilevel_boxes, inputs['image_info'][:, 1, :], is_training, is_box_lrtb=self._include_centerness, object_scores=object_scores, ) if (not self._include_frcnn_class and not self._include_frcnn_box and not self._include_mask): # if not is_training: # For direct RPN detection, # use dummy box_outputs = (dy,dx,dh,dw = 0,0,0,0) box_outputs = tf.zeros_like(rpn_rois) box_outputs = tf.concat([box_outputs, box_outputs], -1) boxes, scores, classes, valid_detections = self._generate_detections_fn( box_outputs, rpn_roi_scores, rpn_rois, inputs['image_info'][:, 1:2, :], is_single_fg_score=True, # if no_background, no softmax is applied. keep_nms=True) model_outputs.update({ 'num_detections': valid_detections, 'detection_boxes': boxes, 'detection_classes': classes, 'detection_scores': scores, }) return model_outputs # ---- OLN-Proposal finishes here. ---- if is_training: rpn_rois = tf.stop_gradient(rpn_rois) rpn_roi_scores = tf.stop_gradient(rpn_roi_scores) # Sample proposals. (rpn_rois, rpn_roi_scores, matched_gt_boxes, matched_gt_classes, matched_gt_indices) = ( self._sample_rois_fn(rpn_rois, rpn_roi_scores, inputs['gt_boxes'], inputs['gt_classes'])) # Create bounding box training targets. box_targets = box_utils.encode_boxes( matched_gt_boxes, rpn_rois, weights=[10.0, 10.0, 5.0, 5.0]) # If the target is background, the box target is set to all 0s. box_targets = tf.where( tf.tile( tf.expand_dims(tf.equal(matched_gt_classes, 0), axis=-1), [1, 1, 4]), tf.zeros_like(box_targets), box_targets) model_outputs.update({ 'class_targets': matched_gt_classes, 'box_targets': box_targets, }) # Create Box-IoU targets. { box_ious = box_utils.bbox_overlap( rpn_rois, inputs['gt_boxes']) matched_box_ious = tf.reduce_max(box_ious, 2) model_outputs.update({ 'box_iou_targets': matched_box_ious,}) # } roi_features = spatial_transform_ops.multilevel_crop_and_resize( fpn_features, rpn_rois, output_size=7) if not self._include_box_score: class_outputs, box_outputs = self._frcnn_head_fn( roi_features, is_training) else: class_outputs, box_outputs, score_outputs = self._frcnn_head_fn( roi_features, is_training) model_outputs.update({ 'box_score_outputs': tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), score_outputs),}) model_outputs.update({ 'class_outputs': tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), class_outputs), 'box_outputs': tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), box_outputs), }) # Add this output to train to make the checkpoint loadable in predict mode. # If we skip it in train mode, the heads will be out-of-order and checkpoint # loading will fail. if not self._include_frcnn_box: box_outputs = tf.zeros_like(box_outputs) # dummy zeros. if self._include_box_score: score_outputs = tf.cast(tf.squeeze(score_outputs, -1), rpn_roi_scores.dtype) # box-score = (rpn-centerness * box-iou)^(1/2) # TR: rpn_roi_scores: b,1000, score_outputs: b,512 # TS: rpn_roi_scores: b,1000, score_outputs: b,1000 box_scores = tf.pow( rpn_roi_scores * tf.sigmoid(score_outputs), 1/2.) if not self._include_frcnn_class: boxes, scores, classes, valid_detections = self._generate_detections_fn( box_outputs, box_scores, rpn_rois, inputs['image_info'][:, 1:2, :], is_single_fg_score=True, keep_nms=True,) else: boxes, scores, classes, valid_detections = self._generate_detections_fn( box_outputs, class_outputs, rpn_rois, inputs['image_info'][:, 1:2, :], keep_nms=True,) model_outputs.update({ 'num_detections': valid_detections, 'detection_boxes': boxes, 'detection_classes': classes, 'detection_scores': scores, }) # ---- OLN-Box finishes here. ---- if not self._include_mask: return model_outputs if is_training: rpn_rois, classes, mask_targets = self._sample_masks_fn( rpn_rois, matched_gt_boxes, matched_gt_classes, matched_gt_indices, inputs['gt_masks']) mask_targets = tf.stop_gradient(mask_targets) classes = tf.cast(classes, dtype=tf.int32) model_outputs.update({ 'mask_targets': mask_targets, 'sampled_class_targets': classes, }) else: rpn_rois = boxes classes = tf.cast(classes, dtype=tf.int32) mask_roi_features = spatial_transform_ops.multilevel_crop_and_resize( fpn_features, rpn_rois, output_size=14) mask_outputs = self._mrcnn_head_fn(mask_roi_features, classes, is_training) if is_training: model_outputs.update({ 'mask_outputs': tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), mask_outputs), }) else: model_outputs.update({'detection_masks': tf.nn.sigmoid(mask_outputs)}) return model_outputs
def box_matching(boxes, gt_boxes, gt_classes): """Match boxes to groundtruth boxes. Given the proposal boxes and the groundtruth boxes and classes, perform the groundtruth matching by taking the argmax of the IoU between boxes and groundtruth boxes. Args: boxes: a tensor of shape of [batch_size, N, 4] representing the box coordiantes to be matched to groundtruth boxes. gt_boxes: a tensor of shape of [batch_size, MAX_INSTANCES, 4] representing the groundtruth box coordinates. It is padded with -1s to indicate the invalid boxes. gt_classes: [batch_size, MAX_INSTANCES] representing the groundtruth box classes. It is padded with -1s to indicate the invalid classes. Returns: matched_gt_boxes: a tensor of shape of [batch_size, N, 4], representing the matched groundtruth box coordinates for each input box. If the box does not overlap with any groundtruth boxes, the matched boxes of it will be set to all 0s. matched_gt_classes: a tensor of shape of [batch_size, N], representing the matched groundtruth classes for each input box. If the box does not overlap with any groundtruth boxes, the matched box classes of it will be set to 0, which corresponds to the background class. matched_gt_indices: a tensor of shape of [batch_size, N], representing the indices of the matched groundtruth boxes in the original gt_boxes tensor. If the box does not overlap with any groundtruth boxes, the index of the matched groundtruth will be set to -1. matched_iou: a tensor of shape of [batch_size, N], representing the IoU between the box and its matched groundtruth box. The matched IoU is the maximum IoU of the box and all the groundtruth boxes. iou: a tensor of shape of [batch_size, N, K], representing the IoU matrix between boxes and the groundtruth boxes. The IoU between a box and the invalid groundtruth boxes whose coordinates are [-1, -1, -1, -1] is -1. """ # Compute IoU between boxes and gt_boxes. # iou <- [batch_size, N, K] iou = box_utils.bbox_overlap(boxes, gt_boxes) # max_iou <- [batch_size, N] # 0.0 -> no match to gt, or -1.0 match to no gt matched_iou = tf.reduce_max(iou, axis=-1) # background_box_mask <- bool, [batch_size, N] background_box_mask = tf.less_equal(matched_iou, 0.0) argmax_iou_indices = tf.argmax(iou, axis=-1, output_type=tf.int32) argmax_iou_indices_shape = tf.shape(argmax_iou_indices) batch_indices = ( tf.expand_dims(tf.range(argmax_iou_indices_shape[0]), axis=-1) * tf.ones([1, argmax_iou_indices_shape[-1]], dtype=tf.int32)) gather_nd_indices = tf.stack([batch_indices, argmax_iou_indices], axis=-1) matched_gt_boxes = tf.gather_nd(gt_boxes, gather_nd_indices) matched_gt_boxes = tf.where( tf.tile(tf.expand_dims(background_box_mask, axis=-1), [1, 1, 4]), tf.zeros_like(matched_gt_boxes, dtype=matched_gt_boxes.dtype), matched_gt_boxes) matched_gt_classes = tf.gather_nd(gt_classes, gather_nd_indices) matched_gt_classes = tf.where(background_box_mask, tf.zeros_like(matched_gt_classes), matched_gt_classes) matched_gt_indices = tf.where(background_box_mask, -tf.ones_like(argmax_iou_indices), argmax_iou_indices) return (matched_gt_boxes, matched_gt_classes, matched_gt_indices, matched_iou, iou)