def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx):
    """Suppress the boxes of `box_slice` using the tile at `inner_idx`.

    Written as a `tf.while_loop` body: it returns its loop variables in the
    same order it receives them.

    Args:
      boxes: a rank-3 tensor sliced as [batch_size, num_boxes, 4].
      box_slice: the tile being filtered; presumably shaped
        [batch_size, NMS_TILE_SIZE, 4] -- confirm against the caller.
      iou_threshold: overlap value at or above which a box is suppressed.
      inner_idx: scalar index of the tile of `boxes` used as suppressor.

    Returns:
      boxes: the input `boxes`, unchanged.
      ret_slice: `box_slice` with every suppressed box multiplied by zero.
      iou_threshold: the input threshold, unchanged.
      inner_idx: the input index incremented by one.
    """
    tile_begin = [0, inner_idx * NMS_TILE_SIZE, 0]
    tile_extent = [tf.shape(boxes)[0], NMS_TILE_SIZE, 4]
    suppressor_tile = tf.slice(boxes, tile_begin, tile_extent)

    pairwise_iou = box_utils.bbox_overlap(suppressor_tile, box_slice)
    # A box survives only if its overlap with every entry along axis 1
    # (assumed to index the suppressor tile's boxes) is below the threshold.
    survives = tf.reduce_all(pairwise_iou < iou_threshold, [1])
    keep_factor = tf.expand_dims(tf.cast(survives, box_slice.dtype), 2)
    ret_slice = keep_factor * box_slice
    return boxes, ret_slice, iou_threshold, inner_idx + 1
def _suppression_loop_body(boxes, iou_threshold, output_size, idx):
    """Run one NMS step on tile [idx*NMS_TILE_SIZE, (idx+1)*NMS_TILE_SIZE).

    The tile is first filtered against every earlier tile, then against
    itself, and finally written back into `boxes`.

    Args:
      boxes: a tensor with a shape of [batch_size, anchors, 4].
      iou_threshold: a float; boxes overlapping at or above this IOU are
        considered duplicates.
      output_size: an int32 tensor of size [batch_size] holding the number of
        boxes selected so far for each batch element.
      idx: an integer scalar, the loop induction variable (tile index).

    Returns:
      boxes: the boxes with tile `idx` replaced by its filtered version.
      iou_threshold: passed through unchanged for the next iteration.
      output_size: the input count plus the boxes kept in this tile.
      idx: the induction variable incremented by one.
    """
    boxes_shape = tf.shape(boxes)
    batch_size = boxes_shape[0]
    num_tiles = boxes_shape[1] // NMS_TILE_SIZE

    current_tile = tf.slice(boxes, [0, idx * NMS_TILE_SIZE, 0],
                            [batch_size, NMS_TILE_SIZE, 4])

    # Pass 1: let every tile that precedes `idx` suppress the current tile.
    def _earlier_tiles_remain(_boxes, _box_slice, _threshold, inner_idx):
        return inner_idx < idx

    _, current_tile, _, _ = tf.while_loop(
        _earlier_tiles_remain, _cross_suppression,
        [boxes, current_tile, iou_threshold, tf.constant(0)])

    # Pass 2: self-suppression inside the tile. Only entries whose column
    # index exceeds their row index and whose IOU reaches the threshold are
    # kept in the overlap matrix.
    tile_iou = box_utils.bbox_overlap(current_tile, current_tile)
    positions = tf.range(NMS_TILE_SIZE)
    column_after_row = tf.expand_dims(
        tf.reshape(positions, [1, -1]) > tf.reshape(positions, [-1, 1]), 0)
    relevant = tf.logical_and(column_after_row, tile_iou >= iou_threshold)
    tile_iou = tile_iou * tf.cast(relevant, tile_iou.dtype)

    def _keep_iterating(_iou, loop_condition, _iou_sum):
        return loop_condition

    suppressed_iou, _, _ = tf.while_loop(
        _keep_iterating, _self_suppression,
        [tile_iou, tf.constant(True), tf.reduce_sum(tile_iou, [1, 2])])
    is_suppressed = tf.reduce_sum(suppressed_iou, 1) > 0
    survivor_scale = tf.expand_dims(
        1.0 - tf.cast(is_suppressed, current_tile.dtype), 2)
    current_tile = current_tile * survivor_scale

    # Write the filtered tile back: a one-hot selector over the tile axis
    # picks the new tile at position `idx` and the old data elsewhere.
    tile_selector = tf.reshape(
        tf.cast(tf.equal(tf.range(num_tiles), idx), boxes.dtype),
        [1, -1, 1, 1])
    replicated_tile = tf.tile(
        tf.expand_dims(current_tile, [1]), [1, num_tiles, 1, 1])
    boxes_by_tile = tf.reshape(
        boxes, [batch_size, num_tiles, NMS_TILE_SIZE, 4])
    merged = (replicated_tile * tile_selector
              + boxes_by_tile * (1 - tile_selector))
    boxes = tf.reshape(merged, [batch_size, -1, 4])

    # A box counts as kept when at least one of its coordinates is positive.
    kept_in_tile = tf.reduce_sum(
        tf.cast(tf.reduce_any(current_tile > 0, [2]), tf.int32), [1])
    output_size = output_size + kept_in_tile
    return boxes, iou_threshold, output_size, idx + 1
def assign(self, gt_boxes, gt_labels, anchors, predicted_boxes):
    """Build per-prediction box and label targets from top-k cost matches.

    Candidates are selected twice, once from the cost between
    `predicted_boxes` and `gt_boxes` and once from the cost between `anchors`
    and `gt_boxes`, taking `self.match_times` entries each time. Labels are
    set to -1 where the IoU tests against `self.neg_ignore_thresh` fire.

    Args:
      gt_boxes: ground-truth boxes; presumably shaped (k, 4) -- confirm.
      gt_labels: ground-truth labels; presumably shaped (k,) -- confirm.
      anchors: anchor boxes, matched against the ground truth.
      predicted_boxes: predicted boxes; the returned targets take their
        shapes from this tensor.

    Returns:
      tgt_boxes: tensor with the shape of `predicted_boxes`.
      tgt_labels: tensor with the shape of `predicted_boxes[:, 0]`.
    """
    with tf.name_scope("assign"):
        # Cost matrices come from `self._cdist` (not visible here; the
        # original comment calls it an L1 cost between boxes).
        # NOTE(review): `tf.nn.top_k` returns the *largest* entries along the
        # last axis; if `_cdist` is a distance, confirm this is intended.
        selected = []
        for candidates in (predicted_boxes, anchors):
            cost = self._cdist(candidates, gt_boxes)
            _, top_idx = tf.nn.top_k(cost, k=self.match_times)
            selected.append(tf.reshape(tf.transpose(top_idx), [-1, 1]))
        indices = tf.concat(selected, 0)

        # Repeat the ground truth once per selected candidate set and round.
        repeats = self.match_times * 2
        gt_boxes = tf.tile(gt_boxes, [repeats, 1])
        gt_labels = tf.tile(gt_labels, [repeats])

        # NOTE(review): the second gather coordinate is the label value
        # itself, and the comparison uses `neg_ignore_thresh` for the
        # positive-ignore test -- confirm both against the intended design.
        anchor_ious = box_utils.bbox_overlap(anchors, gt_boxes)
        gather_coords = tf.concat([indices, gt_labels[:, None]], -1)
        pos_anchor_ious = tf.gather_nd(anchor_ious, gather_coords)
        pos_ignore_mask = pos_anchor_ious < self.neg_ignore_thresh
        ignored_labels = 0 - tf.ones_like(gt_labels)
        gt_labels = tf.where(pos_ignore_mask, ignored_labels, gt_labels)

        # Scatter the matched ground truth into prediction-shaped targets.
        tgt_boxes = tf.scatter_nd(
            indices, gt_boxes, tf.shape(predicted_boxes))
        tgt_labels = tf.scatter_nd(
            indices, gt_labels, tf.shape(predicted_boxes[:, 0]))

        # Predictions whose best IoU with any ground truth exceeds the
        # threshold are marked -1 as well.
        pred_ious = box_utils.bbox_overlap(predicted_boxes, gt_boxes)
        neg_ignore_mask = (
            tf.reduce_max(pred_ious, 1) > self.neg_ignore_thresh)
        tgt_labels = tf.where(
            neg_ignore_mask, 0 - tf.ones_like(tgt_labels), tgt_labels)

        return tgt_boxes, tgt_labels
def assign(self, gt_boxes, gt_labels, proposals):
    """Assign ground truth to boxes (proposals/anchors) by overlap.

    This method only prepares the inputs: it drops ground truth whose label
    is not positive, appends one all-zero dummy box with label 0, computes
    the overlap matrix, and hands everything to `self.assign_wrt_overlaps`.

    The matching itself lives in `assign_wrt_overlaps` (not shown here). Per
    the original notes, every box is expected to receive -1 (don't care),
    0 (negative sample) or a positive number, following these ordered steps:

    1. initialize target boxes and labels.
    2. assign proposals whose iou with all gts < neg_iou_thresh to 0.
    3. for each box, if the iou with its nearest gt >= pos_iou_thresh,
       assign it to that box.
    4. for each gt box, assign its best proposals (may be more than one)
       to itself.

    Args:
        gt_boxes (Tensor): Ground-truth boxes, shape (k, 4).
        gt_labels (Tensor): Ground-truth labels, shape (k, ).
        proposals (Tensor): Bounding boxes to be assigned, shape (n, 4).

    Returns:
        target_boxes (Tensor), target_labels (Tensor).
    """
    is_valid = tf.greater(gt_labels, 0)

    # The trailing dummy entry keeps the ground truth non-empty even when
    # every label has been filtered out.
    padded_boxes = tf.concat(
        [tf.boolean_mask(gt_boxes, is_valid),
         tf.zeros([1, 4], gt_boxes.dtype)],
        axis=0)
    padded_labels = tf.concat(
        [tf.boolean_mask(gt_labels, is_valid),
         tf.zeros([1], gt_labels.dtype)],
        axis=0)

    iou_matrix = box_utils.bbox_overlap(proposals, padded_boxes)

    # TODO: ignored-region handling (masking overlaps by IoF against ignored
    # gt boxes using `self.ignore_iof_thresh`) is not implemented.
    return self.assign_wrt_overlaps(iou_matrix, padded_boxes, padded_labels)
def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx):
    """Suppress boxes between different tiles.

    Used as a `tf.while_loop` body, so the loop variables are returned in the
    order they were received.

    Args:
      boxes: a tensor of shape [batch_size, num_boxes_with_padding, 4]
      box_slice: a tensor of shape [batch_size, NMS_TILE_SIZE, 4]
      iou_threshold: a scalar tensor
      inner_idx: a scalar tensor representing the tile index of the tile
        that is used to suppress box_slice

    Returns:
      boxes: unchanged boxes as input
      box_slice_after_suppression: box_slice after suppression
      iou_threshold: unchanged
      inner_idx: the input tile index incremented by one
    """
    tile_begin = [0, inner_idx * NMS_TILE_SIZE, 0]
    tile_extent = [tf.shape(boxes)[0], NMS_TILE_SIZE, 4]
    suppressor_tile = tf.slice(boxes, tile_begin, tile_extent)

    pairwise_iou = box_utils.bbox_overlap(suppressor_tile, box_slice)
    # Keep a box only when all of its overlaps along axis 1 stay below the
    # threshold; suppressed boxes are multiplied by zero.
    below_threshold = tf.reduce_all(pairwise_iou < iou_threshold, [1])
    keep_factor = tf.expand_dims(
        tf.cast(below_threshold, box_slice.dtype), 2)
    box_slice_after_suppression = keep_factor * box_slice
    return boxes, box_slice_after_suppression, iou_threshold, inner_idx + 1
def box_matching(boxes, gt_boxes, gt_classes, gt_attributes):
    """Match boxes to groundtruth boxes.

    Given the proposal boxes and the groundtruth boxes, classes and
    attributes, perform the groundtruth matching by taking the argmax of the
    IoU between boxes and groundtruth boxes.

    Args:
      boxes: a tensor of shape of [batch_size, N, 4] representing the box
        coordinates to be matched to groundtruth boxes.
      gt_boxes: a tensor of shape of [batch_size, MAX_INSTANCES, 4]
        representing the groundtruth box coordinates. It is padded with -1s
        to indicate the invalid boxes.
      gt_classes: [batch_size, MAX_INSTANCES] representing the groundtruth
        box classes. It is padded with -1s to indicate the invalid classes.
      gt_attributes: [batch_size, MAX_INSTANCES, num_attributes]
        representing the groundtruth attributes. It is padded with -1s to
        indicate the invalid attributes. Its static rank must be 3.

    Returns:
      matched_gt_boxes: a tensor of shape of [batch_size, N, 4], representing
        the matched groundtruth box coordinates for each input box. If the
        box does not overlap with any groundtruth boxes, the matched boxes of
        it will be set to all 0s.
      matched_gt_classes: a tensor of shape of [batch_size, N], representing
        the matched groundtruth classes for each input box. If the box does
        not overlap with any groundtruth boxes, the matched box classes of it
        will be set to 0, which corresponds to the background class.
      matched_gt_attributes: a tensor of shape of
        [batch_size, N, num_attributes], representing the matched groundtruth
        attributes for each input box. If the box does not overlap with any
        groundtruth boxes, the matched box attributes of it will be set to
        all 0s.
      matched_gt_indices: a tensor of shape of [batch_size, N], representing
        the indices of the matched groundtruth boxes in the original gt_boxes
        tensor. If the box does not overlap with any groundtruth boxes, the
        index of the matched groundtruth will be set to -1.
      matched_iou: a tensor of shape of [batch_size, N], representing the IoU
        between the box and its matched groundtruth box. The matched IoU is
        the maximum IoU of the box and all the groundtruth boxes.
      iou: a tensor of shape of [batch_size, N, K], representing the IoU
        matrix between boxes and the groundtruth boxes. The IoU between a box
        and the invalid groundtruth boxes whose coordinates are
        [-1, -1, -1, -1] is -1.
    """
    # iou <- [batch_size, N, K]
    iou = box_utils.bbox_overlap(boxes, gt_boxes)

    # matched_iou <- [batch_size, N]
    # 0.0 -> no overlap with any gt, -1.0 -> only invalid (padded) gt.
    matched_iou = tf.reduce_max(iou, axis=-1)

    # background_box_mask <- bool, [batch_size, N]
    background_box_mask = tf.less_equal(matched_iou, 0.0)

    argmax_iou_indices = tf.argmax(iou, axis=-1, output_type=tf.int32)

    # Pair every argmax with its batch index so gather_nd can pick per-image
    # ground truth: gather_nd_indices <- [batch_size, N, 2].
    argmax_iou_indices_shape = tf.shape(argmax_iou_indices)
    batch_indices = (
        tf.expand_dims(tf.range(argmax_iou_indices_shape[0]), axis=-1) *
        tf.ones([1, argmax_iou_indices_shape[-1]], dtype=tf.int32))
    gather_nd_indices = tf.stack([batch_indices, argmax_iou_indices], axis=-1)

    def _zero_background(values, depth):
        """Zero the rows of a [batch_size, N, depth] tensor at background boxes."""
        tiled_mask = tf.tile(
            tf.expand_dims(background_box_mask, axis=-1), [1, 1, depth])
        # zeros_like inherits the dtype of `values`; forcing float32 here
        # would make tf.where fail for any other ground-truth dtype.
        return tf.where(tiled_mask, tf.zeros_like(values), values)

    matched_gt_boxes = _zero_background(
        tf.gather_nd(gt_boxes, gather_nd_indices), 4)

    matched_gt_classes = tf.gather_nd(gt_classes, gather_nd_indices)
    matched_gt_classes = tf.where(background_box_mask,
                                  tf.zeros_like(matched_gt_classes),
                                  matched_gt_classes)

    _, _, num_attributes = gt_attributes.get_shape().as_list()
    if num_attributes is None:
        # The attribute dimension is not statically known; use the runtime
        # size instead of passing None to tf.tile.
        num_attributes = tf.shape(gt_attributes)[-1]
    matched_gt_attributes = _zero_background(
        tf.gather_nd(gt_attributes, gather_nd_indices), num_attributes)

    matched_gt_indices = tf.where(background_box_mask,
                                  -tf.ones_like(argmax_iou_indices),
                                  argmax_iou_indices)

    return (matched_gt_boxes, matched_gt_classes, matched_gt_attributes,
            matched_gt_indices, matched_iou, iou)
def assign(self, gt_boxes, gt_labels, proposals, num_proposals_per_level, gt_boxes_ignore=None):
    """Assign gt to bboxes.

    The assignment is done in following steps

    1. compute iou between all proposals and gt boxes
    2. compute center distance between all proposals and gt boxes
    3. on each pyramid level, for each gt, select the `self.topk` proposals
       whose centers are closest to the gt center
    4. get the corresponding iou for these candidates, compute their mean and
       std, and set mean + std as the iou threshold
    5. select the candidates whose iou is greater than or equal to the
       threshold as positive
    6. limit the positive samples' centers to lie inside the gt box

    Args:
        gt_boxes (Tensor): Ground-truth boxes, shape (k, 4).
        gt_labels (Tensor): Ground-truth labels, shape (k, ).
        proposals (Tensor): Bounding boxes to be assigned, shape (n, 4).
        num_proposals_per_level: Sequence with the number of proposals on
            each pyramid level, in the order the levels are laid out in
            `proposals`. Each level must hold at least `self.topk`
            proposals, because `tf.nn.top_k` is called with `k=self.topk`
            on each level's slice.
        gt_boxes_ignore: Ground truth bboxes that are labelled as `ignored`,
            e.g., crowd boxes in COCO. Currently accepted but not used.

    Returns:
        target_boxes: shape (n, 4); the gt box selected for each proposal by
            argmax (for non-positive proposals this is the argmax over an
            all `-INF` column, not a zero box).
        target_labels: shape (n, ); the label of the selected gt, or 0 for
            proposals that are not positive for any gt.
    """
    # Sizes come from the inputs themselves so they do not depend on the
    # layout of the overlap matrix.
    num_gts = tf.shape(gt_boxes)[0]
    num_proposals = tf.shape(proposals)[0]

    # 1. compute iou between all proposals and gt boxes.
    # Everything below indexes the matrix as [gt, proposal], so the gt boxes
    # must be the first argument (bbox_overlap puts its first argument on
    # the row axis).
    overlaps = box_utils.bbox_overlap(gt_boxes, proposals)  # (k, n)

    # 2. compute center distance between all proposals and gt boxes
    gt_centers = (gt_boxes[:, 0:2] + gt_boxes[:, 2:4]) * 0.5
    proposal_centers = (proposals[:, 0:2] + proposals[:, 2:4]) * 0.5
    distances = tf.math.sqrt(
        tf.reduce_sum(
            tf.math.squared_difference(gt_centers[:, None, :],
                                       proposal_centers[None, :, :]),
            -1))  # (k, n)

    # 3. on each pyramid level, for each gt, select the topk proposals whose
    # centers are closest to the gt center.
    topk_inds_list = []
    start_ind = 0
    for num in num_proposals_per_level:
        end_ind = start_ind + num
        # Negate so that top_k (largest first) returns the smallest distances.
        _, level_inds = tf.nn.top_k(
            -distances[:, start_ind:end_ind], k=self.topk)  # (k, topk)
        # Shift level-local indices to positions in the full proposal list.
        topk_inds_list.append(level_inds + start_ind)
        start_ind = end_ind
    topk_inds = tf.concat(topk_inds_list, 1)  # (k, num_topk)
    num_topk = self.topk * len(num_proposals_per_level)

    # Row index of every candidate, shape (k, num_topk).
    gt_inds = tf.reshape(
        tf.repeat(tf.range(num_gts), num_topk), [num_gts, num_topk])
    inds = tf.stack([gt_inds, topk_inds], -1)  # (k, num_topk, 2)

    # 4. get the corresponding iou for these candidates, and compute the
    # mean and std, set mean + std as the iou threshold
    candidate_overlaps = tf.gather_nd(overlaps, inds)  # (k, num_topk)
    mean_per_gt, var_per_gt = tf.nn.moments(
        candidate_overlaps, 1, keepdims=True)
    overlaps_thresh_per_gt = mean_per_gt + tf.math.sqrt(var_per_gt)

    # 5. select the candidates whose iou is greater than or equal to the
    # threshold as positive
    is_pos = candidate_overlaps >= overlaps_thresh_per_gt  # (k, num_topk)

    # 6. limit the positive samples' centers to lie inside the gt box:
    # the smallest signed distance from the center to the four gt sides must
    # exceed a small margin. Broadcasting (1, n) against (k, 1) gives (k, n).
    center_0 = proposal_centers[None, :, 0]
    center_1 = proposal_centers[None, :, 1]
    side_distances = tf.stack([
        center_0 - gt_boxes[:, 0:1],
        center_1 - gt_boxes[:, 1:2],
        gt_boxes[:, 2:3] - center_0,
        gt_boxes[:, 3:4] - center_1,
    ], -1)  # (k, n, 4)
    is_in_gt = tf.reduce_min(side_distances, -1) > 0.01  # (k, n)
    is_pos = tf.logical_and(is_pos, tf.gather_nd(is_in_gt, inds))

    # Positions of the positive candidates in the row-major flattened
    # (k, n) overlap matrix.
    flat_inds = topk_inds + gt_inds * num_proposals
    candidate_inds = tf.boolean_mask(flat_inds, is_pos)

    # If a proposal is assigned to multiple gts, the one with the highest
    # IoU is selected: non-candidates are held at -INF so they never win.
    flat_size = num_gts * num_proposals
    overlaps_inf = tf.fill([flat_size], tf.cast(-INF, overlaps.dtype))
    overlaps_inf = tf.tensor_scatter_nd_update(
        overlaps_inf, candidate_inds[:, None],
        tf.gather(tf.reshape(overlaps, [flat_size]), candidate_inds))
    overlaps_inf = tf.reshape(overlaps_inf, (num_gts, num_proposals))

    max_overlaps = tf.reduce_max(overlaps_inf, 0)  # (n, )
    argmax_overlaps = tf.argmax(overlaps_inf, 0)  # (n, )

    target_boxes = tf.gather(gt_boxes, argmax_overlaps)
    target_labels = tf.gather(gt_labels, argmax_overlaps)

    # Proposals that were never a positive candidate get label 0; the zeros
    # take the dtype of the labels rather than assuming int64.
    positive = max_overlaps > -INF
    target_labels = tf.where(
        positive, target_labels, tf.zeros_like(target_labels))

    return target_boxes, target_labels