def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx):
  """Suppresses boxes of `box_slice` against the tile at `inner_idx`.

  Written as a `tf.while_loop` body: it takes the loop variables and returns
  them in the same order with `box_slice` updated and `inner_idx` advanced.

  Args:
    boxes: the full box tensor; the tile
      [inner_idx * _NMS_TILE_SIZE, (inner_idx + 1) * _NMS_TILE_SIZE) is sliced
      out of its second dimension. The last dimension is sliced with size 4.
    box_slice: the tile of boxes currently being filtered.
    iou_threshold: IoU value at or above which a box is suppressed.
    inner_idx: index of the tile used as the suppressor in this iteration.

  Returns:
    boxes: passed through unchanged.
    box_slice: the input slice with suppressed boxes multiplied by zero.
    iou_threshold: passed through unchanged.
    inner_idx: `inner_idx + 1`.
  """
  num_images = tf.shape(boxes)[0]
  tile_begin = [0, inner_idx * _NMS_TILE_SIZE, 0]
  tile_extent = [num_images, _NMS_TILE_SIZE, 4]
  suppressor_tile = tf.slice(boxes, tile_begin, tile_extent)

  # NOTE(review): presumably the overlap is laid out as
  # [batch, suppressor box, candidate box]; confirm against
  # box_utils.bbox_overlap.
  pairwise_iou = box_utils.bbox_overlap(suppressor_tile, box_slice)

  # A candidate is kept only when every IoU along axis 1 is below the
  # threshold; the 0/1 mask is then broadcast over the coordinate axis.
  below_threshold = tf.reduce_all(pairwise_iou < iou_threshold, [1])
  keep_mask = tf.expand_dims(tf.cast(below_threshold, box_slice.dtype), 2)
  filtered_slice = keep_mask * box_slice

  return boxes, filtered_slice, iou_threshold, inner_idx + 1
def _suppression_loop_body(boxes, iou_threshold, output_size, idx):
  """Runs suppression for the tile [idx*_NMS_TILE_SIZE, (idx+1)*_NMS_TILE_SIZE).

  The tile is first filtered against every earlier tile (cross suppression),
  then against itself (self suppression), and finally written back into
  `boxes`.

  Args:
    boxes: a tensor with a shape of [batch_size, anchors, 4].
    iou_threshold: a float representing the threshold for deciding whether
      boxes overlap too much with respect to IOU.
    output_size: an int32 tensor of size [batch_size], holding the number of
      selected boxes for each batch.
    idx: an integer scalar representing the induction variable (tile index).

  Returns:
    boxes: the boxes with tile `idx` replaced by its filtered version.
    iou_threshold: passed through unchanged for the next iteration.
    output_size: the input count plus the number of boxes kept in this tile.
    idx: `idx + 1`.
  """
  boxes_shape = tf.shape(boxes)
  batch_size = boxes_shape[0]
  num_tiles = boxes_shape[1] // _NMS_TILE_SIZE

  def _has_earlier_tile(_boxes, _tile, _threshold, inner_idx):
    # Only tiles strictly before the current one take part in cross
    # suppression.
    return inner_idx < idx

  def _keep_iterating(_iou, loop_condition, _iou_sum):
    # The self-suppression body decides when to stop through this flag.
    return loop_condition

  # Cross suppression: filter the current tile against tiles 0 .. idx - 1.
  current_tile = tf.slice(boxes, [0, idx * _NMS_TILE_SIZE, 0],
                          [batch_size, _NMS_TILE_SIZE, 4])
  _, current_tile, _, _ = tf.while_loop(
      _has_earlier_tile, _cross_suppression,
      [boxes, current_tile, iou_threshold, tf.constant(0)])

  # Self suppression: keep only IoU entries whose column index is larger than
  # the row index and whose value reaches the threshold.
  tile_iou = box_utils.bbox_overlap(current_tile, current_tile)
  positions = tf.range(_NMS_TILE_SIZE)
  col_after_row = tf.expand_dims(
      tf.reshape(positions, [1, -1]) > tf.reshape(positions, [-1, 1]), 0)
  over_threshold = tf.logical_and(col_after_row, tile_iou >= iou_threshold)
  tile_iou = tile_iou * tf.cast(over_threshold, tile_iou.dtype)
  initial_iou_sum = tf.reduce_sum(tile_iou, [1, 2])
  suppressed_iou, _, _ = tf.while_loop(
      _keep_iterating, _self_suppression,
      [tile_iou, tf.constant(True), initial_iou_sum])

  # A box with any remaining positive IoU along axis 1 is zeroed out.
  is_suppressed = tf.reduce_sum(suppressed_iou, 1) > 0
  survivor_mask = 1.0 - tf.cast(is_suppressed, current_tile.dtype)
  current_tile = current_tile * tf.expand_dims(survivor_mask, 2)

  # Writes the filtered tile back: a one-hot selector over tiles picks the
  # new tile at position idx and the old content everywhere else.
  tile_selector = tf.reshape(
      tf.cast(tf.equal(tf.range(num_tiles), idx), boxes.dtype), [1, -1, 1, 1])
  replicated_tile = tf.tile(
      tf.expand_dims(current_tile, [1]), [1, num_tiles, 1, 1])
  boxes_by_tile = tf.reshape(
      boxes, [batch_size, num_tiles, _NMS_TILE_SIZE, 4])
  merged = replicated_tile * tile_selector + boxes_by_tile * (1 - tile_selector)
  boxes = tf.reshape(merged, [batch_size, -1, 4])

  # Counts the boxes of this tile that still have a positive coordinate.
  has_positive_coord = tf.reduce_any(current_tile > 0, [2])
  kept_in_tile = tf.reduce_sum(tf.cast(has_positive_coord, tf.int32), [1])
  output_size = output_size + kept_in_tile

  return boxes, iou_threshold, output_size, idx + 1
def proposal_label_op(boxes,
                      gt_boxes,
                      gt_labels,
                      image_info,
                      batch_size_per_im=512,
                      fg_fraction=0.25,
                      fg_thresh=0.5,
                      bg_thresh_hi=0.5,
                      bg_thresh_lo=0.):
  """Assigns the proposals with ground truth labels and performs subsampling.

  Given proposal `boxes`, `gt_boxes`, and `gt_labels`, the function uses the
  following algorithm to generate the final `batch_size_per_im` RoIs.
  1. Calculates the IoU between each proposal box and each gt_boxes.
  2. Assigns each proposal box with a ground truth class and box label by
     choosing the largest overlap.
  3. Samples `batch_size_per_im` boxes from all proposal boxes, and returns
     box_targets, class_targets, and RoIs.
  The reference implementations of #1 and #2 are here: https://github.com/facebookresearch/Detectron/blob/master/detectron/datasets/json_dataset.py  # pylint: disable=line-too-long
  The reference implementation of #3 is here: https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/fast_rcnn.py.  # pylint: disable=line-too-long

  The batch dimension of `boxes` must be statically known, because the
  sub-sampling step is unrolled in Python over `boxes.shape[0]`.

  Args:
    boxes: a tensor with a shape of [batch_size, N, 4]. N is the number of
      proposals before groundtruth assignment (e.g., rpn_post_nms_topn). The
      last dimension is the pixel coordinates of scaled images in
      [ymin, xmin, ymax, xmax] form.
    gt_boxes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES, 4]. This
      tensor might have paddings with a value of -1. The coordinates of gt_boxes
      are in the pixel coordinates of the original image scale.
    gt_labels: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES]. This
      tensor might have paddings with a value of -1.
    image_info: a tensor of shape [batch_size, 5] where the five columns
      encode the input image's [height, width, scale,
      original_height, original_width]. Height and width are for
      the input to the network, not the original image; scale is the scale
      factor used to scale the network input size to the original image size.
      See dataloader.DetectionInputProcessor for details. The last two are
      original height and width.
    batch_size_per_im: an integer representing the RoI minibatch size per
      image.
    fg_fraction: a float representing the target fraction of the RoI minibatch
      that is labeled foreground (i.e., class > 0).
    fg_thresh: a float representing the overlap threshold for an RoI to be
      considered foreground (if >= fg_thresh).
    bg_thresh_hi: a float representing the upper overlap threshold for an RoI
      to be considered background (class = 0 if overlap in [LO, HI)).
    bg_thresh_lo: a float representing the lower overlap threshold for an RoI
      to be considered background (class = 0 if overlap in [LO, HI)).

  Returns:
    box_targets: a tensor with a shape of [batch_size, K, 4]. The tensor
      contains the ground truth pixel coordinates of the scaled images for each
      roi. K is the number of sample RoIs (e.g., batch_size_per_im).
    class_targets: an integer tensor with a shape of [batch_size, K]. The tensor
      contains the ground truth class for each roi; background RoIs get 0.
    rois: a tensor with a shape of [batch_size, K, 4], representing the
      coordinates of the selected RoI.
    proposal_to_label_map: a tensor with a shape of [batch_size, K]. This tensor
      keeps the mapping between proposal to labels. proposal_to_label_map[i]
      means the index of the ground truth instance for the i-th proposal;
      background RoIs get 0.
  """
  with tf.name_scope('proposal_label'):
    batch_size = boxes.shape[0]
    # Scales ground truth boxes to the scaled image coordinates.
    image_scale = 1 / image_info[:, 2]
    scaled_gt_boxes = gt_boxes * tf.reshape(image_scale, [batch_size, 1, 1])

    # The reference implementation intentionally includes ground truth boxes in
    # the proposals. see https://github.com/facebookresearch/Detectron/blob/master/detectron/datasets/json_dataset.py#L359.  # pylint: disable=line-too-long
    boxes = tf.concat([boxes, scaled_gt_boxes], axis=1)
    iou = box_utils.bbox_overlap(boxes, scaled_gt_boxes)

    (pre_sample_box_targets, pre_sample_class_targets, max_overlap,
     proposal_to_label_map) = _add_class_assignments(
         iou, scaled_gt_boxes, gt_labels)

    # Generates a random sample of RoIs comprising foreground and background
    # examples. reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/fast_rcnn.py#L132  # pylint: disable=line-too-long
    # Foreground is `overlap >= fg_thresh`, as documented above. With a strict
    # `>` and fg_thresh == bg_thresh_hi, an RoI whose overlap equals the
    # threshold would be neither foreground nor background.
    positives = tf.greater_equal(max_overlap,
                                 fg_thresh * tf.ones_like(max_overlap))
    negatives = tf.logical_and(
        tf.greater_equal(max_overlap,
                         bg_thresh_lo * tf.ones_like(max_overlap)),
        tf.less(max_overlap,
                bg_thresh_hi * tf.ones_like(max_overlap)))
    pre_sample_class_targets = tf.where(
        negatives, tf.zeros_like(pre_sample_class_targets),
        pre_sample_class_targets)
    proposal_to_label_map = tf.where(negatives,
                                     tf.zeros_like(proposal_to_label_map),
                                     proposal_to_label_map)

    # Handles ground truth paddings: a proposal is ignored when its smallest
    # IoU over the ground truth axis is negative.
    ignore_mask = tf.less(
        tf.reduce_min(iou, axis=2), tf.zeros_like(max_overlap))
    # indicator includes both positive and negative labels.
    # labels includes only positives labels.
    # positives = indicator & labels.
    # negatives = indicator & !labels.
    # ignore = !indicator.
    labels = positives
    pos_or_neg = tf.logical_or(positives, negatives)
    indicator = tf.logical_and(pos_or_neg, tf.logical_not(ignore_mask))

    sampler = (
        balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
            positive_fraction=fg_fraction, is_static=True))
    # Batch-unroll the sub-sampling process.
    all_samples = tf.stack(
        [sampler.subsample(indicator[i], batch_size_per_im, labels[i])
         for i in range(batch_size)],
        axis=0)

    # A workaround to get the indices from the boolean tensors: after the cast
    # the sampled entries are 1, so top_k returns their positions first.
    # NOTE(review): assumes the sampler marks exactly batch_size_per_im
    # entries per image; otherwise unsampled indices fill the remainder.
    _, samples_indices = tf.nn.top_k(
        tf.cast(all_samples, tf.int32), k=batch_size_per_im, sorted=True)
    # Constructs indices for gather: per-image indices are offset so that they
    # address the tensors flattened over the batch dimension.
    samples_indices = tf.reshape(
        samples_indices + tf.expand_dims(
            tf.range(batch_size) * tf.shape(boxes)[1], 1), [-1])
    rois = tf.reshape(
        tf.gather(tf.reshape(boxes, [-1, 4]), samples_indices),
        [batch_size, -1, 4])
    class_targets = tf.reshape(
        tf.gather(
            tf.reshape(pre_sample_class_targets, [-1, 1]), samples_indices),
        [batch_size, -1])
    sample_box_targets = tf.reshape(
        tf.gather(tf.reshape(pre_sample_box_targets, [-1, 4]), samples_indices),
        [batch_size, -1, 4])
    sample_proposal_to_label_map = tf.reshape(
        tf.gather(tf.reshape(proposal_to_label_map, [-1, 1]), samples_indices),
        [batch_size, -1])
  return sample_box_targets, class_targets, rois, sample_proposal_to_label_map