def testSortedNonMaxSuppressionOnTPUSpeed(self):
    """Checks output shapes of padded sorted NMS on a large random input.

    Feeds 2 x 12000 random boxes/scores through
    `nms.sorted_non_max_suppression_padded` inside a TPU strategy scope and
    verifies that both outputs are padded to 2000 entries per image.
    """
    batch_size, num_boxes, max_output_size = 2, 12000, 2000

    # Random float32 inputs (boxes are drawn first, then scores).
    boxes = tf.constant(
        np.random.rand(batch_size, num_boxes, 4).astype(np.float32))
    scores = tf.constant(
        np.random.rand(batch_size, num_boxes).astype(np.float32))
    iou_threshold = tf.constant(np.array(0.7, dtype=np.float32))

    # The NMS call is issued inside the TPU strategy scope.
    tpu_strategy = tf.distribute.experimental.TPUStrategy()
    with tpu_strategy.scope():
        nms_scores, nms_boxes = nms.sorted_non_max_suppression_padded(
            boxes=boxes,
            scores=scores,
            max_output_size=max_output_size,
            iou_threshold=iou_threshold)

    self.assertEqual(nms_scores.numpy().shape, (batch_size, max_output_size))
    self.assertEqual(nms_boxes.numpy().shape,
                     (batch_size, max_output_size, 4))
def testSortedNonMaxSuppressionOnCPU(self):
    """Checks padded sorted NMS results against hand-computed scores.

    Uses the fixture data stored on the test case (`boxes_data`,
    `scores_data`, `iou_threshold`, `max_output_size`) and verifies both the
    padded box shape and the exact surviving scores; slots beyond the
    surviving boxes are expected to hold a score of 0.
    """

    def _float32_constant(value):
        # Convert through NumPy so the tensor dtype is float32.
        return tf.constant(np.array(value, dtype=np.float32))

    boxes = _float32_constant(self.boxes_data)
    scores = _float32_constant(self.scores_data)
    iou_threshold = _float32_constant(self.iou_threshold)

    # No distribution strategy is used for this call.
    nms_scores, nms_boxes = nms.sorted_non_max_suppression_padded(
        boxes=boxes,
        scores=scores,
        max_output_size=self.max_output_size,
        iou_threshold=iou_threshold)

    expected_box_shape = (2, self.max_output_size, 4)
    expected_scores = [[0.9, 0.6, 0.4, 0.3, 0., 0.],
                       [0.8, 0.7, 0.5, 0.3, 0., 0.]]
    self.assertEqual(nms_boxes.numpy().shape, expected_box_shape)
    self.assertAllClose(nms_scores.numpy(), expected_scores)
def _generate_detections_v2(boxes: tf.Tensor,
                            scores: tf.Tensor,
                            pre_nms_top_k: int = 5000,
                            pre_nms_score_threshold: float = 0.05,
                            nms_iou_threshold: float = 0.5,
                            max_num_detections: int = 100):
    """Generates the final detections given the model outputs.

    This implementation unrolls the classes dimension while using the
    tf.while_loop to implement the batched NMS, so that it can be parallelized
    at the batch dimension. It should give better performance compared to the
    v1 implementation. It is TPU compatible.

    Args:
      boxes: A `tf.Tensor` with shape `[batch_size, N, num_classes, 4]` or
        `[batch_size, N, 1, 4]`, which holds box predictions on all feature
        levels. The N is the number of total anchors on all levels. When the
        class dimension is 1 the same boxes are reused for every class.
      scores: A `tf.Tensor` with shape `[batch_size, N, num_classes]`, which
        stacks class probability on all feature levels. The N is the number
        of total anchors on all levels. The num_classes is the number of
        classes predicted by the model. Note that the class_outputs here is
        the raw score. N and num_classes must be statically known.
      pre_nms_top_k: An `int` number of top candidate detections per class
        before NMS.
      pre_nms_score_threshold: A `float` representing the threshold for
        deciding when to remove boxes based on score.
      nms_iou_threshold: A `float` representing the threshold for deciding
        whether boxes overlap too much with respect to IOU.
      max_num_detections: A `scalar` representing maximum number of boxes
        retained over all classes.

    Returns:
      nms_boxes: A `float` tf.Tensor of shape
        [batch_size, max_num_detections, 4] representing top detected boxes
        in [y1, x1, y2, x2].
      nms_scores: A `float` tf.Tensor of shape
        [batch_size, max_num_detections] representing sorted confidence
        scores for detected boxes. The values are between [0, 1].
      nms_classes: An `int` tf.Tensor of shape
        [batch_size, max_num_detections] representing classes for detected
        boxes.
      valid_detections: An `int` tf.Tensor of shape [batch_size]; only the
        top `valid_detections` boxes are valid detections. Entries with a
        non-positive score (padding) are not counted.
    """
    with tf.name_scope('generate_detections'):
        nmsed_boxes = []
        nmsed_classes = []
        nmsed_scores = []
        batch_size, _, num_classes_for_box, _ = boxes.get_shape().as_list()
        if batch_size is None:
            # Fall back to the dynamic batch size for `tf.fill` below.
            batch_size = tf.shape(boxes)[0]
        _, total_anchors, num_classes = scores.get_shape().as_list()

        # Selects top pre_nms_num scores and indices before NMS.
        scores, indices = _select_top_k_scores(
            scores, min(total_anchors, pre_nms_top_k))

        for i in range(num_classes):
            # Class-agnostic boxes have a class dimension of 1, in which case
            # index 0 is used for every class.
            boxes_i = boxes[:, :, min(num_classes_for_box - 1, i), :]
            scores_i = scores[:, :, i]
            # Obtains pre_nms_top_k before running NMS.
            boxes_i = tf.gather(
                boxes_i, indices[:, :, i], batch_dims=1, axis=1)

            # Filter out scores.
            boxes_i, scores_i = box_ops.filter_boxes_by_scores(
                boxes_i, scores_i,
                min_score_threshold=pre_nms_score_threshold)

            (nmsed_scores_i,
             nmsed_boxes_i) = nms.sorted_non_max_suppression_padded(
                 tf.cast(scores_i, tf.float32),
                 tf.cast(boxes_i, tf.float32),
                 max_num_detections,
                 iou_threshold=nms_iou_threshold)
            nmsed_classes_i = tf.fill([batch_size, max_num_detections], i)
            nmsed_boxes.append(nmsed_boxes_i)
            nmsed_scores.append(nmsed_scores_i)
            nmsed_classes.append(nmsed_classes_i)

        # Merge the per-class results and keep the best detections overall.
        nmsed_boxes = tf.concat(nmsed_boxes, axis=1)
        nmsed_scores = tf.concat(nmsed_scores, axis=1)
        nmsed_classes = tf.concat(nmsed_classes, axis=1)
        nmsed_scores, indices = tf.nn.top_k(
            nmsed_scores, k=max_num_detections, sorted=True)
        nmsed_boxes = tf.gather(nmsed_boxes, indices, batch_dims=1, axis=1)
        nmsed_classes = tf.gather(nmsed_classes, indices, batch_dims=1)

        # The padded NMS op fills unused output slots with a score of 0, so
        # only strictly positive scores are real detections. Comparing
        # against -1 would count every padded slot as valid.
        valid_detections = tf.reduce_sum(
            input_tensor=tf.cast(tf.greater(nmsed_scores, 0.0), tf.int32),
            axis=1)
    return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
def _multilevel_propose_rois(raw_boxes,
                             raw_scores,
                             anchor_boxes,
                             image_shape,
                             pre_nms_top_k=2000,
                             pre_nms_score_threshold=0.0,
                             pre_nms_min_size_threshold=0.0,
                             nms_iou_threshold=0.7,
                             num_proposals=1000,
                             use_batched_nms=False,
                             decode_boxes=True,
                             clip_boxes=True,
                             apply_sigmoid_to_score=True):
    """Proposes RoIs given a group of candidates from different FPN levels.

    The following describes the steps:
      1. For each individual level:
        a. Apply sigmoid transform if specified.
        b. Decode boxes if specified.
        c. Clip boxes if specified.
        d. Filter small boxes and those that fall outside the image if
           specified.
        e. Apply pre-NMS filtering including pre-NMS top k and score
           thresholding.
        f. Apply NMS.
      2. Aggregate post-NMS boxes from each level.
      3. Apply an overall top k to generate the final selected RoIs.

    Args:
      raw_boxes: A `dict` with keys representing FPN levels and values
        representing box tensors of shape
        [batch_size, feature_h, feature_w, num_anchors * 4].
      raw_scores: A `dict` with keys representing FPN levels and values
        representing logit tensors of shape
        [batch_size, feature_h, feature_w, num_anchors]. The feature and
        anchor dimensions must be statically known.
      anchor_boxes: A `dict` with keys representing FPN levels and values
        representing anchor box tensors of shape
        [batch_size, feature_h * feature_w * num_anchors, 4]. Only read when
        `decode_boxes` is True.
      image_shape: A `tf.Tensor` of shape [batch_size, 2] where the last
        dimension are [height, width] of the scaled image. Only read when
        `clip_boxes` is True or `pre_nms_min_size_threshold` is positive.
      pre_nms_top_k: An `int` of top scoring RPN proposals *per level* to
        keep before applying NMS. Default: 2000.
      pre_nms_score_threshold: A `float` between 0 and 1 representing the
        minimal box score to keep before applying NMS. This is often used as
        a pre-filtering step for better performance. Default: 0, no
        filtering is applied.
      pre_nms_min_size_threshold: A `float` representing the minimal box
        size in each side (w.r.t. the scaled image) to keep before applying
        NMS. This is often used as a pre-filtering step for better
        performance. Default: 0, no filtering is applied.
      nms_iou_threshold: A `float` between 0 and 1 representing the IoU
        threshold used for NMS. If 0.0, no NMS is applied and the top
        scoring boxes of each level are taken directly. Default: 0.7.
      num_proposals: An `int` of top scoring RPN proposals *in total* to
        keep after applying NMS. Default: 1000.
      use_batched_nms: A `bool` indicating whether NMS is applied in batch
        using `tf.image.combined_non_max_suppression`. Currently only
        available in CPU/GPU. Default is False.
      decode_boxes: A `bool` indicating whether `raw_boxes` needs to be
        decoded using `anchor_boxes`. If False, use `raw_boxes` directly and
        ignore `anchor_boxes`. Default is True.
      clip_boxes: A `bool` indicating whether boxes are first clipped to the
        scaled image size before applying NMS. If False, no clipping is
        applied. Default is True.
      apply_sigmoid_to_score: A `bool` indicating whether to apply sigmoid
        to `raw_scores` before applying NMS. Default is True.

    Returns:
      selected_rois: A `tf.Tensor` of shape [batch_size, num_proposals, 4],
        representing the box coordinates of the selected proposals w.r.t.
        the scaled image. Fewer than `num_proposals` entries are returned
        when the levels together yield fewer candidates.
      selected_roi_scores: A `tf.Tensor` of shape
        [batch_size, num_proposals], representing the scores of the selected
        proposals.
    """
    with tf.name_scope('multilevel_propose_rois'):
        rois = []
        roi_scores = []
        filter_small_boxes = pre_nms_min_size_threshold > 0.0
        # `image_shape` is only consumed by clipping and min-size filtering;
        # leave it untouched otherwise so callers may omit it (pass None).
        if clip_boxes or filter_small_boxes:
            image_shape = tf.expand_dims(image_shape, axis=1)

        for level in sorted(raw_scores):
            with tf.name_scope('level_%s' % level):
                _, feature_h, feature_w, num_anchors_per_location = (
                    raw_scores[level].get_shape().as_list())

                # Flatten the spatial and anchor dimensions into one axis.
                num_boxes = feature_h * feature_w * num_anchors_per_location
                this_level_scores = tf.reshape(
                    raw_scores[level], [-1, num_boxes])
                this_level_boxes = tf.reshape(
                    raw_boxes[level], [-1, num_boxes, 4])

                if apply_sigmoid_to_score:
                    this_level_scores = tf.sigmoid(this_level_scores)

                if decode_boxes:
                    # Anchors are touched only here so that `anchor_boxes`
                    # is really ignored when decoding is disabled.
                    this_level_anchors = tf.cast(
                        tf.reshape(anchor_boxes[level], [-1, num_boxes, 4]),
                        dtype=this_level_scores.dtype)
                    this_level_boxes = box_ops.decode_boxes(
                        this_level_boxes, this_level_anchors)
                if clip_boxes:
                    this_level_boxes = box_ops.clip_boxes(
                        this_level_boxes, image_shape)

                if filter_small_boxes:
                    this_level_boxes, this_level_scores = (
                        box_ops.filter_boxes(
                            this_level_boxes,
                            this_level_scores,
                            image_shape,
                            pre_nms_min_size_threshold))

                # A level cannot contribute more boxes than it contains.
                this_level_pre_nms_top_k = min(num_boxes, pre_nms_top_k)
                this_level_post_nms_top_k = min(num_boxes, num_proposals)

                if nms_iou_threshold > 0.0:
                    if use_batched_nms:
                        # Score thresholding and top-k are delegated to the
                        # combined NMS op (single pseudo-class).
                        this_level_rois, this_level_roi_scores, _, _ = (
                            tf.image.combined_non_max_suppression(
                                tf.expand_dims(this_level_boxes, axis=2),
                                tf.expand_dims(this_level_scores, axis=-1),
                                max_output_size_per_class=(
                                    this_level_pre_nms_top_k),
                                max_total_size=this_level_post_nms_top_k,
                                iou_threshold=nms_iou_threshold,
                                score_threshold=pre_nms_score_threshold,
                                pad_per_class=False,
                                clip_boxes=False))
                    else:
                        if pre_nms_score_threshold > 0.0:
                            this_level_boxes, this_level_scores = (
                                box_ops.filter_boxes_by_scores(
                                    this_level_boxes,
                                    this_level_scores,
                                    pre_nms_score_threshold))
                        this_level_boxes, this_level_scores = (
                            box_ops.top_k_boxes(
                                this_level_boxes,
                                this_level_scores,
                                k=this_level_pre_nms_top_k))
                        this_level_roi_scores, this_level_rois = (
                            nms.sorted_non_max_suppression_padded(
                                this_level_scores,
                                this_level_boxes,
                                max_output_size=this_level_post_nms_top_k,
                                iou_threshold=nms_iou_threshold))
                else:
                    # NMS disabled: keep the highest scoring boxes as-is.
                    this_level_rois, this_level_roi_scores = (
                        box_ops.top_k_boxes(
                            this_level_boxes,
                            this_level_scores,
                            k=this_level_post_nms_top_k))

                rois.append(this_level_rois)
                roi_scores.append(this_level_roi_scores)

        all_rois = tf.concat(rois, axis=1)
        all_roi_scores = tf.concat(roi_scores, axis=1)

        with tf.name_scope('top_k_rois'):
            _, num_valid_rois = all_roi_scores.get_shape().as_list()
            overall_top_k = min(num_valid_rois, num_proposals)

            selected_rois, selected_roi_scores = box_ops.top_k_boxes(
                all_rois, all_roi_scores, k=overall_top_k)

    return selected_rois, selected_roi_scores