def select_over_all_levels(self, bboxlist, scorelist, cls_list, image_sizes):
    # num_images = len(image_sizes)
    results = []
    for i, (boxes, labels, scores) in enumerate(zip(bboxlist, cls_list, scorelist)):
        # skip the background
        keep = batched_nms_rotated(boxes, scores, labels, self.nms_thresh)
        boxes = boxes[keep]
        scores = scores[keep]
        labels = labels[keep]
        number_of_detections = boxes.size(0)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.fpn_post_nms_top_n > 0:
            cls_scores = scores.clone()
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(),
                number_of_detections - self.fpn_post_nms_top_n + 1)
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            boxes = boxes[keep]
            scores = scores[keep]
            labels = labels[keep]

        result = Instances(image_sizes[i])
        result.pred_boxes = RotatedBoxes(boxes)
        result.scores = scores
        result.pred_classes = labels
        results.append(result)
    return results
def test_batched_nms_rotated_0_degree_cuda(self):
    N = 2000
    num_classes = 50
    boxes, scores = self._create_tensors(N)
    idxs = torch.randint(0, num_classes, (N,))
    rotated_boxes = torch.zeros(N, 5)
    rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
    rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
    rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
    rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
    err_msg = "Rotated NMS with 0 degree is incompatible with horizontal NMS for IoU={}"
    for iou in [0.2, 0.5, 0.8]:
        backup = boxes.clone()
        keep_ref = batched_nms(boxes.cuda(), scores.cuda(), idxs, iou)
        self.assertTrue(torch.allclose(boxes, backup), "boxes modified by batched_nms")
        backup = rotated_boxes.clone()
        keep = batched_nms_rotated(rotated_boxes.cuda(), scores.cuda(), idxs, iou)
        self.assertTrue(
            torch.allclose(rotated_boxes, backup),
            "rotated_boxes modified by batched_nms_rotated",
        )
        self.assertLessEqual(nms_edit_distance(keep, keep_ref), 2, err_msg.format(iou))
def bev_nms(kitti_labels, iou_threshold):
    from detectron2.layers import batched_nms_rotated
    import numpy as np
    import torch

    #? NOTE: This might be different from elsewhere. However, it does not matter,
    #? because this CATEGORY_TO_IDX will never have any influence outside this function.
    CATEGORY_TO_IDX = {
        "Car": 0,
        "Pedestrian": 1,
        "Cyclist": 2,
        "Motorcycle": 3,
        "Undefined": 4
    }

    boxes = []
    scores = []
    idxs = []
    #! For each label, maps its index in boxes (and scores, idxs) to
    #! -> a tuple (index of its parent (view) in kitti_labels, its index inside its kitti_label)
    overall_idx_to_label_idx = dict()
    curr_overall_idx = 0
    for kitti_label_idx, kitti_label in enumerate(kitti_labels):
        for label_idx, label in enumerate(kitti_label):
            # should be (x_ctr, y_ctr, width, height, angle_degrees)
            boxes.append([
                label.t[0], label.t[2], label.l, label.w,
                label.ry * (180.0 / np.pi)
            ])
            scores.append(label.score)
            idxs.append(CATEGORY_TO_IDX[label.type])
            overall_idx_to_label_idx[curr_overall_idx] = (kitti_label_idx, label_idx)
            curr_overall_idx += 1

    if len(boxes) == 0:
        #! No detections
        return kitti_labels

    boxes = torch.FloatTensor(boxes).to("cuda")
    scores = torch.FloatTensor(scores).to("cuda")
    idxs = torch.LongTensor(idxs).to("cuda")

    #! Performs per-class NMS
    resulting_box_inds = batched_nms_rotated(boxes, scores, idxs, iou_threshold)

    keep_inds = [[] for i in range(len(kitti_labels))]
    for overall_idx in resulting_box_inds.cpu().tolist():
        kitti_label_idx, label_idx = overall_idx_to_label_idx[overall_idx]
        keep_inds[kitti_label_idx].append(label_idx)

    for kitti_label_idx, kitti_label in enumerate(kitti_labels):
        kitti_label.labels = [kitti_label.labels[i] for i in keep_inds[kitti_label_idx]]

    del boxes, scores, idxs, resulting_box_inds
    return kitti_labels
def grasp_fast_rcnn_inference_single_image_rotated(scores, boxes, tilts, zs,
                                                   image_shape, score_thresh,
                                                   nms_thresh, topk_per_image):
    """
    Single-image inference. Return rotated bounding-box detection results by thresholding
    on scores and applying rotated non-maximum suppression (Rotated NMS).

    Args:
        Same as `fast_rcnn_inference_rotated`, but with rotated boxes, scores, and image
        shapes per image.

    Returns:
        Same as `fast_rcnn_inference_rotated`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1) & torch.isfinite(tilts).all(dim=1) & torch.isfinite(zs).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        tilts = tilts[valid_mask]
        zs = zs[valid_mask]

    B = 5  # box dimension
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // B
    # Convert to Boxes to use the `clip` function ...
    boxes = RotatedBoxes(boxes.reshape(-1, B))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, B)  # R x C x B

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]
    tilts = tilts[filter_inds[:, 0]]
    zs = zs[filter_inds[:, 0]]

    # Apply per-class Rotated NMS
    keep = batched_nms_rotated(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]
    tilts, zs = tilts[keep], zs[keep]

    result = Instances(image_shape)
    result.pred_boxes = RotatedBoxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    result.pred_zs = torch.flatten(zs)
    result.pred_tilts = torch.flatten(tilts)
    return result, filter_inds[:, 0]
def test_batched_nms_rotated_0_degree_cpu(self, device="cpu"):
    N = 2000
    num_classes = 50
    boxes, scores = self._create_tensors(N, device=device)
    idxs = torch.randint(0, num_classes, (N,))
    rotated_boxes = torch.zeros(N, 5, device=device)
    rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
    rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
    rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
    rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
    err_msg = "Rotated NMS with 0 degree is incompatible with horizontal NMS for IoU={}"
    for iou in [0.2, 0.5, 0.8]:
        backup = boxes.clone()
        keep_ref = batched_nms(boxes, scores, idxs, iou)
        assert torch.allclose(boxes, backup), "boxes modified by batched_nms"
        backup = rotated_boxes.clone()
        keep = batched_nms_rotated(rotated_boxes, scores, idxs, iou)
        assert torch.allclose(
            rotated_boxes, backup
        ), "rotated_boxes modified by batched_nms_rotated"
        # Occasionally the gap can be large if there are many IOU on the threshold boundary
        self.assertLessEqual(nms_edit_distance(keep, keep_ref), 5, err_msg.format(iou))
def test_batched_nms_rotated_0_degree_cpu(self):
    # torch.manual_seed(0)
    N = 2000
    num_classes = 50
    boxes, scores = self._create_tensors(N)
    idxs = torch.randint(0, num_classes, (N,))
    rotated_boxes = torch.zeros(N, 5)
    rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
    rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
    rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
    rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
    err_msg = "Rotated NMS with 0 degree is incompatible with horizontal NMS for IoU={}"
    for iou in [0.2, 0.5, 0.8]:
        backup = boxes.clone()
        keep_ref = batched_nms(boxes, scores, idxs, iou)
        assert torch.allclose(boxes, backup), "boxes modified by batched_nms"
        backup = rotated_boxes.clone()
        keep = batched_nms_rotated(rotated_boxes, scores, idxs, iou)
        assert torch.allclose(
            rotated_boxes, backup), "rotated_boxes modified by batched_nms_rotated"
        assert torch.equal(keep, keep_ref), err_msg.format(iou)
def merge_branch_instances(instances, num_branch, nms_thresh, topk_per_image):
    """
    Merge detection results from different branches of TridentNet.
    Return detection results by applying non-maximum suppression (NMS) on bounding boxes
    and keep the unsuppressed boxes and other instances (e.g. mask) if any.

    Args:
        instances (list[Instances]): A list of N * num_branch instances that store detection
            results. Contain N images and each image has num_branch instances.
        num_branch (int): Number of branches used for merging detection results for each image.
        nms_thresh (float): The threshold to use for box non-maximum suppression. Value in [0, 1].
        topk_per_image (int): The number of top scoring detections to return. Set < 0 to return
            all detections.

    Returns:
        results (list[Instances]): A list of N instances, one for each image in the batch,
            that stores the topk most confident detections after merging results from
            multiple branches.
    """
    batch_size = len(instances) // num_branch
    results = []
    for i in range(batch_size):
        ins = []
        for j in range(num_branch):
            ins.append(instances[i + batch_size * j])
        instance = Instances.cat(ins)

        # Apply per-class NMS
        keep = batched_nms_rotated(
            instance.pred_boxes.tensor, instance.scores, instance.pred_classes, nms_thresh
        )
        keep = keep[:topk_per_image]
        result = instance[keep]
        results.append(result)
    return results
def find_top_rrpn_proposals(
    proposals,
    pred_objectness_logits,
    images,
    nms_thresh,
    pre_nms_topk,
    post_nms_topk,
    min_box_side_len,
    training,
):
    """
    For each feature map, select the `pre_nms_topk` highest scoring proposals,
    apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk`
    highest scoring proposals among all the feature maps if `training` is True,
    otherwise, returns the highest `post_nms_topk` scoring proposals for each
    feature map.

    Args:
        proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 5).
            All proposal predictions on the feature maps.
        pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A).
        images (ImageList): Input images as an :class:`ImageList`.
        nms_thresh (float): IoU threshold to use for NMS.
        pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS.
            When RRPN is run on multiple feature maps (as in FPN) this number is per
            feature map.
        post_nms_topk (int): number of top k scoring proposals to keep after applying NMS.
            When RRPN is run on multiple feature maps (as in FPN) this number is total,
            over all feature maps.
        min_box_side_len (float): minimum proposal box side length in pixels
            (absolute units wrt input images).
        training (bool): True if proposals are to be used in training, otherwise False.
            This arg exists only to support a legacy bug; look for the
            "NB: Legacy bug ..." comment.

    Returns:
        proposals (list[Instances]): list of N Instances. The i-th Instances
            stores post_nms_topk object proposals for image i.
    """
    image_sizes = images.image_sizes  # in (h, w) order
    num_images = len(image_sizes)
    device = proposals[0].device

    # 1. Select top-k anchors for every level and every image
    topk_scores = []  # #lvl Tensor, each of shape N x topk
    topk_proposals = []
    level_ids = []  # #lvl Tensor, each of shape (topk,)
    batch_idx = torch.arange(num_images, device=device)
    for level_id, proposals_i, logits_i in zip(
            itertools.count(), proposals, pred_objectness_logits):
        Hi_Wi_A = logits_i.shape[1]
        num_proposals_i = min(pre_nms_topk, Hi_Wi_A)

        # sort is faster than topk (https://github.com/pytorch/pytorch/issues/22812)
        # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1)
        logits_i, idx = logits_i.sort(descending=True, dim=1)
        topk_scores_i = logits_i[batch_idx, :num_proposals_i]
        topk_idx = idx[batch_idx, :num_proposals_i]

        # each is N x topk
        topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx]  # N x topk x 5

        topk_proposals.append(topk_proposals_i)
        topk_scores.append(topk_scores_i)
        level_ids.append(
            torch.full((num_proposals_i,), level_id, dtype=torch.int64, device=device))

    # 2. Concat all levels together
    topk_scores = cat(topk_scores, dim=1)
    topk_proposals = cat(topk_proposals, dim=1)
    level_ids = cat(level_ids, dim=0)

    # 3. For each image, run a per-level NMS, and choose topk results.
    results = []
    for n, image_size in enumerate(image_sizes):
        boxes = RotatedBoxes(topk_proposals[n])
        scores_per_img = topk_scores[n]
        lvl = level_ids
        valid_mask = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img)
        if not valid_mask.all():
            boxes = boxes[valid_mask]
            scores_per_img = scores_per_img[valid_mask]
            # keep the level ids in sync with the filtered boxes
            lvl = lvl[valid_mask]
        boxes.clip(image_size)

        # filter empty boxes
        keep = boxes.nonempty(threshold=min_box_side_len)
        if keep.sum().item() != len(boxes):
            boxes, scores_per_img, lvl = (boxes[keep], scores_per_img[keep], lvl[keep])

        keep = batched_nms_rotated(boxes.tensor, scores_per_img, lvl, nms_thresh)

        # In Detectron1, there was different behavior during training vs. testing.
        # (https://github.com/facebookresearch/Detectron/issues/459)
        # During training, topk is over the proposals from *all* images in the training batch.
        # During testing, it is over the proposals for each image separately.
        # As a result, the training behavior becomes batch-dependent,
        # and the configuration "POST_NMS_TOPK_TRAIN" ends up relying on the batch size.
        # This bug is addressed in Detectron2 to make the behavior independent of batch size.
        keep = keep[:post_nms_topk]

        res = Instances(image_size)
        res.proposal_boxes = boxes[keep]
        res.objectness_logits = scores_per_img[keep]
        results.append(res)
    return results
def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh, nms_thresh,
                                     topk_per_image, vp_bins=None, vp=None, vp_res=None,
                                     rotated_box_training=False, h=None):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    if not rotated_box_training or len(boxes) == 0:
        keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    else:
        # BBox with encoding ctr_x, ctr_y, w, l
        if vp is not None and vp_bins is not None:
            _vp = vp.view(-1, num_bbox_reg_classes, vp_bins)  # R x C x bins
            _vp = _vp[filter_mask]
            if len(_vp) > 0:
                _, vp_max = torch.max(_vp, 1)
                vp_filtered = vp_max
                if vp_res is not None:
                    _vp_res = vp_res.view(-1, num_bbox_reg_classes, vp_bins)
                    _vp_res = _vp_res[filter_mask]
                    vp_res_filtered = list()
                    for i, k in enumerate(vp_max):
                        vp_res_filtered.append(_vp_res[i, k])
            else:
                vp_filtered = _vp
            rboxes = []
            for i in range(boxes.shape[0]):
                box = boxes[i]
                angle = anglecorrection(vp_res_filtered[i] * 180 / math.pi).to(
                    box.device) if vp_res is not None else bin2ang(
                        vp_filtered[i], vp_bins).to(box.device)
                box = torch.cat((box, angle))
                rboxes.append(box)
            rboxes = torch.cat(rboxes).reshape(-1, 5).to(vp_filtered.device)
            # keep = nms_rotated(rboxes, scores, nms_thresh)
            keep = batched_nms_rotated(rboxes, scores, filter_inds[:, 1], nms_thresh)
        else:
            # boxes are (R', 4) at this point; convert x, y, w, l to x1, y1, x2, y2
            boxes[:, 2] = boxes[:, 2] + boxes[:, 0]  # x2
            boxes[:, 3] = boxes[:, 3] + boxes[:, 1]  # y2
            keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)

    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    if vp is not None and vp_bins is not None:
        vp = vp.view(-1, num_bbox_reg_classes, vp_bins)  # R x C x bins
        vp = vp[filter_mask]
        vp = vp[keep]
        if vp_res is not None:
            vp_res = vp_res.view(-1, num_bbox_reg_classes, vp_bins)
            vp_res = vp_res[filter_mask]
            vp_res = vp_res[keep]
        if len(vp) > 0:
            _, vp_max = torch.max(vp, 1)
            result.viewpoint = vp_max
            if vp_res is not None:
                vp_res_filtered = list()
                for i, k in enumerate(vp_max):
                    vp_res_filtered.append(vp_res[i, k])
                # This result is directly the yaw orientation predicted
                result.viewpoint_residual = torch.tensor(vp_res_filtered).to(vp_max.device)
        else:
            result.viewpoint = vp
            result.viewpoint_residual = vp_res
    if h is not None:
        h = h.view(-1, num_bbox_reg_classes, 2)  # R x C x 2
        h = h[filter_mask]
        h = h[keep]
        result.height = h
    return result, filter_inds[:, 0]
def inference_single_image(self, box_cls, box_delta, anchors, image_size):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Arguments:
        box_cls (list[Tensor]): list of #feature levels. Each entry contains
            tensor of size (H x W x A, K).
        box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 5
            (rotated-box deltas).
        anchors (list[Boxes]): list of #feature levels. Each entry contains
            a Boxes object, which contains all the anchors for that
            image in that feature level.
        image_size (tuple(H, W)): a tuple of the image height and width.

    Returns:
        Same as `inference`, but for only one image.
    """
    boxes_all = []
    scores_all = []
    class_idxs_all = []

    # Iterate over every feature level
    for box_cls_i, box_reg_i, anchors_i in zip(box_cls, box_delta, anchors):
        # (HxWxAxK,)
        box_cls_i = box_cls_i.flatten().sigmoid_()

        # Keep the top k scoring indices only.
        num_topk = min(self.topk_candidates, box_reg_i.size(0))
        # torch.sort is actually faster than .topk (at least on GPUs)
        predicted_prob, topk_idxs = box_cls_i.sort(descending=True)
        predicted_prob = predicted_prob[:num_topk]
        topk_idxs = topk_idxs[:num_topk]

        # filter out the proposals with low confidence score
        keep_idxs = predicted_prob > self.score_threshold
        predicted_prob = predicted_prob[keep_idxs]
        topk_idxs = topk_idxs[keep_idxs]

        anchor_idxs = topk_idxs // self.num_classes
        classes_idxs = topk_idxs % self.num_classes

        box_reg_i = box_reg_i[anchor_idxs]
        anchors_i = anchors_i[anchor_idxs]
        # predict boxes
        predicted_boxes = self.box2box_transform.apply_deltas(box_reg_i, anchors_i.tensor)

        boxes_all.append(predicted_boxes)
        scores_all.append(predicted_prob)
        class_idxs_all.append(classes_idxs)

    boxes_all, scores_all, class_idxs_all = [
        cat(x) for x in [boxes_all, scores_all, class_idxs_all]
    ]
    keep = batched_nms_rotated(boxes_all, scores_all, class_idxs_all, self.nms_threshold)
    keep = keep[:self.max_detections_per_image]

    result = Instances(image_size)
    result.pred_boxes = RotatedBoxes(boxes_all[keep])
    result.scores = scores_all[keep]
    result.pred_classes = class_idxs_all[keep]
    return result
    return hrbb_box, pt_inbox, obb_box


hbbs = []
pt_ins = []
obbs = []
for poly in poly_rotated_box:
    box = get_all_groundthurth(poly)
    hbbs.append(box[0])
    pt_ins.append(box[1])
    obbs.append(box[2])
hbbs = np.array(hbbs)
pt_ins = np.array(pt_ins)
obbs = np.array(obbs)
hbbs = torch.from_numpy(hbbs).to(torch.device('cuda'))
pt_ins = torch.from_numpy(pt_ins).to(torch.device('cuda'))
obbs = torch.from_numpy(obbs).to(torch.device('cuda'))
hbbs_wh = BoxMode.convert(hbbs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
polys = batch_hbb_hw2poly(hbbs.float(), hbbs_wh.float(), pt_ins.float(), 'tensor')
rotated_boxes = batch_polygonToRotRectangle(polys)
# pred_boxes = RotatedBoxes(rotated_boxes)
# rotated_box = batch_polygonToRotRectangle(poly_rotated_box)
scores = torch.Tensor([0.89, 0.92])
pred_classes = torch.Tensor([0, 0])
rotated_boxes[0, 4] = 90.0
rotated_boxes[1, 4] = 90.0
keep = batched_nms_rotated(rotated_boxes, scores.cuda(), pred_classes, 0.5)
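All of the snippets above rely on the same `batched_nms_rotated` contract from `detectron2.layers`: boxes are given as `(x_ctr, y_ctr, width, height, angle_in_degrees)`, `idxs` carries a per-box category so suppression only happens within a class, and the returned indices point at the kept boxes in descending-score order. The following minimal sketch is not taken from any of the projects above; it only assumes detectron2 is installed, and the toy boxes and expected output are illustrative.

import torch
from detectron2.layers import batched_nms_rotated

# Three toy rotated boxes in (x_ctr, y_ctr, w, h, angle_degrees) format.
boxes = torch.tensor(
    [
        [10.0, 10.0, 8.0, 4.0, 0.0],   # class 0
        [10.5, 10.0, 8.0, 4.0, 5.0],   # class 0, heavily overlaps the first box
        [10.0, 10.0, 8.0, 4.0, 0.0],   # class 1, same geometry but a different class
    ]
)
scores = torch.tensor([0.9, 0.8, 0.7])
idxs = torch.tensor([0, 0, 1])  # per-box category labels

keep = batched_nms_rotated(boxes, scores, idxs, iou_threshold=0.5)
# The second box is suppressed by the first (same class, high rotated IoU);
# the third survives because NMS is applied independently per class.
print(keep)  # expected: tensor([0, 2])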