def filter_results_fast(self, boxlist, num_classes, feature=None):
    """
    Run one NMS pass shared by all classes instead of one pass per class.

    Each box gets its best-scoring class among columns 1.. of the score
    matrix (column 0 is skipped), boxes with negative width/height or a
    score not above 1% of `self.score_thresh` are discarded, and NMS is
    applied once to what remains. The survivors are ranked by descending
    score and the first `min(max(n, min_detections_per_img),
    detections_per_img)` of them are selected, where `n` is the number of
    survivors scoring at least `self.score_thresh`.

    Arguments:
        boxlist: detections with a `bbox` tensor of 4 columns and a
            "scores" field that reshapes to [-1, num_classes]; its
            "scores", "labels" and "box_features" fields are overwritten
        num_classes: number of score columns per box
        feature: value stored as the "box_features" field

    Returns:
        `boxlist` indexed by the selected detections
    """
    assert boxlist.bbox.shape[1] == 4
    class_scores = boxlist.get_field("scores").reshape(-1, num_classes)

    # best score per box over columns 1..; +1 restores the full class index
    scores, labels = class_scores[:, 1:].max(1)
    labels += 1

    bbox = boxlist.bbox
    if not (self.ignore_box_regression or self.cls_agnostic_bbox_reg):
        # one box per class was predicted: collapse them to their mean
        bbox = bbox.reshape(-1, num_classes, 4).mean(1)

    new_fields = (
        ("scores", scores),
        ("labels", labels),
        ("box_features", feature),
    )
    for field_name, field_value in new_fields:
        boxlist.add_field(field_name, field_value)

    # threshold by size and confidence; the score threshold is relaxed
    # to 1% here so that enough boxes reach NMS
    x1, y1, x2, y2 = bbox.split(1, dim=1)
    non_negative_size = ((x2 - x1).squeeze(1) >= 0) & ((y2 - y1).squeeze(1) >= 0)
    confident = scores > self.score_thresh * 0.01
    keep = (non_negative_size & confident).nonzero().squeeze(1)

    # NMS on the low-thresholded candidates, mapped back to original indices
    survivors = box_nms(bbox[keep], scores[keep], self.nms)
    nms_idx = keep[survivors]
    nms_scores = scores[nms_idx]

    # rank the survivors from highest to lowest score
    order = torch.sort(nms_scores, dim=0, descending=True)[1]
    ranked = nms_idx[order]

    device = scores.device
    num_dets = (nms_scores >= self.score_thresh).long().sum()
    if not isinstance(num_dets, torch.Tensor):
        num_dets = torch.as_tensor(num_dets, device=device)
    lower = torch.as_tensor(self.min_detections_per_img, device=device)
    upper = torch.as_tensor(self.detections_per_img, device=device)
    at_least = torch.stack([num_dets, lower]).max()
    limit = torch.stack([at_least, upper]).min()

    return boxlist[ranked[:limit]]
def test_nms_cpu(self):
    """
    Mirror of the unit test UtilsNMSTest.TestNMS in
    caffe2/operators/generate_proposals_op_util_nms_test.cc
    """
    # one row per box: four box coordinates followed by its score
    table = np.array(
        [
            [10, 10, 50, 60, 0.5],
            [11, 12, 48, 60, 0.7],
            [8, 9, 40, 50, 0.6],
            [100, 100, 150, 140, 0.9],
            [99, 110, 155, 139, 0.8],
        ],
        dtype=np.float32,
    )
    boxes = torch.from_numpy(table[:, :4])
    scores = torch.from_numpy(table[:, 4])

    # (NMS threshold, sorted indices expected to be kept)
    cases = (
        (0.1, [1, 3]),
        (0.3, [1, 3]),
        (0.5, [1, 3]),
        (0.8, [1, 2, 3, 4]),
        (0.9, [0, 1, 2, 3, 4]),
    )
    for thresh, expected in cases:
        kept = np.sort(box_nms(boxes, scores, thresh))
        np.testing.assert_array_equal(kept, np.array(expected))
def test_nms1_cpu(self):
    """
    Mirror of the unit test UtilsNMSTest.TestNMS1 in
    caffe2/operators/generate_proposals_op_util_nms_test.cc
    """
    # 53 boxes, four coordinates each
    box_rows = [
        [350.9821, 161.8200, 369.9685, 205.2372],
        [250.5236, 154.2844, 274.1773, 204.9810],
        [471.4920, 160.4118, 496.0094, 213.4244],
        [352.0421, 164.5933, 366.4458, 205.9624],
        [166.0765, 169.7707, 183.0102, 232.6606],
        [252.3000, 183.1449, 269.6541, 210.6747],
        [469.7862, 162.0192, 482.1673, 187.0053],
        [168.4862, 174.2567, 181.7437, 232.9379],
        [470.3290, 162.3442, 496.4272, 214.6296],
        [251.0450, 155.5911, 272.2693, 203.3675],
        [252.0326, 154.7950, 273.7404, 195.3671],
        [351.7479, 161.9567, 370.6432, 204.3047],
        [496.3306, 161.7157, 515.0573, 210.7200],
        [471.0749, 162.6143, 485.3374, 207.3448],
        [250.9745, 160.7633, 264.1924, 206.8350],
        [470.4792, 169.0351, 487.1934, 220.2984],
        [474.4227, 161.9546, 513.1018, 215.5193],
        [251.9428, 184.1950, 262.6937, 207.6416],
        [252.6623, 175.0252, 269.8806, 213.7584],
        [260.9884, 157.0351, 288.3554, 206.6027],
        [251.3629, 164.5101, 263.2179, 202.4203],
        [471.8361, 190.8142, 485.6812, 220.8586],
        [248.6243, 156.9628, 264.3355, 199.2767],
        [495.1643, 158.0483, 512.6261, 184.4192],
        [376.8718, 168.0144, 387.3584, 201.3210],
        [122.9191, 160.7433, 172.5612, 231.3837],
        [350.3857, 175.8806, 366.2500, 205.4329],
        [115.2958, 162.7822, 161.9776, 229.6147],
        [168.4375, 177.4041, 180.8028, 232.4551],
        [169.7939, 184.4330, 181.4767, 232.1220],
        [347.7536, 175.9356, 355.8637, 197.5586],
        [495.5434, 164.6059, 516.4031, 207.7053],
        [172.1216, 194.6033, 183.1217, 235.2653],
        [264.2654, 181.5540, 288.4626, 214.0170],
        [111.7971, 183.7748, 137.3745, 225.9724],
        [253.4919, 186.3945, 280.8694, 210.0731],
        [165.5334, 169.7344, 185.9159, 232.8514],
        [348.3662, 184.5187, 354.9081, 201.4038],
        [164.6562, 162.5724, 186.3108, 233.5010],
        [113.2999, 186.8410, 135.8841, 219.7642],
        [117.0282, 179.8009, 142.5375, 221.0736],
        [462.1312, 161.1004, 495.3576, 217.2208],
        [462.5800, 159.9310, 501.2937, 224.1655],
        [503.5242, 170.0733, 518.3792, 209.0113],
        [250.3658, 195.5925, 260.6523, 212.4679],
        [108.8287, 163.6994, 146.3642, 229.7261],
        [256.7617, 187.3123, 288.8407, 211.2013],
        [161.2781, 167.4801, 186.3751, 232.7133],
        [115.3760, 177.5859, 163.3512, 236.9660],
        [248.9077, 188.0919, 264.8579, 207.9718],
        [108.1349, 160.7851, 143.6370, 229.6243],
        [465.0900, 156.7555, 490.3561, 213.5704],
        [107.5338, 173.4323, 141.0704, 235.2910],
    ]
    # one score per box, in the same order as box_rows
    score_values = [
        0.1919, 0.3293, 0.0860, 0.1600, 0.1885, 0.4297, 0.0974, 0.2711,
        0.1483, 0.1173, 0.1034, 0.2915, 0.1993, 0.0677, 0.3217, 0.0966,
        0.0526, 0.5675, 0.3130, 0.1592, 0.1353, 0.0634, 0.1557, 0.1512,
        0.0699, 0.0545, 0.2692, 0.1143, 0.0572, 0.1990, 0.0558, 0.1500,
        0.2214, 0.1878, 0.2501, 0.1343, 0.0809, 0.1266, 0.0743, 0.0896,
        0.0781, 0.0983, 0.0557, 0.0623, 0.5808, 0.3090, 0.1050, 0.0524,
        0.0513, 0.4501, 0.4167, 0.0623, 0.1749,
    ]
    # sorted indices expected to be kept at an NMS threshold of 0.5
    expected = np.array([
        1, 6, 7, 8, 11, 12, 13, 14, 17, 18, 19, 21, 23,
        24, 25, 26, 30, 32, 33, 34, 35, 37, 43, 44, 47, 50,
    ])

    boxes = torch.from_numpy(np.array(box_rows, dtype=np.float32))
    scores = torch.from_numpy(np.array(score_values, dtype=np.float32))

    kept = np.sort(box_nms(boxes, scores, 0.5))
    np.testing.assert_array_equal(kept, expected)
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Turn the RPN outputs of one feature map into per-image proposals.

    The top `pre_nms_top_n` anchors by objectness are decoded into boxes,
    NMS is applied when `self.nms_thresh > 0`, and at most
    `self.post_nms_top_n` boxes are kept when that value is positive.
    CUDA inputs go through the fused `C.GeneratePreNMSUprightBoxes`
    kernel; any other device uses a plain PyTorch fallback.

    Arguments:
        anchors: list of BoxList
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W

    Returns:
        list of BoxList in "xyxy" mode, one per image, each with an
        "objectness" field holding the scores of the kept proposals
    """
    device = objectness.device
    N, A, H, W = objectness.shape
    num_anchors = A * H * W
    pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)

    # BoxList.size is (width, height); reversed here to (height, width)
    image_shapes = [box.size[::-1] for box in anchors]
    # Get a single [N, -1, 4] tensor for all anchors
    concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
    concat_anchors = concat_anchors.reshape(N, -1, 4)

    # Keep the raw [N, A, H, W] logits until the branch: each path flattens
    # them in a different order, so flattening up front breaks the fallback.
    use_fast_cuda_path = objectness.is_cuda
    if use_fast_cuda_path:
        objectness = objectness.reshape(N, -1)  # Now [N, AHW]
        objectness = objectness.sigmoid()
        objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

        # Get all image shapes, and cat them together
        image_shapes_cat = torch.cat([
            torch.tensor(shape, device=device).float() for shape in image_shapes
        ])

        # Note: all anchors are passed; the kernel indexes them by topk_idx.
        # Returns pre-nms boxes, associated scores and keep flag.
        # Encompasses:
        #   1. Box decode
        #   2. Box clipping
        #   3. Box filtering
        # At the end we need to keep only the proposals & scores flagged.
        # Note: topk_idx, objectness are sorted => proposals, objectness,
        # keep are also sorted -- this is important later
        proposals, objectness, keep = C.GeneratePreNMSUprightBoxes(
            N,
            A,
            H,
            W,
            topk_idx,
            objectness.float(),  # Need to cast these as kernel doesn't support fp16
            box_regression.float(),
            concat_anchors,
            image_shapes_cat,
            pre_nms_top_n,
            0,  # feature_stride
            self.min_size,
            self.box_coder.bbox_xform_clip,
            True)

        # view as [N, pre_nms_top_n, 4]
        proposals = proposals.view(N, -1, 4)
        objectness = objectness.view(N, -1)
    else:
        # put in the same format as anchors
        objectness = objectness.permute(0, 2, 3, 1).reshape(N, -1)
        objectness = objectness.sigmoid()
        box_regression = box_regression.view(N, -1, 4, H, W).permute(0, 3, 4, 1, 2)
        box_regression = box_regression.reshape(N, -1, 4)

        objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

        # TODO check if this batch_idx is really needed
        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]
        topk_anchors = concat_anchors[batch_idx, topk_idx]

        proposals = self.box_coder.decode(
            box_regression.view(-1, 4), topk_anchors.view(-1, 4)
        )
        proposals = proposals.view(N, -1, 4)

        # no keep flags on this path; placeholders keep the loop below uniform
        keep = [None] * N
        # NOTE(review): unlike the fused kernel (decode + clipping +
        # filtering per the comments above), this fallback only decodes;
        # boxes are neither clipped to the image nor filtered by
        # self.min_size -- confirm whether that parity is required.

    # TODO optimize / make batch friendly
    sampled_bboxes = []
    for proposal, score, im_shape, k in zip(proposals, objectness, image_shapes, keep):
        height, width = im_shape
        if proposal.dim() == 0:
            # TODO check what to do here
            print("skipping")
            continue

        if use_fast_cuda_path:
            # drop the boxes the kernel flagged as filtered out
            k = k.byte()
            p = proposal.masked_select(k[:, None]).view(-1, 4)
            score = score.masked_select(k)
        else:
            p = proposal

        if self.nms_thresh > 0:
            nms_keep = box_nms(p, score, self.nms_thresh)
            # the kept boxes are truncated to the first post_nms_top_n entries
            if self.post_nms_top_n > 0:
                nms_keep = nms_keep[:self.post_nms_top_n]
            p = p.index_select(0, nms_keep)
            score = score.index_select(0, nms_keep)

        # Common code path
        sampled_bbox = BoxList(p, (width, height), mode="xyxy")
        sampled_bbox.add_field("objectness", score)
        sampled_bboxes.append(sampled_bbox)
    # TODO maybe also copy the other fields that were originally present?
    return sampled_bboxes