def test_batched_nms(self):
    """Check that ``batched_nms`` gives consistent results across configs.

    Loads the fixture ``./tests/data/batched_nms_data.pkl`` and, for a hard
    ``nms`` config and a ``soft_nms`` config, runs ``batched_nms`` once as-is
    and once more after adding ``split_thr=100`` to the config; both runs
    must return identical boxes and kept indices. For hard NMS the kept
    indices are also compared against the first ``nms_max_num`` entries of
    the fixture's ``keep`` array. Finally, passing ``None`` as the config
    must keep every box and return scores in strictly descending order.
    """
    import mmcv
    from mmcv.ops import batched_nms

    data = mmcv.load('./tests/data/batched_nms_data.pkl')

    def run_nms(cfg):
        # Build fresh tensors from the fixture arrays on every call.
        return batched_nms(
            torch.from_numpy(data['boxes']),
            torch.from_numpy(data['scores']),
            torch.from_numpy(data['idxs']),
            cfg,
            class_agnostic=False)

    # Hard NMS: plain run vs. run with ``split_thr`` set.
    nms_max_num = 100
    hard_cfg = dict(
        type='nms',
        iou_threshold=0.7,
        score_threshold=0.5,
        max_num=nms_max_num)
    boxes, keep = run_nms(hard_cfg)
    hard_cfg.update(split_thr=100)
    seq_boxes, seq_keep = run_nms(hard_cfg)
    assert torch.equal(keep, seq_keep)
    assert torch.equal(boxes, seq_boxes)
    assert torch.equal(keep, torch.from_numpy(data['keep'][:nms_max_num]))

    # Soft NMS: plain run vs. run with ``split_thr`` set.
    soft_cfg = dict(type='soft_nms', iou_threshold=0.7)
    boxes, keep = run_nms(soft_cfg)
    soft_cfg.update(split_thr=100)
    seq_boxes, seq_keep = run_nms(soft_cfg)
    assert torch.equal(keep, seq_keep)
    assert torch.equal(boxes, seq_boxes)

    # test skip nms when `nms_cfg` is None
    seq_boxes, seq_keep = run_nms(None)
    assert len(seq_keep) == len(data['boxes'])
    # assert score is descending order
    det_scores = seq_boxes[:, -1]
    assert ((det_scores[1:] - det_scores[:-1]) < 0).all()
def _bboxes_nms(self, bboxes, labels, cfg):
    """Run class-aware NMS and keep the highest-scoring detections.

    Args:
        bboxes (Tensor): Detections; the first four columns are passed to
            ``batched_nms`` as boxes and the last column as scores.
        labels (Tensor): Label of each detection, used as the NMS group id.
        cfg: Test config. Must provide ``max_per_img`` and either ``nms``
            or the deprecated ``nms_cfg`` (which is copied to ``cfg.nms``).

    Returns:
        tuple[Tensor, Tensor]: Detections returned by ``batched_nms``,
        sorted by their last column in descending order and truncated to
        ``cfg.max_per_img``, together with the matching labels. The inputs
        are returned untouched when ``labels`` is empty.
    """
    # Local import: the original referenced a non-existent ``warning`` name;
    # ``warnings`` is the stdlib module that actually provides ``warn``.
    import warnings

    if labels.numel() == 0:
        return bboxes, labels

    if 'nms_cfg' in cfg:
        warnings.warn('nms_cfg in test_cfg will be deprecated. '
                      'Please rename it as nms')
    if 'nms' not in cfg:
        cfg.nms = cfg.nms_cfg

    out_bboxes, keep = batched_nms(bboxes[:, :4], bboxes[:, -1], labels,
                                   cfg.nms)
    out_labels = labels[keep]

    if len(out_bboxes) > 0:
        # use `sort` to replace with `argsort` here
        _, idx = torch.sort(out_bboxes[:, -1], descending=True)
        max_per_img = out_bboxes.new_tensor(cfg.max_per_img).to(torch.long)
        nms_after = max_per_img
        if torch.onnx.is_in_onnx_export():
            # Always keep topk op for dynamic input in onnx
            from mmdet.core.export import get_k_for_topk
            nms_after = get_k_for_topk(max_per_img, out_bboxes.shape[0])
        idx = idx[:nms_after]
        out_bboxes = out_bboxes[idx]
        out_labels = out_labels[idx]

    return out_bboxes, out_labels
def multiclass_nms(multi_bboxes, multi_scores, score_thr, nms_cfg, max_num=-1,
                   score_factors=None):
    """Class-aware NMS over multi-class box predictions.

    Args:
        multi_bboxes (Tensor): Shape (n, 4) for boxes shared by all
            classes, or (n, k * 4) for per-class boxes.
        multi_scores (Tensor): Shape (n, num_classes + 1); the last column
            is dropped (background category).
        score_thr (float): Only (box, class) pairs whose raw score exceeds
            this threshold take part in NMS.
        nms_cfg (dict): Config forwarded to ``batched_nms``.
        max_num (int): If positive, keep at most this many detections.
        score_factors (Tensor, optional): Per-box factor of shape (n, )
            multiplied into the scores used for ranking inside NMS. When
            ``None`` the raw scores are used.

    Returns:
        tuple[Tensor, Tensor]: Detections of shape (k, 5) whose last column
        holds the raw (unscaled) score, and their labels of shape (k, ).
    """
    num_classes = multi_scores.size(1) - 1
    # exclude background category
    if multi_bboxes.shape[1] > 4:
        bboxes = multi_bboxes.view(multi_scores.size(0), -1, 4)
    else:
        bboxes = multi_bboxes[:, None].expand(-1, num_classes, 4)
    scores = multi_scores[:, :-1]

    # ``score_factors`` defaults to None; the original subscripted it
    # unconditionally, so the default call crashed with a TypeError.
    if score_factors is None:
        scaled_scores = scores
    else:
        scaled_scores = scores * score_factors[:, None]

    # filter out boxes with low scores
    valid_mask = scores > score_thr
    bboxes = bboxes[valid_mask]
    scores = scores[valid_mask]
    scaled_scores = scaled_scores[valid_mask]
    labels = valid_mask.nonzero(as_tuple=False)[:, 1]

    if bboxes.numel() == 0:
        bboxes = multi_bboxes.new_zeros((0, 5))
        labels = multi_bboxes.new_zeros((0, ), dtype=torch.long)
        return bboxes, labels

    # NMS ranks by the scaled scores ...
    dets, keep = batched_nms(bboxes, scaled_scores, labels, nms_cfg)
    if max_num > 0:
        dets = dets[:max_num]
        keep = keep[:max_num]

    # ... but the reported score is the raw classification score.
    scores = scores[keep]
    dets[:, -1] = scores
    return dets, labels[keep]
def test_batched_nms(self):
    """Check that ``batched_nms`` gives consistent results across configs.

    Loads the fixture ``./tests/data/batched_nms_data.pkl`` and, for a hard
    ``nms`` config and a ``soft_nms`` config, runs ``batched_nms`` once as-is
    and once more after adding ``split_thr=100`` to the config; both runs
    must return identical boxes and kept indices. For hard NMS the kept
    indices are also compared against the first ``nms_max_num`` entries of
    the fixture's ``keep`` array.
    """
    import mmcv
    from mmcv.ops import batched_nms

    data = mmcv.load('./tests/data/batched_nms_data.pkl')

    def run_nms(cfg):
        # Build fresh tensors from the fixture arrays on every call.
        return batched_nms(
            torch.from_numpy(data['boxes']),
            torch.from_numpy(data['scores']),
            torch.from_numpy(data['idxs']),
            cfg,
            class_agnostic=False)

    # Hard NMS: plain run vs. run with ``split_thr`` set.
    nms_max_num = 100
    hard_cfg = dict(
        type='nms',
        iou_threshold=0.7,
        score_threshold=0.5,
        max_num=nms_max_num)
    boxes, keep = run_nms(hard_cfg)
    hard_cfg.update(split_thr=100)
    seq_boxes, seq_keep = run_nms(hard_cfg)
    assert torch.equal(keep, seq_keep)
    assert torch.equal(boxes, seq_boxes)
    assert torch.equal(keep, torch.from_numpy(data['keep'][:nms_max_num]))

    # Soft NMS: plain run vs. run with ``split_thr`` set.
    soft_cfg = dict(type='soft_nms', iou_threshold=0.7)
    boxes, keep = run_nms(soft_cfg)
    soft_cfg.update(split_thr=100)
    seq_boxes, seq_keep = run_nms(soft_cfg)
    assert torch.equal(keep, seq_keep)
    assert torch.equal(boxes, seq_boxes)
def _bboxes_nms(self, bboxes, labels, cfg):
    """Run class-aware NMS and optionally cap the number of detections.

    Args:
        bboxes (Tensor): Detections; the first four columns are passed to
            ``batched_nms`` as boxes and the last column as scores.
        labels (Tensor): Label of each detection, used as the NMS group id.
        cfg: Test config providing ``nms`` and ``max_per_img``.

    Returns:
        tuple[Tensor, Tensor]: Detections returned by ``batched_nms`` and
        the labels of the kept detections, both truncated to
        ``cfg.max_per_img`` when that value is positive. The inputs are
        returned untouched when ``labels`` is empty.
    """
    if labels.numel() == 0:
        return bboxes, labels

    max_num = cfg.max_per_img
    bboxes, keep = batched_nms(bboxes[:, :4], bboxes[:, -1].contiguous(),
                               labels, cfg.nms)
    # Always realign labels with the surviving boxes. The original fused
    # this with the ``[:max_num]`` truncation, which leaves boxes and labels
    # inconsistent whenever ``max_per_img`` is not positive.
    labels = labels[keep]
    if max_num > 0:
        bboxes = bboxes[:max_num]
        labels = labels[:max_num]

    return bboxes, labels
def _bboxes_nms(self, bboxes, labels, cfg):
    """Run class-aware NMS and keep the highest-scoring detections.

    Args:
        bboxes (Tensor): Detections; the first four columns are passed to
            ``batched_nms`` as boxes and the last column as scores.
        labels (Tensor): Label of each detection, used as the NMS group id.
        cfg: Test config providing ``nms_cfg`` and ``max_per_img``.

    Returns:
        tuple[Tensor, Tensor]: Detections returned by ``batched_nms``,
        sorted by their last column in descending order and truncated to
        ``cfg.max_per_img``, together with the matching labels. The inputs
        are returned untouched when ``labels`` is empty.
    """
    # Nothing to suppress; also avoids handing empty tensors to the NMS op.
    if labels.numel() == 0:
        return bboxes, labels

    # Column slices are non-contiguous views; pass contiguous copies to the
    # NMS op.
    out_bboxes, keep = batched_nms(bboxes[:, :4].contiguous(),
                                   bboxes[:, -1].contiguous(), labels,
                                   cfg.nms_cfg)
    out_labels = labels[keep]

    if len(out_bboxes) > 0:
        idx = torch.argsort(out_bboxes[:, -1], descending=True)
        idx = idx[:cfg.max_per_img]
        out_bboxes = out_bboxes[idx]
        out_labels = out_labels[idx]

    return out_bboxes, out_labels
def _bbox_post_process(self, mlvl_scores, mlvl_bboxes, mlvl_valid_anchors,
                       level_ids, cfg, img_shape, **kwargs):
    """Decode multi-level proposals, drop tiny boxes and apply NMS.

    Args:
        mlvl_scores (list[Tensor]): Scores from every scale level of a
            single image; concatenated along dim 0.
        mlvl_bboxes (list[Tensor]): Box predictions from every level. They
            are concatenated and handed to ``self.bbox_coder.decode``
            together with the anchors.
        mlvl_valid_anchors (list[Tensor]): Anchors from every level.
        level_ids (list[Tensor]): Level index of every prediction, used as
            the group id for ``batched_nms``.
        cfg: Config providing ``min_bbox_size``, ``nms`` and
            ``max_per_img``.
        img_shape (tuple(int)): Forwarded to the coder as ``max_shape``.

    Returns:
        Tensor: The first ``cfg.max_per_img`` detections returned by
        ``batched_nms``, or an empty (0, 5) tensor when no proposal is left.
    """
    all_scores = torch.cat(mlvl_scores)
    all_anchors = torch.cat(mlvl_valid_anchors)
    all_preds = torch.cat(mlvl_bboxes)
    proposals = self.bbox_coder.decode(
        all_anchors, all_preds, max_shape=img_shape)
    all_ids = torch.cat(level_ids)

    if cfg.min_bbox_size >= 0:
        widths = proposals[:, 2] - proposals[:, 0]
        heights = proposals[:, 3] - proposals[:, 1]
        large_enough = (widths > cfg.min_bbox_size) & (
            heights > cfg.min_bbox_size)
        # Only pay for the masked copy when something is actually removed.
        if not large_enough.all():
            proposals = proposals[large_enough]
            all_scores = all_scores[large_enough]
            all_ids = all_ids[large_enough]

    if proposals.numel() == 0:
        return proposals.new_zeros(0, 5)

    dets, _ = batched_nms(proposals, all_scores, all_ids, cfg.nms)
    return dets[:cfg.max_per_img]
def _bboxes_nms(self, bboxes, labels, cfg):
    """Run class-aware NMS and keep the highest-scoring detections.

    Args:
        bboxes (Tensor): Detections; the first four columns are passed to
            ``batched_nms`` as boxes and the last column as scores.
        labels (Tensor): Label of each detection, used as the NMS group id.
        cfg: Test config providing ``nms_cfg`` and ``max_per_img``.

    Returns:
        tuple[Tensor, Tensor]: Detections returned by ``batched_nms``,
        sorted by their last column in descending order and truncated to
        ``cfg.max_per_img``, together with the matching labels. The inputs
        are returned untouched when ``labels`` is empty.
    """
    if labels.numel() == 0:
        return bboxes, labels

    coords = bboxes[:, :4].contiguous()
    confidences = bboxes[:, -1].contiguous()
    out_bboxes, keep = batched_nms(coords, confidences, labels, cfg.nms_cfg)
    out_labels = labels[keep]

    if len(out_bboxes) == 0:
        return out_bboxes, out_labels

    # Rank survivors by score and cap the count.
    order = torch.argsort(out_bboxes[:, -1], descending=True)
    order = order[:cfg.max_per_img]
    return out_bboxes[order], out_labels[order]
def simple_test(self, x, proposal_list, img_metas, proposals=None,
                rescale=False):
    """Test without augmentation.

    1. Compute bbox and label predictions per branch via
       ``self.simple_test_bboxes``.
    2. Concatenate the predictions of all branches and merge them with
       ``batched_nms`` (config ``self.test_cfg['nms']``), keeping at most
       ``self.test_cfg['max_per_img']`` detections when that is positive.

    Returns:
        list: A single-element list holding the ``bbox2result`` output for
        the merged detections.
    """
    assert self.with_bbox, 'Bbox head must be implemented.'
    det_bboxes_list, det_labels_list = self.simple_test_bboxes(
        x, img_metas, proposal_list, self.test_cfg, rescale=rescale)

    # Normalise empty per-branch outputs to shape (0, 5) so they can be
    # concatenated with the others.
    for branch_idx, branch_bboxes in enumerate(det_bboxes_list):
        if branch_bboxes.shape[0] == 0:
            det_bboxes_list[branch_idx] = branch_bboxes.new_empty((0, 5))

    merged_bboxes = torch.cat(det_bboxes_list, 0)
    merged_labels = torch.cat(det_labels_list, 0)

    if merged_bboxes.numel() == 0:
        det_bboxes = merged_bboxes.new_zeros((0, 5))
        det_labels = merged_bboxes.new_zeros((0, ), dtype=torch.long)
    else:
        det_bboxes, keep = batched_nms(merged_bboxes[:, :4],
                                       merged_bboxes[:, 4].contiguous(),
                                       merged_labels, self.test_cfg['nms'])
        det_labels = merged_labels[keep]
        max_per_img = self.test_cfg['max_per_img']
        if max_per_img > 0:
            det_labels = det_labels[:max_per_img]
            det_bboxes = det_bboxes[:max_per_img]

    return [
        bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)
    ]
def _bboxes_nms(self, bboxes, labels, cfg):
    """Run class-aware NMS and optionally cap the number of detections.

    Args:
        bboxes (Tensor): Detections; the first four columns are passed to
            ``batched_nms`` as boxes and the last column as scores.
        labels (Tensor): Label of each detection, used as the NMS group id.
        cfg: Test config. Must provide ``max_per_img`` and either ``nms``
            or the deprecated ``nms_cfg`` (which is copied to ``cfg.nms``).

    Returns:
        tuple[Tensor, Tensor]: Detections returned by ``batched_nms`` and
        the labels of the kept detections, both truncated to
        ``cfg.max_per_img`` when that value is positive. The inputs are
        returned untouched when ``labels`` is empty.
    """
    # Local import: the original referenced a non-existent ``warning`` name;
    # ``warnings`` is the stdlib module that actually provides ``warn``.
    import warnings

    if 'nms_cfg' in cfg:
        warnings.warn('nms_cfg in test_cfg will be deprecated. '
                      'Please rename it as nms')
    if 'nms' not in cfg:
        cfg.nms = cfg.nms_cfg

    if labels.numel() > 0:
        max_num = cfg.max_per_img
        bboxes, keep = batched_nms(bboxes[:, :4], bboxes[:, -1].contiguous(),
                                   labels, cfg.nms)
        # Always realign labels with the surviving boxes; truncation is a
        # separate, optional step.
        labels = labels[keep]
        if max_num > 0:
            bboxes = bboxes[:max_num]
            labels = labels[:max_num]

    return bboxes, labels
def merge_trident_bboxes(self, trident_det_bboxes, trident_det_labels):
    """Merge the bbox predictions of all branches with class-aware NMS.

    Args:
        trident_det_bboxes (Tensor): Concatenated detections; columns 0-3
            are used as boxes and column 4 as scores.
        trident_det_labels (Tensor): Labels matching the detections.

    Returns:
        tuple[Tensor, Tensor]: Detections kept by ``batched_nms`` (config
        ``self.test_cfg['nms']``) and their labels, truncated to
        ``self.test_cfg['max_per_img']`` when that is positive. For empty
        input, a (0, 5) zero tensor and a (0, ) long tensor.
    """
    if trident_det_bboxes.numel() == 0:
        empty_bboxes = trident_det_bboxes.new_zeros((0, 5))
        empty_labels = trident_det_bboxes.new_zeros((0, ), dtype=torch.long)
        return empty_bboxes, empty_labels

    det_bboxes, keep = batched_nms(trident_det_bboxes[:, :4],
                                   trident_det_bboxes[:, 4].contiguous(),
                                   trident_det_labels, self.test_cfg['nms'])
    det_labels = trident_det_labels[keep]

    max_per_img = self.test_cfg['max_per_img']
    if max_per_img > 0:
        det_labels = det_labels[:max_per_img]
        det_bboxes = det_bboxes[:max_per_img]

    return det_bboxes, det_labels
def test_nms_rotated(self):
    """Check ``nms_rotated`` directly and through ``batched_nms``.

    Four boxes (five box values plus a score in the last column) are
    suppressed with an IoU threshold of 0.5 and the result is compared with
    the expected detections / kept indices for: the default call, the
    ``clockwise=False`` call on sign-flipped angles, and ``batched_nms``
    with ``type='nms_rotated'``. The direct calls run on CUDA tensors.
    """
    from mmcv.ops import nms_rotated

    np_boxes = np.array(
        [[6.0, 3.0, 8.0, 7.0, 0.5, 0.7],
         [3.0, 6.0, 9.0, 11.0, 0.6, 0.8],
         [3.0, 7.0, 10.0, 12.0, 0.3, 0.5],
         [1.0, 4.0, 13.0, 7.0, 0.6, 0.9]],
        dtype=np.float32)
    np_expect_dets = np.array(
        [[1.0, 4.0, 13.0, 7.0, 0.6],
         [3.0, 6.0, 9.0, 11.0, 0.6],
         [6.0, 3.0, 8.0, 7.0, 0.5]],
        dtype=np.float32)
    np_expect_keep_inds = np.array([3, 1, 0], dtype=np.int64)

    def check(dets, keep_inds):
        assert np.allclose(dets.cpu().numpy()[:, :5], np_expect_dets)
        assert np.allclose(keep_inds.cpu().numpy(), np_expect_keep_inds)

    boxes = torch.from_numpy(np_boxes).cuda()

    # test cw angle definition
    dets, keep_inds = nms_rotated(boxes[:, :5], boxes[:, -1], 0.5)
    check(dets, keep_inds)

    # test ccw angle definition
    boxes[..., -2] *= -1
    dets, keep_inds = nms_rotated(
        boxes[:, :5], boxes[:, -1], 0.5, clockwise=False)
    # Flip the sign back before comparing with the expectation.
    dets[..., -2] *= -1
    check(dets, keep_inds)

    # test batched_nms with nms_rotated
    from mmcv.ops import batched_nms

    rotated_cfg = dict(type='nms_rotated', iou_threshold=0.5)
    boxes, keep = batched_nms(
        torch.from_numpy(np_boxes[:, :5]),
        torch.from_numpy(np_boxes[:, -1]),
        torch.from_numpy(np.array([0, 0, 0, 0])),
        rotated_cfg,
        class_agnostic=False)
    check(boxes, keep)
def _hooked_nms(bboxes, scores, feat, score_thr, nms, max_per_img=-1):
    """NMS that also gathers per-detection score vectors and features.

    Args:
        bboxes (Tensor): Shape (n, 4) for boxes shared by all classes, or
            (n, k * 4) for per-class boxes.
        scores (Tensor): Shape (n, num_classes + 1); the last column is
            dropped.
        feat (Tensor): One feature row per sample.
            NOTE(review): assumed to be 2-D (n, feat_dim), as required by
            the final concatenation along dim 1 - confirm against callers.
        score_thr (float): Only (box, class) pairs scoring above this take
            part in NMS.
        nms (dict): Config forwarded to ``batched_nms``.
        max_per_img (int): If positive, keep at most this many detections.

    Returns:
        tuple[Tensor, Tensor]: ``gt_blob``, a clone of the input ``feat``,
        and ``dt_blob``, one row per kept detection laid out as
        ``[bbox (4), class scores (num_classes), feat, score (1),
        label (1)]``. ``dt_blob`` has zero rows when nothing passes
        ``score_thr``.
    """
    scores = scores[:, :-1]
    num_samples, num_classes = scores.size()
    if bboxes.size(1) > 4:
        bboxes = bboxes.view(num_samples, -1, 4)
    else:
        bboxes = bboxes[:, None].expand(num_samples, num_classes, 4)

    gt_blob = feat.clone()

    # One copy of each sample's feature / score vector per class so rows
    # line up with the flattened (sample, class) pairs below. The original
    # hard-coded 80 here, which only lines up when num_classes == 80.
    feat = feat.repeat_interleave(num_classes, dim=0)
    conf = scores.repeat_interleave(num_classes, dim=0)

    # NMS group ids: class 0 forms one group, all other classes another.
    labels = bboxes.new_tensor(
        [0] + [1] * (num_classes - 1), dtype=torch.long)
    labels = labels.view(1, -1).expand_as(scores)

    bboxes = bboxes.reshape(-1, 4)
    scores = scores.reshape(-1)
    labels = labels.reshape(-1)

    valid_mask = scores > score_thr
    inds = valid_mask.nonzero()[:, 0]
    bboxes, scores, labels = bboxes[inds], scores[inds], labels[inds]
    if bboxes.numel() == 0:
        # Same column layout as the non-empty result (the original
        # hard-coded 1110 = 4 + 80 + 1024 + 1 + 1).
        blob_width = 4 + num_classes + feat.size(1) + 2
        return gt_blob, bboxes.new_empty(0, blob_width)

    dets, keep = batched_nms(bboxes, scores, labels, nms)
    if max_per_img > 0:
        dets = dets[:max_per_img]
        keep = keep[:max_per_img]

    bbox = dets[:, :4]
    conf = conf[inds][keep]
    feat = feat[inds][keep]
    score = dets[:, None, -1]
    label = labels[keep, None]
    dt_blob = torch.cat((bbox, conf, feat, score, label), dim=1)
    return gt_blob, dt_blob
def _bboxes_nms(self, bboxes, labels, cfg):
    """Run class-aware NMS and keep the highest-scoring detections.

    Args:
        bboxes (Tensor): Detections; the first four columns are passed to
            ``batched_nms`` as boxes and the last column as scores.
        labels (Tensor): Label of each detection, used as the NMS group id.
        cfg: Test config. Must provide ``max_per_img`` and either ``nms``
            or the deprecated ``nms_cfg`` (which is copied to ``cfg.nms``).

    Returns:
        tuple[Tensor, Tensor]: Detections returned by ``batched_nms``,
        sorted by their last column in descending order and truncated to
        ``cfg.max_per_img``, together with the matching labels. The inputs
        are returned untouched when ``labels`` is empty.
    """
    # Local import: the original referenced a non-existent ``warning`` name;
    # ``warnings`` is the stdlib module that actually provides ``warn``.
    import warnings

    if labels.numel() == 0:
        return bboxes, labels

    if 'nms_cfg' in cfg:
        warnings.warn('nms_cfg in test_cfg will be deprecated. '
                      'Please rename it as nms')
    if 'nms' not in cfg:
        cfg.nms = cfg.nms_cfg

    out_bboxes, keep = batched_nms(bboxes[:, :4], bboxes[:, -1], labels,
                                   cfg.nms)
    out_labels = labels[keep]

    if len(out_bboxes) > 0:
        idx = torch.argsort(out_bboxes[:, -1], descending=True)
        idx = idx[:cfg.max_per_img]
        out_bboxes = out_bboxes[idx]
        out_labels = out_labels[idx]

    return out_bboxes, out_labels
def _get_bboxes_single(self, cls_scores, bbox_preds, mlvl_anchors, img_shape,
                       scale_factor, cfg, rescale=False):
    """Transform outputs for a single batch item into bbox predictions.

    Args:
        cls_scores (list[Tensor]): Box scores for each scale level.
            Has shape (num_anchors * num_classes, H, W).
        bbox_preds (list[Tensor]): Box energies / deltas for each scale
            level with shape (num_anchors * 4, H, W).
        mlvl_anchors (list[Tensor]): Box reference for each scale level
            with shape (num_total_anchors, 4).
        img_shape (tuple[int]): Shape of the input image; forwarded to the
            bbox coder as ``max_shape``.
        scale_factor (ndarray): Unused by this implementation.
        cfg (mmcv.Config): Test / postprocessing configuration; if None,
            ``self.test_cfg`` is used. Reads ``nms_pre``, ``min_bbox_size``,
            ``nms_thr`` and ``nms_post``.
        rescale (bool): Unused by this implementation.

    Returns:
        Tensor: Labeled boxes in shape (n, 5), where the first 4 columns
        are the decoded box coordinates and the 5-th column is the score.
        An empty (0, 5) tensor when no proposal survives filtering.
    """
    cfg = self.test_cfg if cfg is None else cfg
    # bboxes from different level should be independent during NMS,
    # level_ids are used as labels for batched NMS to separate them
    level_ids = []
    mlvl_scores = []
    mlvl_bbox_preds = []
    mlvl_valid_anchors = []
    for idx in range(len(cls_scores)):
        rpn_cls_score = cls_scores[idx]
        rpn_bbox_pred = bbox_preds[idx]
        assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
        rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
        if self.use_sigmoid_cls:
            rpn_cls_score = rpn_cls_score.reshape(-1)
            scores = rpn_cls_score.sigmoid()
        else:
            rpn_cls_score = rpn_cls_score.reshape(-1, 2)
            # We set FG labels to [0, num_class-1] and BG label to
            # num_class in RPN head since mmdet v2.5, which is unified to
            # be consistent with other head since mmdet v2.0. In mmdet v2.0
            # to v2.4 we keep BG label as 0 and FG label as 1 in rpn head.
            scores = rpn_cls_score.softmax(dim=1)[:, 0]
        rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        anchors = mlvl_anchors[idx]
        if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
            # sort is faster than topk
            ranked_scores, rank_inds = scores.sort(descending=True)
            topk_inds = rank_inds[:cfg.nms_pre]
            scores = ranked_scores[:cfg.nms_pre]
            rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
            anchors = anchors[topk_inds, :]
        mlvl_scores.append(scores)
        mlvl_bbox_preds.append(rpn_bbox_pred)
        mlvl_valid_anchors.append(anchors)
        level_ids.append(
            scores.new_full((scores.size(0), ), idx, dtype=torch.long))

    scores = torch.cat(mlvl_scores)
    anchors = torch.cat(mlvl_valid_anchors)
    rpn_bbox_pred = torch.cat(mlvl_bbox_preds)
    proposals = self.bbox_coder.decode(
        anchors, rpn_bbox_pred, max_shape=img_shape)
    ids = torch.cat(level_ids)

    if cfg.min_bbox_size > 0:
        w = proposals[:, 2] - proposals[:, 0]
        h = proposals[:, 3] - proposals[:, 1]
        # Filter with a boolean mask. The previous code compared the *sum
        # of the valid indices* with the proposal count, which can skip the
        # filter by coincidence, and ``squeeze()`` turned a single
        # surviving index into a 0-dim tensor that dropped a dimension.
        valid_mask = (w >= cfg.min_bbox_size) & (h >= cfg.min_bbox_size)
        if not valid_mask.all():
            proposals = proposals[valid_mask]
            scores = scores[valid_mask]
            ids = ids[valid_mask]

    # Nothing left to suppress; return an empty (0, 5) result directly.
    if proposals.numel() == 0:
        return proposals.new_zeros(0, 5)

    # TODO: remove the hard coded nms type
    nms_cfg = dict(type='nms', iou_threshold=cfg.nms_thr)
    dets, _ = batched_nms(proposals, scores, ids, nms_cfg)
    return dets[:cfg.nms_post]
def _get_bboxes(self, cls_scores, bbox_preds, mlvl_anchors, img_shapes,
                scale_factors, cfg, rescale=False):
    """Transform batched network outputs into per-image proposals.

    Args:
        cls_scores (list[Tensor]): Box scores for each scale level.
            Has shape (N, num_anchors * num_classes, H, W).
        bbox_preds (list[Tensor]): Box energies / deltas for each scale
            level with shape (N, num_anchors * 4, H, W).
        mlvl_anchors (list[Tensor]): Box reference for each scale level
            with shape (num_total_anchors, 4).
        img_shapes (list[tuple[int]]): Shape of the input images; forwarded
            to the bbox coder as ``max_shape``.
        scale_factors (list[ndarray]): Unused by this implementation.
        cfg (mmcv.Config): Test / postprocessing configuration; if None,
            ``self.test_cfg`` is used. A deep copy is taken. Reads
            ``nms_pre`` (optional), ``min_bbox_size``, ``nms`` and
            ``max_per_img``.
        rescale (bool): Unused by this implementation.

    Returns:
        list[Tensor]: One (n, 5) tensor per image: the first
        ``cfg.max_per_img`` detections returned by ``batched_nms`` (box
        coordinates plus score), or an empty (0, 5) tensor when no proposal
        survives filtering.
    """
    cfg = self.test_cfg if cfg is None else cfg
    cfg = copy.deepcopy(cfg)
    # bboxes from different level should be independent during NMS,
    # level_ids are used as labels for batched NMS to separate them
    level_ids = []
    mlvl_scores = []
    mlvl_bbox_preds = []
    mlvl_valid_anchors = []
    batch_size = cls_scores[0].shape[0]
    nms_pre = cfg.get('nms_pre', -1)
    for idx in range(len(cls_scores)):
        rpn_cls_score = cls_scores[idx]
        rpn_bbox_pred = bbox_preds[idx]
        assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
        rpn_cls_score = rpn_cls_score.permute(0, 2, 3, 1)
        if self.use_sigmoid_cls:
            rpn_cls_score = rpn_cls_score.reshape(batch_size, -1)
            scores = rpn_cls_score.sigmoid()
        else:
            rpn_cls_score = rpn_cls_score.reshape(batch_size, -1, 2)
            # We set FG labels to [0, num_class-1] and BG label to
            # num_class in RPN head since mmdet v2.5, which is unified to
            # be consistent with other head since mmdet v2.0. In mmdet v2.0
            # to v2.4 we keep BG label as 0 and FG label as 1 in rpn head.
            scores = rpn_cls_score.softmax(-1)[..., 0]
        rpn_bbox_pred = rpn_bbox_pred.permute(0, 2, 3, 1).reshape(
            batch_size, -1, 4)
        anchors = mlvl_anchors[idx]
        anchors = anchors.expand_as(rpn_bbox_pred)
        if nms_pre > 0 and rpn_bbox_pred.size(1) > nms_pre:
            # sort is faster than topk
            ranked_scores, rank_inds = scores.sort(descending=True)
            topk_inds = rank_inds[:, :nms_pre]
            scores = ranked_scores[:, :nms_pre]
            batch_inds = torch.arange(batch_size).view(
                -1, 1).expand_as(topk_inds)
            rpn_bbox_pred = rpn_bbox_pred[batch_inds, topk_inds, :]
            anchors = anchors[batch_inds, topk_inds, :]

        mlvl_scores.append(scores)
        mlvl_bbox_preds.append(rpn_bbox_pred)
        mlvl_valid_anchors.append(anchors)
        level_ids.append(
            scores.new_full((batch_size, scores.size(1)),
                            idx,
                            dtype=torch.long))

    batch_mlvl_scores = torch.cat(mlvl_scores, dim=1)
    batch_mlvl_anchors = torch.cat(mlvl_valid_anchors, dim=1)
    batch_mlvl_rpn_bbox_pred = torch.cat(mlvl_bbox_preds, dim=1)
    batch_mlvl_proposals = self.bbox_coder.decode(
        batch_mlvl_anchors, batch_mlvl_rpn_bbox_pred, max_shape=img_shapes)
    batch_mlvl_ids = torch.cat(level_ids, dim=1)

    result_list = []
    for (mlvl_proposals, mlvl_scores,
         mlvl_ids) in zip(batch_mlvl_proposals, batch_mlvl_scores,
                          batch_mlvl_ids):
        if cfg.min_bbox_size >= 0:
            w = mlvl_proposals[:, 2] - mlvl_proposals[:, 0]
            h = mlvl_proposals[:, 3] - mlvl_proposals[:, 1]
            # Filter with a boolean mask. The previous code compared the
            # *sum of the valid indices* with the proposal count, which can
            # skip the filter by coincidence, and ``squeeze()`` turned a
            # single surviving index into a 0-dim tensor.
            valid_mask = (w > cfg.min_bbox_size) & (h > cfg.min_bbox_size)
            if not valid_mask.all():
                mlvl_proposals = mlvl_proposals[valid_mask]
                mlvl_scores = mlvl_scores[valid_mask]
                mlvl_ids = mlvl_ids[valid_mask]

        # Nothing left to suppress for this image.
        if mlvl_proposals.numel() == 0:
            result_list.append(mlvl_proposals.new_zeros(0, 5))
            continue

        dets, _ = batched_nms(mlvl_proposals, mlvl_scores, mlvl_ids, cfg.nms)
        result_list.append(dets[:cfg.max_per_img])
    return result_list
def _bbox_post_process(self, mlvl_scores, mlvl_labels, mlvl_bboxes,
                       scale_factor, cfg, rescale=False, with_nms=True,
                       mlvl_score_factors=None, **kwargs):
    """Merge multi-level predictions, optionally rescale and apply NMS.

    Args:
        mlvl_scores (list[Tensor]): Box scores from all scale levels of a
            single image, each item has shape (num_bboxes, ).
        mlvl_labels (list[Tensor]): Box class labels from all scale levels,
            each item has shape (num_bboxes, ).
        mlvl_bboxes (list[Tensor]): Bboxes from all scale levels, each item
            has shape (num_bboxes, 4).
        scale_factor (ndarray, optional): Divisor applied to the boxes when
            ``rescale`` is True.
        cfg (mmcv.Config): Provides ``nms`` and ``max_per_img``; only read
            when NMS actually runs.
        rescale (bool): If True, divide the boxes by ``scale_factor``.
            Default: False.
        with_nms (bool): If True, run ``batched_nms``. Default: True.
        mlvl_score_factors (list[Tensor], optional): Per-box factors from
            all levels, multiplied into the scores. Default: None.

    Returns:
        tuple[Tensor]: With ``with_nms=False``: the concatenated
        ``(bboxes, scores, labels)``. With ``with_nms=True``:
        ``(det_bboxes, det_labels)`` where ``det_bboxes`` has shape
        (n, 5) with the score in the last column, truncated to
        ``cfg.max_per_img``; for empty input the boxes are concatenated
        with the scores and returned with the labels without running NMS.
    """
    assert len(mlvl_scores) == len(mlvl_bboxes) == len(mlvl_labels)

    bboxes = torch.cat(mlvl_bboxes)
    if rescale:
        bboxes /= bboxes.new_tensor(scale_factor)
    scores = torch.cat(mlvl_scores)
    labels = torch.cat(mlvl_labels)

    if mlvl_score_factors is not None:
        # TODO: Add sqrt operation in order to be consistent with
        # the paper.
        score_factors = torch.cat(mlvl_score_factors)
        scores = scores * score_factors

    if not with_nms:
        return bboxes, scores, labels

    if bboxes.numel() == 0:
        # Keep the (n, 5) layout even when there is nothing to suppress.
        return torch.cat([bboxes, scores[:, None]], -1), labels

    det_bboxes, keep_idxs = batched_nms(bboxes, scores, labels, cfg.nms)
    det_bboxes = det_bboxes[:cfg.max_per_img]
    det_labels = labels[keep_idxs][:cfg.max_per_img]
    return det_bboxes, det_labels
def _get_bboxes(self, cls_scores, bbox_preds, mlvl_anchors, img_shapes,
                scale_factors, cfg, rescale=False):
    """Transform batched network outputs into per-image proposals.

    Args:
        cls_scores (list[Tensor]): Box scores for each scale level.
            Has shape (N, num_anchors * num_classes, H, W).
        bbox_preds (list[Tensor]): Box energies / deltas for each scale
            level with shape (N, num_anchors * 4, H, W).
        mlvl_anchors (list[Tensor]): Box reference for each scale level
            with shape (num_total_anchors, 4).
        img_shapes (list[tuple[int]]): Shape of the input images; forwarded
            to the bbox coder as ``max_shape``.
        scale_factors (list[ndarray]): Unused by this implementation.
        cfg (mmcv.Config): Test / postprocessing configuration; if None,
            ``self.test_cfg`` is used. A deep copy is taken, so the
            deprecated-key migration below does not touch the caller's
            config.
        rescale (bool): Unused by this implementation.

    Returns:
        During ONNX export, the ``dets`` output of
        ``add_dummy_nms_for_onnx``. Otherwise list[Tensor]: one (n, 5)
        tensor per image holding the first ``cfg.max_per_img`` detections
        returned by ``batched_nms``, or an empty (0, 5) tensor when no
        proposal survives filtering.
    """
    cfg = self.test_cfg if cfg is None else cfg
    cfg = copy.deepcopy(cfg)
    # bboxes from different level should be independent during NMS,
    # level_ids are used as labels for batched NMS to separate them
    level_ids = []
    mlvl_scores = []
    mlvl_bbox_preds = []
    mlvl_valid_anchors = []
    batch_size = cls_scores[0].shape[0]
    nms_pre_tensor = torch.tensor(
        cfg.nms_pre, device=cls_scores[0].device, dtype=torch.long)
    for idx in range(len(cls_scores)):
        rpn_cls_score = cls_scores[idx]
        rpn_bbox_pred = bbox_preds[idx]
        assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
        rpn_cls_score = rpn_cls_score.permute(0, 2, 3, 1)
        if self.use_sigmoid_cls:
            rpn_cls_score = rpn_cls_score.reshape(batch_size, -1)
            scores = rpn_cls_score.sigmoid()
        else:
            rpn_cls_score = rpn_cls_score.reshape(batch_size, -1, 2)
            # We set FG labels to [0, num_class-1] and BG label to
            # num_class in RPN head since mmdet v2.5, which is unified to
            # be consistent with other head since mmdet v2.0. In mmdet v2.0
            # to v2.4 we keep BG label as 0 and FG label as 1 in rpn head.
            scores = rpn_cls_score.softmax(-1)[..., 0]
        rpn_bbox_pred = rpn_bbox_pred.permute(0, 2, 3, 1).reshape(
            batch_size, -1, 4)
        anchors = mlvl_anchors[idx]
        anchors = anchors.expand_as(rpn_bbox_pred)
        # Get top-k prediction
        from mmdet.core.export import get_k_for_topk
        nms_pre = get_k_for_topk(nms_pre_tensor, rpn_bbox_pred.shape[1])
        if nms_pre > 0:
            if torch.onnx.is_in_onnx_export():
                # The topk result is only consumed on the export path, so
                # it is computed here instead of unconditionally.
                _, topk_inds = scores.topk(nms_pre)
                batch_inds = torch.arange(batch_size).view(
                    -1, 1).expand_as(topk_inds)
                # Avoid onnx2tensorrt issue in https://github.com/NVIDIA/TensorRT/issues/1134 # noqa: E501
                # Mind k<=3480 in TensorRT for TopK
                transformed_inds = scores.shape[1] * batch_inds + topk_inds
                scores = scores.reshape(-1, 1)[transformed_inds].reshape(
                    batch_size, -1)
                rpn_bbox_pred = rpn_bbox_pred.reshape(
                    -1, 4)[transformed_inds, :].reshape(batch_size, -1, 4)
                anchors = anchors.reshape(
                    -1, 4)[transformed_inds, :].reshape(batch_size, -1, 4)
            else:
                # sort is faster than topk
                ranked_scores, rank_inds = scores.sort(descending=True)
                topk_inds = rank_inds[:, :cfg.nms_pre]
                scores = ranked_scores[:, :cfg.nms_pre]
                batch_inds = torch.arange(batch_size).view(
                    -1, 1).expand_as(topk_inds)
                rpn_bbox_pred = rpn_bbox_pred[batch_inds, topk_inds, :]
                anchors = anchors[batch_inds, topk_inds, :]

        mlvl_scores.append(scores)
        mlvl_bbox_preds.append(rpn_bbox_pred)
        mlvl_valid_anchors.append(anchors)
        level_ids.append(
            scores.new_full((batch_size, scores.size(1)),
                            idx,
                            dtype=torch.long))

    batch_mlvl_scores = torch.cat(mlvl_scores, dim=1)
    batch_mlvl_anchors = torch.cat(mlvl_valid_anchors, dim=1)
    batch_mlvl_rpn_bbox_pred = torch.cat(mlvl_bbox_preds, dim=1)
    batch_mlvl_proposals = self.bbox_coder.decode(
        batch_mlvl_anchors, batch_mlvl_rpn_bbox_pred, max_shape=img_shapes)
    batch_mlvl_ids = torch.cat(level_ids, dim=1)

    # deprecate arguments warning
    if 'nms' not in cfg or 'max_num' in cfg or 'nms_thr' in cfg:
        warnings.warn(
            'In rpn_proposal or test_cfg, '
            'nms_thr has been moved to a dict named nms as '
            'iou_threshold, max_num has been renamed as max_per_img, '
            'name of original arguments and the way to specify '
            'iou_threshold of NMS will be deprecated.')
    if 'nms' not in cfg:
        cfg.nms = ConfigDict(dict(type='nms', iou_threshold=cfg.nms_thr))
    if 'max_num' in cfg:
        if 'max_per_img' in cfg:
            assert cfg.max_num == cfg.max_per_img, (
                f'You '
                f'set max_num and '
                f'max_per_img at the same time, but get {cfg.max_num} '
                f'and {cfg.max_per_img} respectively'
                'Please delete max_num which will be deprecated.')
        else:
            cfg.max_per_img = cfg.max_num
    if 'nms_thr' in cfg:
        assert cfg.nms.iou_threshold == cfg.nms_thr, (
            f'You set'
            f' iou_threshold in nms and '
            f'nms_thr at the same time, but get'
            f' {cfg.nms.iou_threshold} and {cfg.nms_thr}'
            f' respectively. Please delete the nms_thr '
            f'which will be deprecated.')

    # Replace batched_nms with ONNX::NonMaxSuppression in deployment
    if torch.onnx.is_in_onnx_export():
        from mmdet.core.export import add_dummy_nms_for_onnx
        batch_mlvl_scores = batch_mlvl_scores.unsqueeze(2)
        score_threshold = cfg.nms.get('score_thr', 0.0)
        nms_pre = cfg.get('deploy_nms_pre', cfg.max_per_img)
        dets, _ = add_dummy_nms_for_onnx(batch_mlvl_proposals,
                                         batch_mlvl_scores, cfg.max_per_img,
                                         cfg.nms.iou_threshold,
                                         score_threshold, nms_pre,
                                         cfg.max_per_img)
        return dets

    result_list = []
    for (mlvl_proposals, mlvl_scores,
         mlvl_ids) in zip(batch_mlvl_proposals, batch_mlvl_scores,
                          batch_mlvl_ids):
        # Skip nonzero op while exporting to ONNX
        if cfg.min_bbox_size > 0 and (not torch.onnx.is_in_onnx_export()):
            w = mlvl_proposals[:, 2] - mlvl_proposals[:, 0]
            h = mlvl_proposals[:, 3] - mlvl_proposals[:, 1]
            # Filter with a boolean mask. The previous code compared the
            # *sum of the valid indices* with the proposal count, which can
            # skip the filter by coincidence, and ``squeeze()`` turned a
            # single surviving index into a 0-dim tensor.
            valid_mask = (w >= cfg.min_bbox_size) & (
                h >= cfg.min_bbox_size)
            if not valid_mask.all():
                mlvl_proposals = mlvl_proposals[valid_mask]
                mlvl_scores = mlvl_scores[valid_mask]
                mlvl_ids = mlvl_ids[valid_mask]

        # Nothing left to suppress for this image.
        if mlvl_proposals.numel() == 0:
            result_list.append(mlvl_proposals.new_zeros(0, 5))
            continue

        dets, _ = batched_nms(mlvl_proposals, mlvl_scores, mlvl_ids, cfg.nms)
        result_list.append(dets[:cfg.max_per_img])
    return result_list
def _get_bboxes_single(self, cls_scores, bbox_preds, mlvl_anchors, img_shape,
                       scale_factor, cfg, rescale=False):
    """Transform outputs for a single batch item into bbox predictions.

    Args:
        cls_scores (list[Tensor]): Box scores for each scale level.
            Has shape (num_anchors * num_classes, H, W).
        bbox_preds (list[Tensor]): Box energies / deltas for each scale
            level with shape (num_anchors * 4, H, W).
        mlvl_anchors (list[Tensor]): Box reference for each scale level
            with shape (num_total_anchors, 4).
        img_shape (tuple[int]): Shape of the input image; forwarded to the
            bbox coder as ``max_shape``.
        scale_factor (ndarray): Unused by this implementation.
        cfg (mmcv.Config): Test / postprocessing configuration; if None,
            ``self.test_cfg`` is used. A deep copy is taken, so the
            deprecated-key migration below does not touch the caller's
            config.
        rescale (bool): Unused by this implementation.

    Returns:
        Tensor: Labeled boxes of shape (n, 5), where the first 4 columns
        are the decoded box coordinates and the 5-th column is the score;
        at most ``cfg.max_per_img`` rows. Outside ONNX export, an empty
        (0, 5) tensor is returned when no proposal survives filtering.
    """
    cfg = self.test_cfg if cfg is None else cfg
    cfg = copy.deepcopy(cfg)
    # bboxes from different level should be independent during NMS,
    # level_ids are used as labels for batched NMS to separate them
    level_ids = []
    mlvl_scores = []
    mlvl_bbox_preds = []
    mlvl_valid_anchors = []
    for idx in range(len(cls_scores)):
        rpn_cls_score = cls_scores[idx]
        rpn_bbox_pred = bbox_preds[idx]
        assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
        rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
        if self.use_sigmoid_cls:
            rpn_cls_score = rpn_cls_score.reshape(-1)
            scores = rpn_cls_score.sigmoid()
        else:
            rpn_cls_score = rpn_cls_score.reshape(-1, 2)
            # We set FG labels to [0, num_class-1] and BG label to
            # num_class in RPN head since mmdet v2.5, which is unified to
            # be consistent with other head since mmdet v2.0. In mmdet v2.0
            # to v2.4 we keep BG label as 0 and FG label as 1 in rpn head.
            scores = rpn_cls_score.softmax(dim=1)[:, 0]
        rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        anchors = mlvl_anchors[idx]
        if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
            # sort is faster than topk
            # _, topk_inds = scores.topk(cfg.nms_pre)
            if torch.onnx.is_in_onnx_export():
                # sort op will be converted to TopK in onnx
                # and k<=3480 in TensorRT
                _, topk_inds = scores.topk(cfg.nms_pre)
                scores = scores[topk_inds]
            else:
                ranked_scores, rank_inds = scores.sort(descending=True)
                topk_inds = rank_inds[:cfg.nms_pre]
                scores = ranked_scores[:cfg.nms_pre]
            rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
            anchors = anchors[topk_inds, :]
        mlvl_scores.append(scores)
        mlvl_bbox_preds.append(rpn_bbox_pred)
        mlvl_valid_anchors.append(anchors)
        level_ids.append(
            scores.new_full((scores.size(0), ), idx, dtype=torch.long))

    scores = torch.cat(mlvl_scores)
    anchors = torch.cat(mlvl_valid_anchors)
    rpn_bbox_pred = torch.cat(mlvl_bbox_preds)
    proposals = self.bbox_coder.decode(
        anchors, rpn_bbox_pred, max_shape=img_shape)
    ids = torch.cat(level_ids)

    # Skip nonzero op while exporting to ONNX
    if cfg.min_bbox_size > 0 and (not torch.onnx.is_in_onnx_export()):
        w = proposals[:, 2] - proposals[:, 0]
        h = proposals[:, 3] - proposals[:, 1]
        # Filter with a boolean mask. The previous code compared the *sum
        # of the valid indices* with the proposal count, which can skip the
        # filter by coincidence, and ``squeeze()`` turned a single
        # surviving index into a 0-dim tensor that dropped a dimension.
        valid_mask = (w >= cfg.min_bbox_size) & (h >= cfg.min_bbox_size)
        if not valid_mask.all():
            proposals = proposals[valid_mask]
            scores = scores[valid_mask]
            ids = ids[valid_mask]

    # deprecate arguments warning
    if 'nms' not in cfg or 'max_num' in cfg or 'nms_thr' in cfg:
        warnings.warn(
            'In rpn_proposal or test_cfg, '
            'nms_thr has been moved to a dict named nms as '
            'iou_threshold, max_num has been renamed as max_per_img, '
            'name of original arguments and the way to specify '
            'iou_threshold of NMS will be deprecated.')
    if 'nms' not in cfg:
        cfg.nms = ConfigDict(dict(type='nms', iou_threshold=cfg.nms_thr))
    if 'max_num' in cfg:
        if 'max_per_img' in cfg:
            assert cfg.max_num == cfg.max_per_img, (
                f'You '
                f'set max_num and '
                f'max_per_img at the same time, but get {cfg.max_num} '
                f'and {cfg.max_per_img} respectively'
                'Please delete max_num which will be deprecated.')
        else:
            cfg.max_per_img = cfg.max_num
    if 'nms_thr' in cfg:
        assert cfg.nms.iou_threshold == cfg.nms_thr, (
            f'You set'
            f' iou_threshold in nms and '
            f'nms_thr at the same time, but get'
            f' {cfg.nms.iou_threshold} and {cfg.nms_thr}'
            f' respectively. Please delete the nms_thr '
            f'which will be deprecated.')

    # Nothing left to suppress. Only short-circuit in eager mode so the
    # exported graph is unchanged.
    if (not torch.onnx.is_in_onnx_export()) and proposals.numel() == 0:
        return proposals.new_zeros(0, 5)

    dets, _ = batched_nms(proposals, scores, ids, cfg.nms)
    return dets[:cfg.max_per_img]
def nms_resampling_discrete(self, proposals, scores, ids, gt_bboxes,
                            gt_labels, a_r, a_c, a_f):
    """Resample proposals using a different NMS threshold per category group.

    GT boxes are split into a "rare" and a "common" group by label (every
    other label is treated as "frequent"). NMS is run on the same proposals
    once per group with that group's IoU threshold. Detections whose best
    IoU with a rare (resp. common) GT box is at least 0.3 are taken from the
    rare (resp. common) NMS result, and the frequent NMS result contributes
    only the detections that overlap no rare or common GT box by 0.3.

    Args:
        proposals (Tensor): Boxes forwarded unchanged to ``batched_nms``.
        scores (Tensor): Scores forwarded unchanged to ``batched_nms``.
        ids (Tensor): Grouping ids forwarded unchanged to ``batched_nms``.
        gt_bboxes (Tensor): Ground-truth boxes, iterated in lockstep with
            ``gt_labels``; each item must fit a 4-element row.
        gt_labels (Tensor): Ground-truth labels; also selects the device of
            the temporary buffers.
        a_r (float): NMS IoU threshold used for the rare group.
        a_c (float): NMS IoU threshold used for the common group.
        a_f (float): NMS IoU threshold used for the frequent group.

    Returns:
        Tensor: Selected rows of the ``batched_nms`` outputs, concatenated
        in the order rare, common, frequent.
    """
    # A detection counts as background w.r.t. a GT group when its best IoU
    # with that group's GT boxes is below this value.
    select_thresh = 0.3
    device = gt_labels.device

    # Hard-coded label groups; labels in neither tensor are "frequent"
    # (the original listed them as [0, 3] but never read that list).
    common = torch.tensor([1, 4, 9], device=device)
    rare = torch.tensor([2, 5, 6, 7, 8, 10], device=device)

    # Size the scratch buffers from the input instead of a fixed 2000 rows,
    # so images with many GT boxes cannot overflow them.
    num_gt = min(len(gt_bboxes), len(gt_labels))
    rare_gtbox = torch.zeros((num_gt, 4), device=device)
    common_gtbox = torch.zeros((num_gt, 4), device=device)
    num_rare = 0
    num_common = 0
    for gt_bbox, gt_label in zip(gt_bboxes, gt_labels):
        if gt_label in rare:
            rare_gtbox[num_rare, ...] = gt_bbox
            num_rare += 1
        elif gt_label in common:
            common_gtbox[num_common, ...] = gt_bbox
            num_common += 1
        # Frequent GT boxes are not needed by the selection below.
    rare_gtbox = rare_gtbox[:num_rare, ...]
    common_gtbox = common_gtbox[:num_common, ...]

    frequent_proposals, _ = batched_nms(
        proposals, scores, ids, dict(type='nms', iou_threshold=a_f))

    out = []
    # Rare first, then common: the frequent set is filtered cumulatively.
    for group_gtbox, iou_thr in ((rare_gtbox, a_r), (common_gtbox, a_c)):
        if len(group_gtbox) == 0:
            continue
        group_proposals, _ = batched_nms(
            proposals, scores, ids, dict(type='nms', iou_threshold=iou_thr))
        # Keep the group's detections that overlap one of its GT boxes.
        group_max_overlaps, _ = bbox_overlaps(
            group_gtbox, group_proposals[:, :4]).max(dim=0)
        out.append(group_proposals[group_max_overlaps >= select_thresh, :])
        # Drop the frequent detections covering the same GT boxes.
        frequent_max_overlaps, _ = bbox_overlaps(
            group_gtbox, frequent_proposals[:, :4]).max(dim=0)
        frequent_proposals = frequent_proposals[
            frequent_max_overlaps < select_thresh, :]

    out.append(frequent_proposals)
    if len(out) > 1:
        return torch.cat(out, 0)
    return frequent_proposals
def _get_bboxes_single_5(self,
                         cls_scores,
                         bbox_preds,
                         mlvl_anchors,
                         img_shape,
                         scale_factor,
                         gt_semantic_seg_single=None,
                         cfg=None,
                         rescale=False):
    """Transform outputs for a single batch item into bbox predictions (v5.0).

    Args:
        cls_scores (list[Tensor]): Box scores for each scale level
            Has shape (num_anchors * num_classes, H, W).
        bbox_preds (list[Tensor]): Box energies / deltas for each scale
            level with shape (num_anchors * 4, H, W).
        mlvl_anchors (list[Tensor]): Box reference for each scale level
            with shape (num_total_anchors, 4).
        img_shape (tuple[int]): Shape of the input image,
            (height, width, 3).
        scale_factor (ndarray): Scale factor of the image arange as
            (w_scale, h_scale, w_scale, h_scale). Not read by this
            implementation.
        gt_semantic_seg_single (Tensor, optional): Semantic segmentation
            map of the image. Author's note: nominally (1, H, W), in
            practice (H, W) because the leading 1 is squeezed away.
            Not read by this implementation; kept for interface
            compatibility.
        cfg (mmcv.Config): Test / postprocessing configuration,
            if None, test_cfg would be used.
        rescale (bool): If True, return boxes in original image space.
            Not read by this implementation.

    Returns:
        Tensor: Labeled boxes in shape (n, 5), where the first 4 columns
            are bounding box positions (tl_x, tl_y, br_x, br_y) and the
            5-th column is a score between 0 and 1.
    """
    cfg = self.test_cfg if cfg is None else cfg
    # bboxes from different level should be independent during NMS,
    # level_ids are used as labels for batched NMS to separate them
    level_ids = []
    mlvl_scores = []
    mlvl_bbox_preds = []
    mlvl_valid_anchors = []
    for idx, rpn_cls_score in enumerate(cls_scores):
        rpn_bbox_pred = bbox_preds[idx]
        assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
        # Channels last, so the flattened scores follow the same
        # (H, W, anchor) order as the flattened bbox deltas below.
        rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
        if self.use_sigmoid_cls:
            scores = rpn_cls_score.reshape(-1).sigmoid()
        else:
            rpn_cls_score = rpn_cls_score.reshape(-1, 2)
            # we set FG labels to [0, num_class-1] and BG label to
            # num_class in other heads since mmdet v2.0, However we
            # keep BG label as 0 and FG label as 1 in rpn head
            scores = rpn_cls_score.softmax(dim=1)[:, 1]
        rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        anchors = mlvl_anchors[idx]
        if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
            # sort is faster than topk
            ranked_scores, rank_inds = scores.sort(descending=True)
            topk_inds = rank_inds[:cfg.nms_pre]
            scores = ranked_scores[:cfg.nms_pre]
            rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
            anchors = anchors[topk_inds, :]
        mlvl_scores.append(scores)
        mlvl_bbox_preds.append(rpn_bbox_pred)
        mlvl_valid_anchors.append(anchors)
        level_ids.append(
            scores.new_full((scores.size(0), ), idx, dtype=torch.long))

    scores = torch.cat(mlvl_scores)
    anchors = torch.cat(mlvl_valid_anchors)
    rpn_bbox_pred = torch.cat(mlvl_bbox_preds)
    # See delta2bbox: the result is N x 4, with all scale levels
    # concatenated along the first dimension.
    proposals = self.bbox_coder.decode(
        anchors, rpn_bbox_pred, max_shape=img_shape)
    # Records which level each proposal comes from.
    ids = torch.cat(level_ids)

    if cfg.min_bbox_size > 0:
        w = proposals[:, 2] - proposals[:, 0]
        h = proposals[:, 3] - proposals[:, 1]
        # A boolean mask keeps the 2-D shape even when a single box
        # survives, and detects "something was filtered" reliably.
        valid_mask = (w >= cfg.min_bbox_size) & (h >= cfg.min_bbox_size)
        if not valid_mask.all():
            proposals = proposals[valid_mask]
            scores = scores[valid_mask]
            ids = ids[valid_mask]

    # TODO: remove the hard coded nms type
    nms_cfg = dict(type='nms', iou_threshold=cfg.nms_thr)
    dets, _ = batched_nms(proposals, scores, ids, nms_cfg)
    return dets[:cfg.nms_post]
def aug_test_bboxes_simple(self, feats, img_metas, rescale=False):
    """Run test-time augmentation for det bboxes and merge the results.

    Usable by DenseHead subclasses except ``RPNHead`` and its variants,
    e.g., ``GARPNHead``, etc.

    Args:
        feats (list[Tensor]): One entry per test-time augmentation; each
            Tensor should have a shape NxCxHxW and holds the features of
            all images in the batch.
        img_metas (list[list[dict]]): Outer list runs over test-time augs
            (multiscale, flip, etc.), inner list over the images in a
            batch. Each dict has image information.
        rescale (bool, optional): Whether to rescale the results.
            Defaults to False.

    Returns:
        list[tuple[Tensor, Tensor]]: A one-element list holding a 2-tuple.
            Its first item is ``bboxes`` with shape (n, 5), where
            5 represent (tl_x, tl_y, br_x, br_y, score). Its second item
            is ``labels`` with shape (n,).
    """
    # NMS has to be postponed until all augmentations are merged, so both
    # entry points must expose a ``with_nms`` switch.
    has_with_nms = [
        'with_nms' in signature(fn).parameters
        for fn in (self.get_bboxes, self._get_bboxes_single)
    ]
    assert all(has_with_nms), \
        f'{self.__class__.__name__} does not support test-time augmentation'

    per_aug_bboxes = []
    per_aug_scores = []
    per_aug_labels = []
    for feat, metas in zip(feats, img_metas):
        # only one image in the batch
        head_outs = self.forward(feat)
        single = self.get_bboxes(
            *head_outs,
            img_metas=metas,
            cfg=self.test_cfg,
            rescale=False,
            with_nms=False)[0]
        per_aug_bboxes.append(single[0])
        per_aug_scores.append(single[1])
        # Labels are optional in the per-image result.
        if len(single) >= 3:
            per_aug_labels.append(single[2])

    # after merging, bboxes will be rescaled to the original image size
    merged_bboxes, merged_scores = self.merge_aug_bboxes(
        per_aug_bboxes, per_aug_scores, img_metas)
    if per_aug_labels:
        merged_labels = torch.cat(per_aug_labels, dim=0)
    else:
        merged_labels = None

    # Nothing detected: return the empty (0, 5) result without running NMS.
    if merged_bboxes.numel() == 0:
        empty_dets = torch.cat([merged_bboxes, merged_scores[:, None]], -1)
        return [(empty_dets, merged_labels)]

    nms_dets, keep_idxs = batched_nms(merged_bboxes, merged_scores,
                                      merged_labels, self.test_cfg.nms)
    max_dets = self.test_cfg.max_per_img
    nms_dets = nms_dets[:max_dets]
    det_labels = merged_labels[keep_idxs][:max_dets]

    if rescale:
        return [(nms_dets, det_labels)]

    # Map the boxes back to the scale of the first augmentation's image,
    # on a copy so the NMS output itself is left untouched.
    scaled_dets = nms_dets.clone()
    scaled_dets[:, :4] *= nms_dets.new_tensor(
        img_metas[0][0]['scale_factor'])
    return [(scaled_dets, det_labels)]
def _get_bboxes_single(self,
                       cls_scores,
                       bbox_preds,
                       mlvl_anchors,
                       img_shape,
                       scale_factor,
                       cfg,
                       rescale=False,
                       with_nms=True):
    """Decode the head outputs of a single image into boxes.

    Args:
        cls_scores (list[Tensor]): Box scores for a single scale level
            has shape (num_classes, H, W).
        bbox_preds (list[Tensor]): Box distribution logits for a single
            scale level with shape (4*(n+1), H, W), n is max value of
            integral set.
        mlvl_anchors (list[Tensor]): Box reference for a single scale
            level with shape (num_total_anchors, 4).
        img_shape (tuple[int]): Shape of the input image,
            (height, width, 3).
        scale_factor (ndarray): Scale factor of the image arange as
            (w_scale, h_scale, w_scale, h_scale).
        cfg (mmcv.Config | None): Test / postprocessing configuration,
            if None, test_cfg would be used.
        rescale (bool): If True, return boxes in original image space.
            Default: False.
        with_nms (bool): If True, do nms before return boxes.
            Default: True.

    Returns:
        Tensor | tuple(Tensor): With ``with_nms=True``, only the
            ``batched_nms`` detections are returned (no labels), cut to
            ``cfg.max_per_img`` rows. With ``with_nms=False``, the tuple
            ``(bboxes, scores)`` of all levels is returned, where
            ``scores`` carries an extra all-zero background column.
    """
    if cfg is None:
        cfg = self.test_cfg
    assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)

    level_bboxes = []
    level_scores = []
    levels = zip(cls_scores, bbox_preds, self.anchor_generator.strides,
                 mlvl_anchors)
    for cls_score, bbox_pred, stride, anchors in levels:
        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
        assert stride[0] == stride[1]

        # Channels last, then one row of class scores per location.
        scores = cls_score.permute(1, 2, 0)
        scores = scores.reshape(-1, self.cls_out_channels)
        scores = scores.sigmoid()
        # Distribution logits -> distances, scaled by the level stride.
        bbox_pred = self.integral(bbox_pred.permute(1, 2, 0)) * stride[0]

        nms_pre = cfg.get('nms_pre', -1)
        if nms_pre > 0 and scores.shape[0] > nms_pre:
            # Keep the ``nms_pre`` locations with the best class score.
            best_scores = scores.max(dim=1)[0]
            topk_inds = best_scores.topk(nms_pre)[1]
            anchors = anchors[topk_inds, :]
            bbox_pred = bbox_pred[topk_inds, :]
            scores = scores[topk_inds, :]

        level_bboxes.append(
            distance2bbox(
                self.anchor_center(anchors), bbox_pred,
                max_shape=img_shape))
        level_scores.append(scores)

    all_bboxes = torch.cat(level_bboxes)
    if rescale:
        all_bboxes /= all_bboxes.new_tensor(scale_factor)
    all_scores = torch.cat(level_scores)
    # Add a dummy background class to the backend when using sigmoid
    # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
    # BG cat_id: num_class
    bg_column = all_scores.new_zeros(all_scores.shape[0], 1)
    all_scores = torch.cat([all_scores, bg_column], dim=1)

    if not with_nms:
        return all_bboxes, all_scores

    from mmcv.ops import batched_nms

    # Only the first score column is used, with every box given label 0,
    # so NMS runs over all boxes as a single group.
    fg_scores = all_scores[:, 0]
    labels = torch.zeros_like(fg_scores, dtype=torch.int64)
    dets, _ = batched_nms(all_bboxes, fg_scores.contiguous(),
                          labels.contiguous(), cfg.nms)
    return dets[:cfg.max_per_img]