def onnx_export(self, x, img_metas):
    """Run the head for ONNX export and return the proposals.

    The head is applied to ``x``, the parent class decodes its raw outputs
    with NMS disabled, and one ONNX-compatible NMS is then run on the
    decoded boxes of all levels together.

    Args:
        x (tuple[Tensor]): Features from the upstream network, each is
            a 4D-tensor.
        img_metas (list[dict]): Meta info of each image.

    Returns:
        Tensor: dets of shape [N, num_det, 5].
    """
    level_scores, level_deltas = self(x)
    assert len(level_scores) == len(level_deltas)

    # Decode only: NMS is postponed so it can be applied once below.
    decode_without_nms = super(RPNHead, self).onnx_export
    boxes, scores = decode_without_nms(
        level_scores, level_deltas, img_metas=img_metas, with_nms=False)

    # Use ONNX::NonMaxSuppression in deployment
    from mmdet.core.export import add_dummy_nms_for_onnx

    export_cfg = copy.deepcopy(self.test_cfg)
    score_thr = export_cfg.nms.get('score_thr', 0.0)
    pre_top_k = export_cfg.get('deploy_nms_pre', -1)

    # Different from the normal forward doing NMS level by level,
    # we do NMS across all levels when exporting ONNX.
    nms_outputs = add_dummy_nms_for_onnx(
        boxes,
        scores,
        export_cfg.max_per_img,
        export_cfg.nms.iou_threshold,
        score_thr,
        pre_top_k,
        export_cfg.max_per_img)
    proposals, _ = nms_outputs
    return proposals
def _get_bboxes(self,
                mlvl_cls_scores,
                mlvl_bbox_preds,
                mlvl_anchors,
                img_shapes,
                scale_factors,
                cfg,
                rescale=False,
                with_nms=True):
    """Transform batched head outputs into bbox predictions.

    Args:
        mlvl_cls_scores (list[Tensor]): Each element in the list is
            the scores of bboxes of single level in the feature pyramid,
            has shape (N, num_anchors * num_classes, H, W).
        mlvl_bbox_preds (list[Tensor]): Each element in the list is the
            bboxes predictions of single level in the feature pyramid,
            has shape (N, num_anchors * 4, H, W).
        mlvl_anchors (list[Tensor]): Each element in the list is
            the anchors of single level in feature pyramid, has shape
            (num_anchors, 4).
        img_shapes (list[tuple[int]]): Each tuple in the list represent
            the shape(height, width, 3) of single image in the batch.
        scale_factors (list[ndarray]): Scale factor of the batch
            image arange as list[(w_scale, h_scale, w_scale, h_scale)].
        cfg (mmcv.Config): Test / postprocessing configuration,
            if None, test_cfg would be used.
        rescale (bool): If True, return boxes in original image space.
            Default: False.
        with_nms (bool): If True, do nms before return boxes.
            Default: True.

    Returns:
        list[tuple[Tensor, Tensor]]: One 2-tuple per image. With
            ``with_nms=True`` the first item is an (n, 5) tensor, where
            5 represent (tl_x, tl_y, br_x, br_y, score) and the score
            between 0 and 1, and the second item is an (n,) tensor of
            class labels. With ``with_nms=False`` each tuple holds the
            decoded boxes and the per-class scores of that image.
            While exporting to ONNX with ``with_nms=True`` the result of
            ``add_dummy_nms_for_onnx`` is returned directly instead.
    """
    # Always keep topk op for dynamic input in onnx
    from mmdet.core.export import get_k_for_topk

    cfg = self.test_cfg if cfg is None else cfg
    assert len(mlvl_cls_scores) == len(mlvl_bbox_preds) == len(
        mlvl_anchors)
    batch_size = mlvl_cls_scores[0].shape[0]
    # convert to tensor to keep tracing
    nms_pre_tensor = torch.tensor(
        cfg.get('nms_pre', -1),
        device=mlvl_cls_scores[0].device,
        dtype=torch.long)

    mlvl_bboxes = []
    mlvl_scores = []
    for cls_score, bbox_pred, anchors in zip(mlvl_cls_scores,
                                             mlvl_bbox_preds,
                                             mlvl_anchors):
        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
        cls_score = cls_score.permute(0, 2, 3, 1).reshape(
            batch_size, -1, self.cls_out_channels)
        if self.use_sigmoid_cls:
            scores = cls_score.sigmoid()
        else:
            scores = cls_score.softmax(-1)
        bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(
            batch_size, -1, 4)
        anchors = anchors.expand_as(bbox_pred)

        nms_pre = get_k_for_topk(nms_pre_tensor, bbox_pred.shape[1])
        if nms_pre > 0:
            # Get maximum scores for foreground classes.
            if self.use_sigmoid_cls:
                max_scores, _ = scores.max(-1)
            else:
                # remind that we set FG labels to [0, num_class-1]
                # since mmdet v2.0
                # BG cat_id: num_class
                max_scores, _ = scores[..., :-1].max(-1)

            _, topk_inds = max_scores.topk(nms_pre)
            # Build the batch index on the same device as topk_inds so
            # the two index tensors never live on different devices.
            batch_inds = torch.arange(
                batch_size, device=topk_inds.device).view(
                    -1, 1).expand_as(topk_inds)
            anchors = anchors[batch_inds, topk_inds, :]
            bbox_pred = bbox_pred[batch_inds, topk_inds, :]
            scores = scores[batch_inds, topk_inds, :]

        bboxes = self.bbox_coder.decode(
            anchors, bbox_pred, max_shape=img_shapes)
        mlvl_bboxes.append(bboxes)
        mlvl_scores.append(scores)

    batch_mlvl_bboxes = torch.cat(mlvl_bboxes, dim=1)
    if rescale:
        batch_mlvl_bboxes /= batch_mlvl_bboxes.new_tensor(
            scale_factors).unsqueeze(1)
    batch_mlvl_scores = torch.cat(mlvl_scores, dim=1)

    # Replace multiclass_nms with ONNX::NonMaxSuppression in deployment
    if torch.onnx.is_in_onnx_export() and with_nms:
        from mmdet.core.export import add_dummy_nms_for_onnx

        # ignore background class
        if not self.use_sigmoid_cls:
            num_classes = batch_mlvl_scores.shape[2] - 1
            batch_mlvl_scores = batch_mlvl_scores[..., :num_classes]
        max_output_boxes_per_class = cfg.nms.get(
            'max_output_boxes_per_class', 200)
        iou_threshold = cfg.nms.get('iou_threshold', 0.5)
        score_threshold = cfg.score_thr
        nms_pre = cfg.get('deploy_nms_pre', -1)
        return add_dummy_nms_for_onnx(batch_mlvl_bboxes, batch_mlvl_scores,
                                      max_output_boxes_per_class,
                                      iou_threshold, score_threshold,
                                      nms_pre, cfg.max_per_img)

    if self.use_sigmoid_cls:
        # Add a dummy background class to the backend when using sigmoid
        # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
        # BG cat_id: num_class
        padding = batch_mlvl_scores.new_zeros(batch_size,
                                              batch_mlvl_scores.shape[1], 1)
        batch_mlvl_scores = torch.cat([batch_mlvl_scores, padding], dim=-1)

    if with_nms:
        det_results = []
        # Distinct loop names: the per-level lists above are not reused.
        for img_bboxes, img_scores in zip(batch_mlvl_bboxes,
                                          batch_mlvl_scores):
            det_bbox, det_label = multiclass_nms(img_bboxes, img_scores,
                                                 cfg.score_thr, cfg.nms,
                                                 cfg.max_per_img)
            det_results.append((det_bbox, det_label))
    else:
        det_results = [
            tuple(mlvl_bs)
            for mlvl_bs in zip(batch_mlvl_bboxes, batch_mlvl_scores)
        ]
    return det_results
def onnx_export(self, x, img_metas):
    """Run the RPN head for ONNX export and return the proposals.

    Per level, the top ``nms_pre`` anchors by objectness score are kept;
    the survivors of all levels are concatenated, decoded and passed
    through one ONNX-compatible NMS.

    Args:
        x (tuple[Tensor]): Features from the upstream network, each is
            a 4D-tensor.
        img_metas (list[dict]): Meta info of each image. Exactly one
            entry is supported, and it must provide the key
            ``'img_shape_for_onnx'``.

    Returns:
        Tensor: dets of shape [N, num_det, 5].
    """
    from mmdet.core.export import add_dummy_nms_for_onnx, get_k_for_topk

    cls_scores, bbox_preds = self(x)
    assert len(cls_scores) == len(bbox_preds)

    device = cls_scores[0].device
    featmap_sizes = [cls_score.shape[-2:] for cls_score in cls_scores]
    mlvl_anchors = self.anchor_generator.grid_anchors(
        featmap_sizes, device=device)

    cls_scores = [cls_score.detach() for cls_score in cls_scores]
    bbox_preds = [bbox_pred.detach() for bbox_pred in bbox_preds]

    assert len(
        img_metas
    ) == 1, 'Only support one input image while in exporting to ONNX'
    img_shapes = img_metas[0]['img_shape_for_onnx']

    cfg = copy.deepcopy(self.test_cfg)

    mlvl_scores = []
    mlvl_bbox_preds = []
    mlvl_valid_anchors = []
    batch_size = cls_scores[0].shape[0]
    nms_pre_tensor = torch.tensor(
        cfg.nms_pre, device=device, dtype=torch.long)
    for rpn_cls_score, rpn_bbox_pred, anchors in zip(
            cls_scores, bbox_preds, mlvl_anchors):
        assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
        rpn_cls_score = rpn_cls_score.permute(0, 2, 3, 1)
        if self.use_sigmoid_cls:
            rpn_cls_score = rpn_cls_score.reshape(batch_size, -1)
            scores = rpn_cls_score.sigmoid()
        else:
            rpn_cls_score = rpn_cls_score.reshape(batch_size, -1, 2)
            # We set FG labels to [0, num_class-1] and BG label to
            # num_class in RPN head since mmdet v2.5, which is unified to
            # be consistent with other head since mmdet v2.0. In mmdet v2.0
            # to v2.4 we keep BG label as 0 and FG label as 1 in rpn head.
            scores = rpn_cls_score.softmax(-1)[..., 0]
        rpn_bbox_pred = rpn_bbox_pred.permute(0, 2, 3, 1).reshape(
            batch_size, -1, 4)
        anchors = anchors.expand_as(rpn_bbox_pred)

        # Get top-k prediction
        nms_pre = get_k_for_topk(nms_pre_tensor, rpn_bbox_pred.shape[1])
        if nms_pre > 0:
            _, topk_inds = scores.topk(nms_pre)
            # batch_inds is added to topk_inds below, so both tensors
            # must be on the same device.
            batch_inds = torch.arange(
                batch_size, device=topk_inds.device).view(
                    -1, 1).expand_as(topk_inds)
            # Avoid onnx2tensorrt issue in https://github.com/NVIDIA/TensorRT/issues/1134 # noqa: E501
            # Mind k<=3480 in TensorRT for TopK
            transformed_inds = scores.shape[1] * batch_inds + topk_inds
            scores = scores.reshape(-1, 1)[transformed_inds].reshape(
                batch_size, -1)
            rpn_bbox_pred = rpn_bbox_pred.reshape(
                -1, 4)[transformed_inds, :].reshape(batch_size, -1, 4)
            anchors = anchors.reshape(-1, 4)[transformed_inds, :].reshape(
                batch_size, -1, 4)
        mlvl_scores.append(scores)
        mlvl_bbox_preds.append(rpn_bbox_pred)
        mlvl_valid_anchors.append(anchors)

    batch_mlvl_scores = torch.cat(mlvl_scores, dim=1)
    batch_mlvl_anchors = torch.cat(mlvl_valid_anchors, dim=1)
    batch_mlvl_rpn_bbox_pred = torch.cat(mlvl_bbox_preds, dim=1)
    batch_mlvl_proposals = self.bbox_coder.decode(
        batch_mlvl_anchors, batch_mlvl_rpn_bbox_pred, max_shape=img_shapes)

    # Use ONNX::NonMaxSuppression in deployment
    batch_mlvl_scores = batch_mlvl_scores.unsqueeze(2)
    score_threshold = cfg.nms.get('score_thr', 0.0)
    nms_pre = cfg.get('deploy_nms_pre', -1)
    dets, _ = add_dummy_nms_for_onnx(batch_mlvl_proposals,
                                     batch_mlvl_scores, cfg.max_per_img,
                                     cfg.nms.iou_threshold,
                                     score_threshold, nms_pre,
                                     cfg.max_per_img)
    return dets
def onnx_export(self,
                cls_scores,
                bbox_preds,
                score_factors=None,
                img_metas=None,
                with_nms=True):
    """Transform network output for a batch into bbox predictions.

    Args:
        cls_scores (list[Tensor]): Box scores for each scale level
            with shape (N, num_points * num_classes, H, W).
        bbox_preds (list[Tensor]): Box energies / deltas for each scale
            level with shape (N, num_points * 4, H, W).
        score_factors (list[Tensor]): score_factors for each scale level
            with shape (N, num_points * 1, H, W). Default: None.
        img_metas (list[dict]): Meta information of each image. Although
            the default is None, a list with exactly one entry providing
            the key ``'img_shape_for_onnx'`` is required.
        with_nms (bool): Whether apply nms to the bboxes. Default: True.

    Returns:
        tuple[Tensor, Tensor] | list[tuple]: When `with_nms` is True,
        it is tuple[Tensor, Tensor], first tensor bboxes with shape
        [N, num_det, 5], 5 arrange as (x1, y1, x2, y2, score)
        and second element is class labels of shape [N, num_det].
        When `with_nms` is False, first tensor is bboxes with
        shape [N, num_det, 4], second tensor is raw score has
        shape [N, num_det, num_classes].
    """
    from mmdet.core.export import add_dummy_nms_for_onnx, get_k_for_topk

    assert len(cls_scores) == len(bbox_preds)
    num_levels = len(cls_scores)

    featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
    mlvl_priors = self.prior_generator.grid_priors(
        featmap_sizes,
        dtype=bbox_preds[0].dtype,
        device=bbox_preds[0].device)
    assert num_levels == len(mlvl_priors)

    mlvl_cls_scores = [cls_score.detach() for cls_score in cls_scores]
    mlvl_bbox_preds = [bbox_pred.detach() for bbox_pred in bbox_preds]

    # Fail with the intended message instead of ``len(None)`` when the
    # default ``img_metas=None`` is passed through.
    assert img_metas is not None and len(
        img_metas
    ) == 1, 'Only support one input image while in exporting to ONNX'
    img_shape = img_metas[0]['img_shape_for_onnx']

    cfg = self.test_cfg
    device = cls_scores[0].device
    batch_size = cls_scores[0].shape[0]
    # convert to tensor to keep tracing
    nms_pre_tensor = torch.tensor(
        cfg.get('nms_pre', -1), device=device, dtype=torch.long)

    with_score_factors = score_factors is not None
    if with_score_factors:
        # e.g. FCOS, PAA, ATSS, etc.
        mlvl_score_factor = [
            score_factors[i].detach() for i in range(num_levels)
        ]
    else:
        # e.g. Retina, FreeAnchor, etc.
        mlvl_score_factor = [None for _ in range(num_levels)]

    mlvl_score_factors = []
    mlvl_batch_bboxes = []
    mlvl_scores = []

    # ``score_factor`` (singular) is the per-level tensor; the
    # ``score_factors`` argument is left untouched.
    for cls_score, bbox_pred, score_factor, priors in zip(
            mlvl_cls_scores, mlvl_bbox_preds, mlvl_score_factor,
            mlvl_priors):
        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]

        scores = cls_score.permute(0, 2, 3, 1).reshape(
            batch_size, -1, self.cls_out_channels)
        if self.use_sigmoid_cls:
            scores = scores.sigmoid()
        else:
            scores = scores.softmax(-1)

        if with_score_factors:
            score_factor = score_factor.permute(0, 2, 3, 1).reshape(
                batch_size, -1).sigmoid()
        bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(
            batch_size, -1, 4)
        priors = priors.expand(batch_size, -1, priors.size(-1))

        # Get top-k predictions
        nms_pre = get_k_for_topk(nms_pre_tensor, bbox_pred.shape[1])
        if nms_pre > 0:
            # Ranking uses the factor-weighted score, but the unweighted
            # ``scores`` are what is gathered and kept below.
            nms_pre_score = scores
            if with_score_factors:
                nms_pre_score = nms_pre_score * score_factor[..., None]

            # Get maximum scores for foreground classes.
            if self.use_sigmoid_cls:
                max_scores, _ = nms_pre_score.max(-1)
            else:
                # remind that we set FG labels to [0, num_class-1]
                # since mmdet v2.0
                # BG cat_id: num_class
                max_scores, _ = nms_pre_score[..., :-1].max(-1)
            _, topk_inds = max_scores.topk(nms_pre)

            batch_inds = torch.arange(
                batch_size, device=bbox_pred.device).view(
                    -1, 1).expand_as(topk_inds).long()
            # Avoid onnx2tensorrt issue in https://github.com/NVIDIA/TensorRT/issues/1134 # noqa: E501
            transformed_inds = bbox_pred.shape[1] * batch_inds + topk_inds
            priors = priors.reshape(
                -1, priors.size(-1))[transformed_inds, :].reshape(
                    batch_size, -1, priors.size(-1))
            bbox_pred = bbox_pred.reshape(-1,
                                          4)[transformed_inds, :].reshape(
                                              batch_size, -1, 4)
            scores = scores.reshape(
                -1, self.cls_out_channels)[transformed_inds, :].reshape(
                    batch_size, -1, self.cls_out_channels)
            if with_score_factors:
                score_factor = score_factor.reshape(
                    -1, 1)[transformed_inds].reshape(batch_size, -1)

        bboxes = self.bbox_coder.decode(
            priors, bbox_pred, max_shape=img_shape)

        mlvl_batch_bboxes.append(bboxes)
        mlvl_scores.append(scores)
        if with_score_factors:
            mlvl_score_factors.append(score_factor)

    batch_bboxes = torch.cat(mlvl_batch_bboxes, dim=1)
    batch_scores = torch.cat(mlvl_scores, dim=1)
    if with_score_factors:
        batch_score_factors = torch.cat(mlvl_score_factors, dim=1)

    # Replace multiclass_nms with ONNX::NonMaxSuppression in deployment
    if not self.use_sigmoid_cls:
        batch_scores = batch_scores[..., :self.num_classes]

    if with_score_factors:
        batch_scores = batch_scores * (batch_score_factors.unsqueeze(2))

    if not with_nms:
        return batch_bboxes, batch_scores

    max_output_boxes_per_class = cfg.nms.get('max_output_boxes_per_class',
                                             200)
    iou_threshold = cfg.nms.get('iou_threshold', 0.5)
    score_threshold = cfg.score_thr
    nms_pre = cfg.get('deploy_nms_pre', -1)
    return add_dummy_nms_for_onnx(batch_bboxes, batch_scores,
                                  max_output_boxes_per_class,
                                  iou_threshold, score_threshold, nms_pre,
                                  cfg.max_per_img)
def _get_bboxes(self,
                cls_scores,
                bbox_preds,
                mlvl_anchors,
                img_shapes,
                scale_factors,
                cfg,
                rescale=False):
    """Transform batched RPN outputs into proposals.

    Args:
        cls_scores (list[Tensor]): Box scores for each scale level
            Has shape (N, num_anchors * num_classes, H, W).
        bbox_preds (list[Tensor]): Box energies / deltas for each scale
            level with shape (N, num_anchors * 4, H, W).
        mlvl_anchors (list[Tensor]): Box reference for each scale level
            with shape (num_total_anchors, 4).
        img_shapes (list[tuple[int]]): Shape of the input image,
            (height, width, 3).
        scale_factors (list[ndarray]): Scale factor of the image arange as
            (w_scale, h_scale, w_scale, h_scale). Not used by this
            method.
        cfg (mmcv.Config): Test / postprocessing configuration,
            if None, test_cfg would be used. A deep copy is taken, so
            the caller's config is never modified.
        rescale (bool): If True, return boxes in original image space.
            Not used by this method.

    Returns:
        list[Tensor] | Tensor: One tensor of NMS results per image,
            truncated to ``cfg.max_per_img`` rows. While exporting to
            ONNX, the batched ``dets`` tensor produced by
            ``add_dummy_nms_for_onnx`` is returned instead.
    """
    from mmdet.core.export import get_k_for_topk

    cfg = self.test_cfg if cfg is None else cfg
    cfg = copy.deepcopy(cfg)
    # bboxes from different level should be independent during NMS,
    # level_ids are used as labels for batched NMS to separate them
    level_ids = []
    mlvl_scores = []
    mlvl_bbox_preds = []
    mlvl_valid_anchors = []
    batch_size = cls_scores[0].shape[0]
    nms_pre_tensor = torch.tensor(
        cfg.nms_pre, device=cls_scores[0].device, dtype=torch.long)
    for idx, (rpn_cls_score, rpn_bbox_pred) in enumerate(
            zip(cls_scores, bbox_preds)):
        assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
        rpn_cls_score = rpn_cls_score.permute(0, 2, 3, 1)
        if self.use_sigmoid_cls:
            rpn_cls_score = rpn_cls_score.reshape(batch_size, -1)
            scores = rpn_cls_score.sigmoid()
        else:
            rpn_cls_score = rpn_cls_score.reshape(batch_size, -1, 2)
            # We set FG labels to [0, num_class-1] and BG label to
            # num_class in RPN head since mmdet v2.5, which is unified to
            # be consistent with other head since mmdet v2.0. In mmdet v2.0
            # to v2.4 we keep BG label as 0 and FG label as 1 in rpn head.
            scores = rpn_cls_score.softmax(-1)[..., 0]
        rpn_bbox_pred = rpn_bbox_pred.permute(0, 2, 3, 1).reshape(
            batch_size, -1, 4)
        anchors = mlvl_anchors[idx]
        anchors = anchors.expand_as(rpn_bbox_pred)

        # Get top-k prediction
        nms_pre = get_k_for_topk(nms_pre_tensor, rpn_bbox_pred.shape[1])
        if nms_pre > 0:
            if torch.onnx.is_in_onnx_export():
                _, topk_inds = scores.topk(nms_pre)
                # batch_inds is added to topk_inds, so both tensors
                # must be on the same device.
                batch_inds = torch.arange(
                    batch_size, device=topk_inds.device).view(
                        -1, 1).expand_as(topk_inds)
                # Avoid onnx2tensorrt issue in https://github.com/NVIDIA/TensorRT/issues/1134 # noqa: E501
                # Mind k<=3480 in TensorRT for TopK
                transformed_inds = scores.shape[1] * batch_inds + topk_inds
                scores = scores.reshape(-1, 1)[transformed_inds].reshape(
                    batch_size, -1)
                rpn_bbox_pred = rpn_bbox_pred.reshape(
                    -1, 4)[transformed_inds, :].reshape(batch_size, -1, 4)
                anchors = anchors.reshape(
                    -1, 4)[transformed_inds, :].reshape(batch_size, -1, 4)
            else:
                # sort is faster than topk
                ranked_scores, rank_inds = scores.sort(descending=True)
                topk_inds = rank_inds[:, :cfg.nms_pre]
                scores = ranked_scores[:, :cfg.nms_pre]
                batch_inds = torch.arange(
                    batch_size, device=topk_inds.device).view(
                        -1, 1).expand_as(topk_inds)
                rpn_bbox_pred = rpn_bbox_pred[batch_inds, topk_inds, :]
                anchors = anchors[batch_inds, topk_inds, :]

        mlvl_scores.append(scores)
        mlvl_bbox_preds.append(rpn_bbox_pred)
        mlvl_valid_anchors.append(anchors)
        level_ids.append(
            scores.new_full((
                batch_size,
                scores.size(1),
            ),
                            idx,
                            dtype=torch.long))

    batch_mlvl_scores = torch.cat(mlvl_scores, dim=1)
    batch_mlvl_anchors = torch.cat(mlvl_valid_anchors, dim=1)
    batch_mlvl_rpn_bbox_pred = torch.cat(mlvl_bbox_preds, dim=1)
    batch_mlvl_proposals = self.bbox_coder.decode(
        batch_mlvl_anchors, batch_mlvl_rpn_bbox_pred, max_shape=img_shapes)
    batch_mlvl_ids = torch.cat(level_ids, dim=1)

    # deprecate arguments warning
    if 'nms' not in cfg or 'max_num' in cfg or 'nms_thr' in cfg:
        warnings.warn(
            'In rpn_proposal or test_cfg, '
            'nms_thr has been moved to a dict named nms as '
            'iou_threshold, max_num has been renamed as max_per_img, '
            'name of original arguments and the way to specify '
            'iou_threshold of NMS will be deprecated.')
    if 'nms' not in cfg:
        cfg.nms = ConfigDict(dict(type='nms', iou_threshold=cfg.nms_thr))
    if 'max_num' in cfg:
        if 'max_per_img' in cfg:
            assert cfg.max_num == cfg.max_per_img, f'You ' \
                f'set max_num and ' \
                f'max_per_img at the same time, but get {cfg.max_num} ' \
                f'and {cfg.max_per_img} respectively. ' \
                'Please delete max_num which will be deprecated.'
        else:
            cfg.max_per_img = cfg.max_num
    if 'nms_thr' in cfg:
        assert cfg.nms.iou_threshold == cfg.nms_thr, f'You set' \
            f' iou_threshold in nms and ' \
            f'nms_thr at the same time, but get' \
            f' {cfg.nms.iou_threshold} and {cfg.nms_thr}' \
            f' respectively. Please delete the nms_thr ' \
            f'which will be deprecated.'

    # Replace batched_nms with ONNX::NonMaxSuppression in deployment
    if torch.onnx.is_in_onnx_export():
        from mmdet.core.export import add_dummy_nms_for_onnx
        batch_mlvl_scores = batch_mlvl_scores.unsqueeze(2)
        score_threshold = cfg.nms.get('score_thr', 0.0)
        nms_pre = cfg.get('deploy_nms_pre', cfg.max_per_img)
        dets, _ = add_dummy_nms_for_onnx(batch_mlvl_proposals,
                                         batch_mlvl_scores,
                                         cfg.max_per_img,
                                         cfg.nms.iou_threshold,
                                         score_threshold, nms_pre,
                                         cfg.max_per_img)
        return dets

    result_list = []
    for proposals, scores, ids in zip(batch_mlvl_proposals,
                                      batch_mlvl_scores, batch_mlvl_ids):
        # Skip the size filter while exporting to ONNX
        if cfg.min_bbox_size > 0 and (not torch.onnx.is_in_onnx_export()):
            w = proposals[:, 2] - proposals[:, 0]
            h = proposals[:, 3] - proposals[:, 1]
            # A boolean mask is used on purpose: comparing the sum of
            # the valid indices with the proposal count can skip the
            # filter while small boxes remain, and squeezing a single
            # index would drop the box dimension.
            valid_mask = (w >= cfg.min_bbox_size) & (h >=
                                                     cfg.min_bbox_size)
            if not valid_mask.all():
                proposals = proposals[valid_mask]
                scores = scores[valid_mask]
                ids = ids[valid_mask]

        dets, _ = batched_nms(proposals, scores, ids, cfg.nms)
        result_list.append(dets[:cfg.max_per_img])
    return result_list
def onnx_export(self,
                rois,
                cls_score,
                bbox_pred,
                img_shape,
                cfg=None,
                **kwargs):
    """Turn batched bbox-head outputs into detections for ONNX export.

    Args:
        rois (Tensor): Boxes to be transformed.
            Has shape (B, num_boxes, 5)
        cls_score (Tensor): Box scores. has shape
            (B, num_boxes, num_classes + 1), 1 represent the background.
        bbox_pred (Tensor, optional): Box energies / deltas for,
            has shape (B, num_boxes, num_classes * 4) when.
        img_shape (torch.Tensor): Shape of image.
        cfg (obj:`ConfigDict`): `test_cfg` of Bbox Head. Default: None.
            NOTE(review): the body reads ``cfg.nms`` unconditionally, so
            a real config appears to be required despite the default.

    Returns:
        tuple[Tensor, Tensor]: dets of shape [N, num_det, 5]
            and class labels of shape [N, num_det].
    """
    assert rois.ndim == 3, ('Only support export two stage '
                            'model to ONNX '
                            'with batch dimension. ')

    if self.custom_cls_channels:
        scores = self.loss_cls.get_activation(cls_score)
    elif cls_score is not None:
        scores = F.softmax(cls_score, dim=-1)
    else:
        scores = None

    if bbox_pred is None:
        # No deltas: use the rois themselves, clipped to the image.
        bboxes = rois[..., 1:].clone()
        if img_shape is not None:
            max_shape = bboxes.new_tensor(img_shape)[..., :2]
            lower = bboxes.new_tensor(0)
            upper = torch.cat([max_shape] * 2,
                              dim=-1).flip(-1).unsqueeze(-2)
            bboxes = torch.where(bboxes < lower, lower, bboxes)
            bboxes = torch.where(bboxes > upper, upper, bboxes)
    else:
        bboxes = self.bbox_coder.decode(
            rois[..., 1:], bbox_pred, max_shape=img_shape)

    # Replace multiclass_nms with ONNX::NonMaxSuppression in deployment
    from mmdet.core.export import add_dummy_nms_for_onnx

    nms_cfg = cfg.nms
    boxes_per_class = nms_cfg.get('max_output_boxes_per_class',
                                  cfg.max_per_img)
    iou_thr = nms_cfg.get('iou_threshold', 0.5)
    score_thr = cfg.score_thr
    pre_top_k = cfg.get('deploy_nms_pre', -1)

    # Drop the trailing background column.
    scores = scores[..., :self.num_classes]

    if self.reg_class_agnostic:
        return add_dummy_nms_for_onnx(
            bboxes,
            scores,
            boxes_per_class,
            iou_thr,
            score_thr,
            pre_top_k=pre_top_k,
            after_top_k=cfg.max_per_img)

    # Class-specific boxes: flatten the (box, class) pairs into one list.
    num_imgs = scores.shape[0]
    class_ids = torch.arange(
        self.num_classes, dtype=torch.long).to(scores.device)
    class_ids = class_ids.view(1, 1, -1).expand_as(scores)
    class_ids = class_ids.reshape(num_imgs, -1)
    flat_scores = scores.reshape(num_imgs, -1)
    flat_bboxes = bboxes.reshape(num_imgs, -1, 4)

    max_size = torch.max(img_shape)
    # Offset bboxes of each class so that bboxes of different labels
    # do not overlap.
    shift = (class_ids * max_size + 1).unsqueeze(2)
    shifted_bboxes = flat_bboxes + shift

    batch_dets, labels = add_dummy_nms_for_onnx(
        shifted_bboxes,
        flat_scores.unsqueeze(2),
        boxes_per_class,
        iou_thr,
        score_thr,
        pre_top_k=pre_top_k,
        after_top_k=cfg.max_per_img,
        labels=class_ids)

    # Offset the bboxes back after dummy nms.
    shift = (labels * max_size + 1).unsqueeze(2)
    # Indexing + inplace operation fails with dynamic shape in ONNX
    # original style: batch_dets[..., :4] -= offsets
    det_boxes = batch_dets[..., 0:4]
    det_scores = batch_dets[..., 4:5]
    det_boxes -= shift
    batch_dets = torch.cat([det_boxes, det_scores], dim=2)
    return batch_dets, labels
def _get_bboxes(self,
                pred_maps_list,
                scale_factors,
                cfg,
                rescale=False,
                with_nms=True):
    """Transform batched prediction maps into bbox predictions.

    Args:
        pred_maps_list (list[Tensor]): Prediction maps for different
            scales of each single image in the batch.
        scale_factors (list(ndarray)): Scale factor of the image arrange
            as (w_scale, h_scale, w_scale, h_scale).
        cfg (mmcv.Config | None): Test / postprocessing configuration,
            if None, test_cfg would be used.
        rescale (bool): If True, return boxes in original image space.
            Default: False.
        with_nms (bool): If True, do nms before return boxes.
            Default: True.

    Returns:
        list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.
            The first item is an (n, 5) tensor, where 5 represent
            (tl_x, tl_y, br_x, br_y, score) and the score between 0 and 1.
            The shape of the second tensor in the tuple is (n,), and
            each element represents the class label of the corresponding
            box. When NMS is not applied (``with_nms=False`` or no
            ``nms`` entry in ``cfg``), each item is instead a 3-tuple of
            (bboxes, class scores with a padded background column,
            confidence scores). While exporting to ONNX with
            ``with_nms=True`` the result of ``add_dummy_nms_for_onnx``
            is returned directly.
    """
    from mmdet.core.export import get_k_for_topk

    cfg = self.test_cfg if cfg is None else cfg
    assert len(pred_maps_list) == self.num_levels

    device = pred_maps_list[0].device
    batch_size = pred_maps_list[0].shape[0]

    featmap_sizes = [
        pred_maps_list[i].shape[-2:] for i in range(self.num_levels)
    ]
    multi_lvl_anchors = self.anchor_generator.grid_anchors(
        featmap_sizes, device)
    # convert to tensor to keep tracing
    nms_pre_tensor = torch.tensor(
        cfg.get('nms_pre', -1), device=device, dtype=torch.long)

    multi_lvl_bboxes = []
    multi_lvl_cls_scores = []
    multi_lvl_conf_scores = []
    for i in range(self.num_levels):
        # get some key info for current scale
        pred_map = pred_maps_list[i]
        stride = self.featmap_strides[i]
        # (b,h, w, num_anchors*num_attrib) ->
        # (b,h*w*num_anchors, num_attrib)
        pred_map = pred_map.permute(0, 2, 3,
                                    1).reshape(batch_size, -1,
                                               self.num_attrib)
        # Inplace operation like
        # ```pred_map[..., :2] = \torch.sigmoid(pred_map[..., :2])```
        # would create constant tensor when exporting to onnx
        pred_map_conf = torch.sigmoid(pred_map[..., :2])
        pred_map_rest = pred_map[..., 2:]
        pred_map = torch.cat([pred_map_conf, pred_map_rest], dim=-1)
        pred_map_boxes = pred_map[..., :4]
        multi_lvl_anchor = multi_lvl_anchors[i]
        multi_lvl_anchor = multi_lvl_anchor.expand_as(pred_map_boxes)
        bbox_pred = self.bbox_coder.decode(multi_lvl_anchor,
                                           pred_map_boxes, stride)
        # conf and cls
        conf_pred = torch.sigmoid(pred_map[..., 4])
        cls_pred = torch.sigmoid(pred_map[..., 5:]).view(
            batch_size, -1, self.num_classes)  # Cls pred one-hot.

        # Get top-k prediction
        nms_pre = get_k_for_topk(nms_pre_tensor, bbox_pred.shape[1])
        if nms_pre > 0:
            _, topk_inds = conf_pred.topk(nms_pre)
            # Same device as topk_inds: the two are added together on
            # the ONNX path below.
            batch_inds = torch.arange(
                batch_size, device=topk_inds.device).view(
                    -1, 1).expand_as(topk_inds).long()
            # Avoid onnx2tensorrt issue in https://github.com/NVIDIA/TensorRT/issues/1134 # noqa: E501
            if torch.onnx.is_in_onnx_export():
                transformed_inds = (
                    bbox_pred.shape[1] * batch_inds + topk_inds)
                bbox_pred = bbox_pred.reshape(
                    -1, 4)[transformed_inds, :].reshape(batch_size, -1, 4)
                cls_pred = cls_pred.reshape(
                    -1, self.num_classes)[transformed_inds, :].reshape(
                        batch_size, -1, self.num_classes)
                conf_pred = conf_pred.reshape(
                    -1, 1)[transformed_inds].reshape(batch_size, -1)
            else:
                bbox_pred = bbox_pred[batch_inds, topk_inds, :]
                cls_pred = cls_pred[batch_inds, topk_inds, :]
                conf_pred = conf_pred[batch_inds, topk_inds]

        # Save the result of current scale
        multi_lvl_bboxes.append(bbox_pred)
        multi_lvl_cls_scores.append(cls_pred)
        multi_lvl_conf_scores.append(conf_pred)

    # Merge the results of different scales together
    batch_mlvl_bboxes = torch.cat(multi_lvl_bboxes, dim=1)
    batch_mlvl_scores = torch.cat(multi_lvl_cls_scores, dim=1)
    batch_mlvl_conf_scores = torch.cat(multi_lvl_conf_scores, dim=1)

    # Replace multiclass_nms with ONNX::NonMaxSuppression in deployment
    if torch.onnx.is_in_onnx_export() and with_nms:
        from mmdet.core.export import add_dummy_nms_for_onnx
        conf_thr = cfg.get('conf_thr', -1)
        score_thr = cfg.get('score_thr', -1)
        # follow original pipeline of YOLOv3
        if conf_thr > 0:
            mask = (batch_mlvl_conf_scores >= conf_thr).float()
            batch_mlvl_conf_scores *= mask
        if score_thr > 0:
            mask = (batch_mlvl_scores > score_thr).float()
            batch_mlvl_scores *= mask
        batch_mlvl_conf_scores = batch_mlvl_conf_scores.unsqueeze(
            2).expand_as(batch_mlvl_scores)
        batch_mlvl_scores = batch_mlvl_scores * batch_mlvl_conf_scores
        max_output_boxes_per_class = cfg.nms.get(
            'max_output_boxes_per_class', 200)
        iou_threshold = cfg.nms.get('iou_threshold', 0.5)
        # keep aligned with original pipeline, improve
        # mAP by 1% for YOLOv3 in ONNX
        score_threshold = 0
        nms_pre = cfg.get('deploy_nms_pre', -1)
        return add_dummy_nms_for_onnx(
            batch_mlvl_bboxes,
            batch_mlvl_scores,
            max_output_boxes_per_class,
            iou_threshold,
            score_threshold,
            nms_pre,
            cfg.max_per_img,
        )

    if with_nms and (batch_mlvl_conf_scores.size(0) == 0):
        return torch.zeros((0, 5)), torch.zeros((0, ))

    if rescale:
        batch_mlvl_bboxes /= batch_mlvl_bboxes.new_tensor(
            scale_factors).unsqueeze(1)

    # In mmdet 2.x, the class_id for background is num_classes.
    # i.e., the last column.
    padding = batch_mlvl_scores.new_zeros(batch_size,
                                          batch_mlvl_scores.shape[1], 1)
    batch_mlvl_scores = torch.cat([batch_mlvl_scores, padding], dim=-1)

    # Support exporting to onnx without nms
    if with_nms and cfg.get('nms', None) is not None:
        # The threshold is the same for every image: look it up once.
        conf_thr = cfg.get('conf_thr', -1)
        det_results = []
        for (mlvl_bboxes, mlvl_scores,
             mlvl_conf_scores) in zip(batch_mlvl_bboxes, batch_mlvl_scores,
                                      batch_mlvl_conf_scores):
            # Filtering out all predictions with conf < conf_thr
            if conf_thr > 0 and (not torch.onnx.is_in_onnx_export()):
                # TensorRT not support NonZero
                # add as_tuple=False for compatibility in Pytorch 1.6
                # flatten would create a Reshape op with constant values,
                # and raise RuntimeError when doing inference in ONNX
                # Runtime with a different input image (#4221).
                conf_inds = mlvl_conf_scores.ge(conf_thr).nonzero(
                    as_tuple=False).squeeze(1)
                mlvl_bboxes = mlvl_bboxes[conf_inds, :]
                mlvl_scores = mlvl_scores[conf_inds, :]
                mlvl_conf_scores = mlvl_conf_scores[conf_inds]

            det_bboxes, det_labels = multiclass_nms(
                mlvl_bboxes,
                mlvl_scores,
                cfg.score_thr,
                cfg.nms,
                cfg.max_per_img,
                score_factors=mlvl_conf_scores)
            det_results.append((det_bboxes, det_labels))
    else:
        det_results = [
            tuple(mlvl_bs)
            for mlvl_bs in zip(batch_mlvl_bboxes, batch_mlvl_scores,
                               batch_mlvl_conf_scores)
        ]
    return det_results
def get_bboxes(self,
               rois,
               cls_score,
               bbox_pred,
               img_shape,
               scale_factor,
               rescale=False,
               cfg=None):
    """Turn bbox-head outputs into per-image detections.

    A 3D ``rois`` input is handled in `batch_mode` and yields a
    tuple[list[Tensor], list[Tensor]]; a 2D ``rois`` input gets a
    temporary batch dimension and yields a tuple[Tensor, Tensor].

    Args:
        rois (Tensor): Boxes to be transformed. Has shape
            (num_boxes, 5) or (B, num_boxes, 5)
        cls_score (list[Tensor] or Tensor): Box scores. A list is
            averaged element-wise before the softmax is taken.
        bbox_pred (Tensor, optional): Box energies / deltas. When None,
            the rois themselves are used as boxes and clipped to
            ``img_shape``.
        img_shape (Sequence[int] or torch.Tensor or Sequence[
            Sequence[int]], optional): Maximum bounds for boxes, specifies
            (H, W, C) or (H, W). If rois shape is (B, num_boxes, 4), then
            the max_shape should be a Sequence[Sequence[int]]
            and the length of max_shape should also be B.
        scale_factor (tuple[ndarray] or ndarray): Scale factor of the
            image arange as (w_scale, h_scale, w_scale, h_scale). In
            `batch_mode`, the scale_factor shape is tuple[ndarray].
        rescale (bool): If True, return boxes in original image space.
            Default: False.
        cfg (obj:`ConfigDict`): `test_cfg` of Bbox Head. Default: None.
            When None (outside ONNX export) no NMS is run and the boxes
            and softmax scores are returned as they are.

    Returns:
        tuple[list[Tensor], list[Tensor]] or tuple[Tensor, Tensor]:
            If the input has a batch dimension, the return value is
            a tuple of the list. The first list contains the boxes of
            the corresponding image in a batch, each tensor has the
            shape (num_boxes, 5) and last dimension 5 represent
            (tl_x, tl_y, br_x, br_y, score). Each Tensor in the second
            list is the labels with shape (num_boxes, ). The length of
            both lists should be equal to batch_size. Otherwise return
            value is a tuple of two tensors, the first tensor is the
            boxes with scores, the second tensor is the labels, both
            have the same shape as the first case.
    """
    if isinstance(cls_score, list):
        cls_score = sum(cls_score) / float(len(cls_score))

    scores = None if cls_score is None else F.softmax(cls_score, dim=-1)

    batch_mode = rois.ndim != 2
    if not batch_mode:
        # e.g. AugTest, Cascade R-CNN, HTC, SCNet...
        # add batch dimension
        if scores is not None:
            scores = scores.unsqueeze(0)
        if bbox_pred is not None:
            bbox_pred = bbox_pred.unsqueeze(0)
        rois = rois.unsqueeze(0)

    if bbox_pred is None:
        bboxes = rois[..., 1:].clone()
        if img_shape is not None:
            max_shape = bboxes.new_tensor(img_shape)[..., :2]
            lower = bboxes.new_tensor(0)
            upper = torch.cat([max_shape] * 2,
                              dim=-1).flip(-1).unsqueeze(-2)
            bboxes = torch.where(bboxes < lower, lower, bboxes)
            bboxes = torch.where(bboxes > upper, upper, bboxes)
    else:
        bboxes = self.bbox_coder.decode(
            rois[..., 1:], bbox_pred, max_shape=img_shape)

    if rescale and bboxes.size(-2) > 0:
        factors = scale_factor
        if not isinstance(factors, tuple):
            factors = tuple([factors])
        # B, 1, bboxes.size(-1)
        factors = bboxes.new_tensor(factors).unsqueeze(1).repeat(
            1, 1,
            bboxes.size(-1) // 4)
        bboxes /= factors

    # Replace multiclass_nms with ONNX::NonMaxSuppression in deployment
    if torch.onnx.is_in_onnx_export():
        from mmdet.core.export import add_dummy_nms_for_onnx
        num_imgs = scores.shape[0]
        # ignore background class
        scores = scores[..., :self.num_classes]
        class_ids = torch.arange(
            self.num_classes, dtype=torch.long).to(scores.device)
        class_ids = class_ids.view(1, 1, -1).expand_as(scores)
        class_ids = class_ids.reshape(num_imgs, -1)
        scores = scores.reshape(num_imgs, -1)
        bboxes = bboxes.reshape(num_imgs, -1, 4)

        max_size = torch.max(img_shape)
        # Offset bboxes of each class so that bboxes of different labels
        # do not overlap.
        shift = (class_ids * max_size + 1).unsqueeze(2)
        shifted_bboxes = bboxes + shift
        boxes_per_class = cfg.nms.get('max_output_boxes_per_class',
                                      cfg.max_per_img)
        iou_thr = cfg.nms.get('iou_threshold', 0.5)
        score_thr = cfg.score_thr
        pre_top_k = cfg.get('deploy_nms_pre', -1)
        batch_dets, labels = add_dummy_nms_for_onnx(
            shifted_bboxes,
            scores.unsqueeze(2),
            boxes_per_class,
            iou_thr,
            score_thr,
            pre_top_k=pre_top_k,
            after_top_k=cfg.max_per_img,
            labels=class_ids)
        # Offset the bboxes back after dummy nms.
        shift = (labels * max_size + 1).unsqueeze(2)
        # Indexing + inplace operation fails with dynamic shape in ONNX
        # original style: batch_dets[..., :4] -= offsets
        det_boxes = batch_dets[..., 0:4]
        det_scores = batch_dets[..., 4:5]
        det_boxes -= shift
        batch_dets = torch.cat([det_boxes, det_scores], dim=2)
        return batch_dets, labels

    if cfg is None:
        # No test config: pass boxes and scores through untouched.
        per_image = [(bbox, score) for bbox, score in zip(bboxes, scores)]
    else:
        per_image = [
            multiclass_nms(bbox, score, cfg.score_thr, cfg.nms,
                           cfg.max_per_img)
            for bbox, score in zip(bboxes, scores)
        ]
    det_bboxes = [result[0] for result in per_image]
    det_labels = [result[1] for result in per_image]

    if batch_mode:
        return det_bboxes, det_labels
    return det_bboxes[0], det_labels[0]
def get_bboxes(self,
               rois,
               cls_score,
               bbox_pred,
               img_shape,
               scale_factor,
               rescale=False,
               cfg=None):
    """Turn RoI head outputs into detection boxes and scores.

    The classification logits are first re-ordered / sub-selected along the
    last dimension with a fixed index list and only then soft-maxed, so the
    returned scores have 12 channels regardless of the logit width.

    Args:
        rois (Tensor): RoIs; the first column is dropped and the remaining
            columns are treated as boxes. A 2-D tensor is handled as a
            single image, anything else as an already batched input.
        cls_score (Tensor | list[Tensor] | None): Classification logits.
            A list is averaged element-wise. ``None`` yields ``None``
            scores.
        bbox_pred (Tensor | None): Box regression output, decoded with
            ``self.bbox_coder``. When ``None`` the RoIs themselves are
            used (clipped to ``img_shape`` if that is given).
        img_shape (Sequence | Tensor | None): Image shape; only the first
            two entries are used for clipping. Presumably
            ``(height, width, ...)`` - confirm against the caller. Must be
            a tensor while exporting to ONNX (``torch.max`` is applied).
        scale_factor (tuple | float | ndarray): Divisor applied to the boxes
            when ``rescale`` is True.
        rescale (bool): Divide boxes by ``scale_factor``. Default: False.
        cfg (mmcv.Config | None): Test config providing ``score_thr``,
            ``nms`` and ``max_per_img``. When ``None`` no NMS is run.

    Returns:
        tuple: ``(det_bboxes, det_labels)``. Outside ONNX export these are
        per-image lists in batch mode, or the single image's entries when
        ``rois`` was 2-D. Without ``cfg`` the "labels" are the raw score
        tensors (or ``None``). During ONNX export the pair
        ``(batch_dets, labels)`` produced via ``add_dummy_nms_for_onnx``
        is returned instead.
    """
    if isinstance(cls_score, list):
        cls_score = sum(cls_score) / float(len(cls_score))

    if cls_score is not None:
        # Fixed channel re-mapping applied to the logits before softmax.
        # NOTE(review): index 1 is used for both of the last two slots;
        # presumably the final slot acts as the background channel -
        # confirm with the model's class layout.
        remap_indices = torch.tensor(
            [2, 3, 10, 5, 6, 7, 13, 14, 8, 11, 1, 1],
            device=cls_score.device)
        scores = F.softmax(
            cls_score.index_select(-1, remap_indices), dim=-1)
    else:
        # Keep ``None`` scores flowing through, as the guards below expect.
        scores = None

    batch_mode = True
    if rois.ndim == 2:
        # e.g. AugTest, Cascade R-CNN, HTC, SCNet...
        batch_mode = False

        # add batch dimension
        if scores is not None:
            scores = scores.unsqueeze(0)
        if bbox_pred is not None:
            bbox_pred = bbox_pred.unsqueeze(0)
        rois = rois.unsqueeze(0)

    if bbox_pred is not None:
        bboxes = self.bbox_coder.decode(
            rois[..., 1:], bbox_pred, max_shape=img_shape)
    else:
        bboxes = rois[..., 1:].clone()
        if img_shape is not None:
            # Clip the raw RoIs into the image: the first two shape entries
            # are duplicated and flipped to line up with the 4 box columns.
            max_shape = bboxes.new_tensor(img_shape)[..., :2]
            min_xy = bboxes.new_tensor(0)
            max_xy = torch.cat(
                [max_shape] * 2, dim=-1).flip(-1).unsqueeze(-2)
            bboxes = torch.where(bboxes < min_xy, min_xy, bboxes)
            bboxes = torch.where(bboxes > max_xy, max_xy, bboxes)

    if rescale and bboxes.size(-2) > 0:
        if not isinstance(scale_factor, tuple):
            scale_factor = tuple([scale_factor])
        # B, 1, bboxes.size(-1)
        scale_factor = bboxes.new_tensor(scale_factor).unsqueeze(1).repeat(
            1, 1, bboxes.size(-1) // 4)
        bboxes /= scale_factor

    # Replace multiclass_nms with ONNX::NonMaxSuppression in deployment
    if torch.onnx.is_in_onnx_export():
        from mmdet.core.export import add_dummy_nms_for_onnx
        batch_size = scores.shape[0]
        # ignore background class
        scores = scores[..., :self.num_classes]
        labels = torch.arange(
            self.num_classes, dtype=torch.long).to(scores.device)
        labels = labels.view(1, 1, -1).expand_as(scores)
        labels = labels.reshape(batch_size, -1)
        scores = scores.reshape(batch_size, -1)
        bboxes = bboxes.reshape(batch_size, -1, 4)

        max_size = torch.max(img_shape)
        # Offset bboxes of each class so that bboxes of different labels
        # do not overlap.
        offsets = (labels * max_size + 1).unsqueeze(2)
        bboxes_for_nms = bboxes + offsets
        max_output_boxes_per_class = cfg.nms.get(
            'max_output_boxes_per_class', cfg.max_per_img)
        iou_threshold = cfg.nms.get('iou_threshold', 0.5)
        score_threshold = cfg.score_thr
        nms_pre = cfg.get('deploy_nms_pre', -1)
        batch_dets, labels = add_dummy_nms_for_onnx(
            bboxes_for_nms,
            scores.unsqueeze(2),
            max_output_boxes_per_class,
            iou_threshold,
            score_threshold,
            pre_top_k=nms_pre,
            after_top_k=cfg.max_per_img,
            labels=labels)
        # Offset the bboxes back after dummy nms.
        offsets = (labels * max_size + 1).unsqueeze(2)
        # Indexing + inplace operation fails with dynamic shape in ONNX
        # original style: batch_dets[..., :4] -= offsets
        bboxes, scores = batch_dets[..., 0:4], batch_dets[..., 4:5]
        bboxes -= offsets
        batch_dets = torch.cat([bboxes, scores], dim=2)
        return batch_dets, labels

    # zip() cannot iterate ``None``; pair every image with a ``None`` score
    # so score-less inputs still return their boxes.
    if scores is not None:
        per_image_scores = scores
    else:
        per_image_scores = [None] * len(bboxes)

    det_bboxes = []
    det_labels = []
    for (bbox, score) in zip(bboxes, per_image_scores):
        if cfg is not None:
            det_bbox, det_label = multiclass_nms(bbox, score,
                                                 cfg.score_thr, cfg.nms,
                                                 cfg.max_per_img)
        else:
            det_bbox, det_label = bbox, score
        det_bboxes.append(det_bbox)
        det_labels.append(det_label)

    if not batch_mode:
        det_bboxes = det_bboxes[0]
        det_labels = det_labels[0]
    return det_bboxes, det_labels
def onnx_export(self, pred_maps, img_metas, with_nms=True):
    """Decode multi-level prediction maps into boxes and scores for ONNX.

    Every level is reshaped to ``(batch, -1, self.num_attrib)``, decoded
    with ``self.bbox_coder`` against the anchors from
    ``self.prior_generator``, optionally reduced to the top ``nms_pre``
    candidates by objectness, and finally concatenated over levels.

    Args:
        pred_maps (Sequence[Tensor]): Raw prediction maps, one per level;
            ``len(pred_maps)`` must equal ``self.num_levels``. Each map is
            permuted with ``(0, 2, 3, 1)``, i.e. it is expected to be 4-D
            with channels in dimension 1.
        img_metas (list[dict]): Unused here; kept for interface
            compatibility.
        with_nms (bool): If True, run ``add_dummy_nms_for_onnx`` on the
            merged results. Default: True.

    Returns:
        With ``with_nms=True`` the result of ``add_dummy_nms_for_onnx``.
        Otherwise a tuple ``(bboxes, scores)`` where ``scores`` are the
        per-class scores multiplied by the objectness confidence, shaped
        ``(batch, num_boxes, self.num_classes)``.
    """
    # Function-scope import as before, but done once instead of once per
    # pyramid level.
    from mmdet.core.export import add_dummy_nms_for_onnx, get_k_for_topk

    pred_maps_list = [pred_map.detach() for pred_map in pred_maps]

    cfg = self.test_cfg
    assert len(pred_maps_list) == self.num_levels

    device = pred_maps_list[0].device
    batch_size = pred_maps_list[0].shape[0]

    featmap_sizes = [pred_map.shape[-2:] for pred_map in pred_maps_list]
    mlvl_anchors = self.prior_generator.grid_priors(
        featmap_sizes, device=device)
    # convert to tensor to keep tracing
    nms_pre_tensor = torch.tensor(
        cfg.get('nms_pre', -1), device=device, dtype=torch.long)

    multi_lvl_bboxes = []
    multi_lvl_cls_scores = []
    multi_lvl_conf_scores = []
    for level, pred_map in enumerate(pred_maps_list):
        # get some key info for current scale
        stride = self.featmap_strides[level]
        # (b,h, w, num_anchors*num_attrib) ->
        # (b,h*w*num_anchors, num_attrib)
        pred_map = pred_map.permute(0, 2, 3, 1).reshape(
            batch_size, -1, self.num_attrib)
        # Inplace operation like
        # ```pred_map[..., :2] = \torch.sigmoid(pred_map[..., :2])```
        # would create constant tensor when exporting to onnx
        pred_map_conf = torch.sigmoid(pred_map[..., :2])
        pred_map_rest = pred_map[..., 2:]
        pred_map = torch.cat([pred_map_conf, pred_map_rest], dim=-1)
        pred_map_boxes = pred_map[..., :4]
        level_anchors = mlvl_anchors[level].expand_as(pred_map_boxes)
        bbox_pred = self.bbox_coder.decode(level_anchors, pred_map_boxes,
                                           stride)
        # conf and cls
        conf_pred = torch.sigmoid(pred_map[..., 4])
        cls_pred = torch.sigmoid(pred_map[..., 5:]).view(
            batch_size, -1, self.num_classes)  # Cls pred one-hot.

        # Get top-k prediction
        nms_pre = get_k_for_topk(nms_pre_tensor, bbox_pred.shape[1])
        if nms_pre > 0:
            _, topk_inds = conf_pred.topk(nms_pre)
            # Build the batch index on the same device as ``topk_inds``;
            # a CPU-only arange cannot be added to indices that live on
            # another device.
            batch_inds = torch.arange(
                batch_size, device=topk_inds.device).view(
                    -1, 1).expand_as(topk_inds).long()
            # Avoid onnx2tensorrt issue in https://github.com/NVIDIA/TensorRT/issues/1134 # noqa: E501
            transformed_inds = (
                bbox_pred.shape[1] * batch_inds + topk_inds)
            bbox_pred = bbox_pred.reshape(-1,
                                          4)[transformed_inds, :].reshape(
                                              batch_size, -1, 4)
            cls_pred = cls_pred.reshape(
                -1, self.num_classes)[transformed_inds, :].reshape(
                    batch_size, -1, self.num_classes)
            conf_pred = conf_pred.reshape(-1, 1)[transformed_inds].reshape(
                batch_size, -1)

        # Save the result of current scale
        multi_lvl_bboxes.append(bbox_pred)
        multi_lvl_cls_scores.append(cls_pred)
        multi_lvl_conf_scores.append(conf_pred)

    # Merge the results of different scales together
    batch_mlvl_bboxes = torch.cat(multi_lvl_bboxes, dim=1)
    batch_mlvl_scores = torch.cat(multi_lvl_cls_scores, dim=1)
    batch_mlvl_conf_scores = torch.cat(multi_lvl_conf_scores, dim=1)

    # Replace multiclass_nms with ONNX::NonMaxSuppression in deployment
    conf_thr = cfg.get('conf_thr', -1)
    score_thr = cfg.get('score_thr', -1)
    # follow original pipeline of YOLOv3
    if conf_thr > 0:
        # Zero out (rather than drop) low-confidence entries so tensor
        # shapes stay unchanged.
        mask = (batch_mlvl_conf_scores >= conf_thr).float()
        batch_mlvl_conf_scores *= mask
    if score_thr > 0:
        mask = (batch_mlvl_scores > score_thr).float()
        batch_mlvl_scores *= mask
    batch_mlvl_conf_scores = batch_mlvl_conf_scores.unsqueeze(2).expand_as(
        batch_mlvl_scores)
    batch_mlvl_scores = batch_mlvl_scores * batch_mlvl_conf_scores

    if not with_nms:
        return batch_mlvl_bboxes, batch_mlvl_scores

    max_output_boxes_per_class = cfg.nms.get('max_output_boxes_per_class',
                                             200)
    iou_threshold = cfg.nms.get('iou_threshold', 0.5)
    # keep aligned with original pipeline, improve
    # mAP by 1% for YOLOv3 in ONNX
    score_threshold = 0
    nms_pre = cfg.get('deploy_nms_pre', -1)
    return add_dummy_nms_for_onnx(
        batch_mlvl_bboxes,
        batch_mlvl_scores,
        max_output_boxes_per_class,
        iou_threshold,
        score_threshold,
        nms_pre,
        cfg.max_per_img,
    )
def _get_bboxes(self,
                cls_scores,
                bbox_preds,
                centernesses,
                mlvl_points,
                img_shapes,
                scale_factors,
                cfg,
                rescale=False,
                with_nms=True):
    """Transform multi-level head outputs of a batch into bbox predictions.

    Args:
        cls_scores (list[Tensor]): Box scores for each scale level
            with shape (N, num_points * num_classes, H, W).
        bbox_preds (list[Tensor]): Box energies / deltas for each scale
            level with shape (N, num_points * 4, H, W).
        centernesses (list[Tensor]): Centerness for each scale level with
            shape (N, num_points, H, W).
        mlvl_points (list[Tensor]): Reference points of each scale level;
            every entry is expanded to (N, num_points, 2) here.
        img_shapes (list[tuple[int]]): Shape of the input images,
            list[(height, width, 3)]; forwarded to ``distance2bbox`` as
            ``max_shape``.
        scale_factors (list[ndarray]): Scale factor of each image arranged
            as (w_scale, h_scale, w_scale, h_scale).
        cfg (mmcv.Config | None): Test / postprocessing configuration,
            if None, test_cfg would be used.
        rescale (bool): If True, return boxes in original image space.
            Default: False.
        with_nms (bool): If True, do nms before return boxes.
            Default: True.

    Returns:
        During ONNX export with ``with_nms=True``: the result of
        ``add_dummy_nms_for_onnx``.

        Otherwise list[tuple[Tensor]]: one entry per image. With
        ``with_nms=True`` each entry is ``(det_bboxes, det_labels)`` as
        returned by ``multiclass_nms``. With ``with_nms=False`` each entry
        is ``(bboxes, scores, centerness)`` without NMS, where ``scores``
        carries one extra zero-filled background column.
    """
    cfg = self.test_cfg if cfg is None else cfg
    assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)
    device = cls_scores[0].device
    batch_size = cls_scores[0].shape[0]
    # convert to tensor to keep tracing
    nms_pre_tensor = torch.tensor(
        cfg.get('nms_pre', -1), device=device, dtype=torch.long)

    # Function-scope import as before, but done once instead of once per
    # pyramid level.
    from mmdet.core.export import get_k_for_topk

    mlvl_bboxes = []
    mlvl_scores = []
    mlvl_centerness = []
    for cls_score, bbox_pred, centerness, points in zip(
            cls_scores, bbox_preds, centernesses, mlvl_points):
        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
        scores = cls_score.permute(0, 2, 3, 1).reshape(
            batch_size, -1, self.cls_out_channels).sigmoid()
        centerness = centerness.permute(0, 2, 3,
                                        1).reshape(batch_size,
                                                   -1).sigmoid()

        bbox_pred = bbox_pred.permute(0, 2, 3,
                                      1).reshape(batch_size, -1, 4)
        points = points.expand(batch_size, -1, 2)
        # Get top-k prediction
        nms_pre = get_k_for_topk(nms_pre_tensor, bbox_pred.shape[1])
        if nms_pre > 0:
            max_scores, _ = (scores * centerness[..., None]).max(-1)
            _, topk_inds = max_scores.topk(nms_pre)
            # Build the batch index on the same device as ``topk_inds``;
            # a CPU-only arange cannot be combined arithmetically with
            # indices that live on another device.
            batch_inds = torch.arange(
                batch_size, device=topk_inds.device).view(
                    -1, 1).expand_as(topk_inds).long()
            # Avoid onnx2tensorrt issue in https://github.com/NVIDIA/TensorRT/issues/1134 # noqa: E501
            if torch.onnx.is_in_onnx_export():
                transformed_inds = bbox_pred.shape[
                    1] * batch_inds + topk_inds
                points = points.reshape(-1,
                                        2)[transformed_inds, :].reshape(
                                            batch_size, -1, 2)
                bbox_pred = bbox_pred.reshape(
                    -1, 4)[transformed_inds, :].reshape(batch_size, -1, 4)
                # ``scores`` was built with ``cls_out_channels`` columns
                # above, so flatten it with the same width.
                scores = scores.reshape(
                    -1, self.cls_out_channels)[transformed_inds, :].reshape(
                        batch_size, -1, self.cls_out_channels)
                centerness = centerness.reshape(
                    -1, 1)[transformed_inds].reshape(batch_size, -1)
            else:
                points = points[batch_inds, topk_inds, :]
                bbox_pred = bbox_pred[batch_inds, topk_inds, :]
                scores = scores[batch_inds, topk_inds, :]
                centerness = centerness[batch_inds, topk_inds]

        bboxes = distance2bbox(points, bbox_pred, max_shape=img_shapes)
        mlvl_bboxes.append(bboxes)
        mlvl_scores.append(scores)
        mlvl_centerness.append(centerness)

    batch_mlvl_bboxes = torch.cat(mlvl_bboxes, dim=1)
    if rescale:
        batch_mlvl_bboxes /= batch_mlvl_bboxes.new_tensor(
            scale_factors).unsqueeze(1)
    batch_mlvl_scores = torch.cat(mlvl_scores, dim=1)
    batch_mlvl_centerness = torch.cat(mlvl_centerness, dim=1)

    # Replace multiclass_nms with ONNX::NonMaxSuppression in deployment
    if torch.onnx.is_in_onnx_export() and with_nms:
        from mmdet.core.export import add_dummy_nms_for_onnx
        batch_mlvl_scores = batch_mlvl_scores * (
            batch_mlvl_centerness.unsqueeze(2))
        max_output_boxes_per_class = cfg.nms.get(
            'max_output_boxes_per_class', 200)
        iou_threshold = cfg.nms.get('iou_threshold', 0.5)
        score_threshold = cfg.score_thr
        nms_pre = cfg.get('deploy_nms_pre', -1)
        return add_dummy_nms_for_onnx(batch_mlvl_bboxes, batch_mlvl_scores,
                                      max_output_boxes_per_class,
                                      iou_threshold, score_threshold,
                                      nms_pre, cfg.max_per_img)
    # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
    # BG cat_id: num_class
    padding = batch_mlvl_scores.new_zeros(batch_size,
                                          batch_mlvl_scores.shape[1], 1)
    batch_mlvl_scores = torch.cat([batch_mlvl_scores, padding], dim=-1)

    if with_nms:
        det_results = []
        # Per-image names kept distinct from the per-level accumulator
        # lists built above.
        for (img_bboxes, img_scores,
             img_centerness) in zip(batch_mlvl_bboxes, batch_mlvl_scores,
                                    batch_mlvl_centerness):
            det_bbox, det_label = multiclass_nms(
                img_bboxes,
                img_scores,
                cfg.score_thr,
                cfg.nms,
                cfg.max_per_img,
                score_factors=img_centerness)
            det_results.append(tuple([det_bbox, det_label]))
    else:
        det_results = [
            tuple(mlvl_bs)
            for mlvl_bs in zip(batch_mlvl_bboxes, batch_mlvl_scores,
                               batch_mlvl_centerness)
        ]
    return det_results