def _topk_min(input: Tensor, orig_kval: int, axis: int) -> int:
    """Clamp a requested top-k value to the size of ``input`` along ``axis``.

    The ONNX spec requires k to be less than or equal to the number of
    elements along the chosen dimension, and some models simply pass the
    dimension size when k would exceed it. Python's ``min()`` is evaluated
    once while tracing, which bakes a constant into the traced graph and
    makes the exported topk node fail for other input sizes. While tracing,
    the minimum is therefore computed with tensor ops instead.

    Args:
        input (Tensor): The original input tensor.
        orig_kval (int): The requested k-value.
        axis (int): Axis along which the input size is retrieved.

    Returns:
        min_kval (int): The smaller of ``orig_kval`` and the size of
            ``input`` along ``axis``. While tracing, this is whatever
            ``_fake_cast_onnx`` returns for the tensor-valued minimum.
    """
    if torch.jit.is_tracing():
        # Keep the size as a tensor so the comparison is part of the trace.
        dim_size = torch._shape_as_tensor(input)[axis].unsqueeze(0)
        requested = torch.tensor([orig_kval], dtype=dim_size.dtype)
        candidates = torch.cat((requested, dim_size), 0)
        return _fake_cast_onnx(torch.min(candidates))

    # Eager mode: plain Python integers are sufficient.
    return min(orig_kval, input.size(axis))
def simple_test(self, img, img_metas, rescale=False):
    """Run single-scale inference without test-time augmentation.

    Args:
        img (torch.Tensor): Input images; dims 2 onwards are read as the
            spatial shape during ONNX export.
        img_metas (list[dict]): List of image information. During ONNX
            export ``img_metas[0]['img_shape_for_onnx']`` is written.
        rescale (bool, optional): Forwarded to ``bbox_head.get_bboxes``.
            Defaults to False.

    Returns:
        list[list[np.ndarray]]: BBox results of each image and classes.
            The outer list corresponds to each image. The inner list
            corresponds to each class. During ONNX export the raw output of
            ``bbox_head.get_bboxes`` is returned instead.
    """
    feats = self.extract_feat(img)
    head_outs = self.bbox_head(feats)

    exporting = torch.onnx.is_in_onnx_export()
    if exporting:
        # Keep the input shape as a tensor to support ONNX dynamic shapes.
        img_metas[0]['img_shape_for_onnx'] = torch._shape_as_tensor(img)[2:]

    bbox_list = self.bbox_head.get_bboxes(
        *head_outs, img_metas, rescale=rescale)

    # Post-processing is skipped when exporting to ONNX.
    if exporting:
        return bbox_list

    bbox_results = []
    for det_bboxes, det_labels in bbox_list:
        bbox_results.append(
            bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes))
    return bbox_results
def onnx_export(self, img, img_metas, with_nms=True, rescale=False, **kwargs):
    """Run the detector for ONNX export, without test-time augmentation.

    Args:
        img (torch.Tensor): input images.
        img_metas (list[dict]): List of image information. The first entry
            gets ``img_shape_for_onnx`` and ``pad_shape_for_onnx`` written.
        with_nms (bool): Forwarded to ``bbox_head.onnx_export``.
            Defaults to True.
        rescale (bool): Accepted but not read by this method.
        **kwargs: Accepted but not read by this method.

    Returns:
        tuple[Tensor, Tensor]: dets of shape [N, num_det, 5]
            and class labels of shape [N, num_det].
    """
    feats = self.extract_feat(img)
    head_outs = self.bbox_head(feats)

    # Keep the spatial shape as a tensor to support ONNX dynamic shapes.
    spatial_shape = torch._shape_as_tensor(img)[2:]
    first_meta = img_metas[0]
    first_meta['img_shape_for_onnx'] = spatial_shape
    # `CornerNet` and `CentripetalNet` read 'pad_shape' at inference, so the
    # same tensor is exposed under the pad-shape key as well.
    first_meta['pad_shape_for_onnx'] = spatial_shape

    if len(head_outs) == 2:
        # Append a dummy score_factor so the head always gets three outputs.
        head_outs = tuple(head_outs) + (None, )

    # TODO Can we change to `get_bboxes` when `onnx_export` fail
    det_bboxes, det_labels = self.bbox_head.onnx_export(
        *head_outs, img_metas, with_nms=with_nms)
    return det_bboxes, det_labels
def onnx_export(self, img, img_metas):
    """Run the detector for ONNX export, without test-time augmentation.

    Args:
        img (torch.Tensor): input images.
        img_metas (list[dict]): List of image information. The first entry
            gets ``img_shape_for_onnx`` and ``pad_shape_for_onnx`` written.

    Returns:
        tuple[Tensor, Tensor]: dets of shape [N, num_det, 5]
            and class labels of shape [N, num_det].
    """
    feats = self.extract_feat(img)
    head_outs = self.bbox_head(feats)

    # Keep the spatial shape as a tensor to support ONNX dynamic shapes.
    spatial_shape = torch._shape_as_tensor(img)[2:]
    first_meta = img_metas[0]
    first_meta['img_shape_for_onnx'] = spatial_shape
    # `CornerNet` and `CentripetalNet` read 'pad_shape' at inference, so the
    # same tensor is exposed under the pad-shape key as well.
    first_meta['pad_shape_for_onnx'] = spatial_shape

    # TODO:move all onnx related code in bbox_head to onnx_export function
    det_bboxes, det_labels = self.bbox_head.get_bboxes(*head_outs, img_metas)
    return det_bboxes, det_labels
def onnx_export(self, img, img_metas):
    """Run the RPN and RoI heads through their ONNX export paths.

    Args:
        img (torch.Tensor): Input images; dims 2 onwards are stored as the
            image shape.
        img_metas (list[dict]): List of image information. The first entry
            gets ``img_shape_for_onnx`` written before any head is called.

    Returns:
        Whatever ``self.roi_head.onnx_export`` returns.
    """
    # Keep the spatial shape as a tensor to support ONNX dynamic shapes.
    img_metas[0]['img_shape_for_onnx'] = torch._shape_as_tensor(img)[2:]

    feats = self.extract_feat(img)
    rpn_proposals = self.rpn_head.onnx_export(feats, img_metas)
    return self.roi_head.onnx_export(feats, rpn_proposals, img_metas)
def forward(
    self,
    query,
    key,
    layer_state: Optional[List[Tensor]],
    encoder_decoder_attention: bool,
    use_past=torch.tensor(False),
):
    """Build the key/value tensors for one attention call.

    Args:
        query: Tensor projected for self-attention; its dim-1 size is read
            as ``bsz`` (presumably the batch size - confirm with caller).
        key: Tensor projected for encoder-decoder attention when no cached
            state is used.
        layer_state (Optional[List[Tensor]]): Four cached tensors in the
            order (self key, self value, enc-dec key, enc-dec value), or
            None.
        encoder_decoder_attention (bool): Selects the encoder-decoder path
            instead of the self-attention path.
        use_past: Truthy value enabling use of ``layer_state``.

    Returns:
        tuple: ``(k, v)``.
    """
    bsz = torch._shape_as_tensor(query)[1]

    if layer_state is None or not use_past:
        # No usable cache: project from the key (cross attention) or from
        # the query (self attention).
        source = key if encoder_decoder_attention else query
        k = self.k_proj(source)
        v = self.v_proj(source)
        k = self.shape_proj(k, bsz)
        v = self.shape_proj(v, bsz)
        return k, v

    self_p_k, self_p_v, enc_dec_p_k, enc_dec_p_v = layer_state

    if encoder_decoder_attention:
        # Cross attention reuses the cached tensors as they are.
        k = self.shape_state(enc_dec_p_k, bsz)
        v = self.shape_state(enc_dec_p_v, bsz)
        return k, v

    # Self attention: prepend the cached tensors along dim 1.
    k = self.k_proj(query)
    v = self.v_proj(query)
    k = self.shape_proj(k, bsz)
    v = self.shape_proj(v, bsz)
    k = torch.cat([self.shape_state(self_p_k, bsz), k], dim=1)
    v = torch.cat([self.shape_state(self_p_v, bsz), v], dim=1)
    return k, v
def simple_test(self, img, img_metas, rescale=False):
    """Compute proposals without test-time augmentation.

    Args:
        img (torch.Tensor): Input images; dims 2 onwards are read as the
            spatial shape during ONNX export.
        img_metas (list[dict]): List of image information. ``scale_factor``
            is read from each entry when ``rescale`` is set.
        rescale (bool, optional): Whether to divide the first four columns
            of every proposal by the image's scale factor.
            Defaults to False.

    Returns:
        list[np.ndarray]: proposals. During ONNX export the proposal
            tensors are returned without conversion to numpy.
    """
    feats = self.extract_feat(img)

    exporting = torch.onnx.is_in_onnx_export()
    if exporting:
        # Keep the input shape as a tensor for ONNX dynamic input shapes.
        img_metas[0]['img_shape_for_onnx'] = torch._shape_as_tensor(img)[2:]

    proposal_list = self.rpn_head.simple_test_rpn(feats, img_metas)

    if rescale:
        for boxes, meta in zip(proposal_list, img_metas):
            factor = boxes.new_tensor(meta['scale_factor'])
            # In-place so that proposal_list itself is updated.
            boxes[:, :4] /= factor

    if exporting:
        return proposal_list

    as_numpy = []
    for boxes in proposal_list:
        as_numpy.append(boxes.cpu().numpy())
    return as_numpy
def onnx_export(self, img, img_metas):
    """Run the RPN and RoI heads through their ONNX export paths.

    Args:
        img (torch.Tensor): Input images; dims 2 onwards are stored as the
            image shape.
        img_metas (list[dict]): List of image information. The first entry
            gets ``img_shape_for_onnx`` written before any head is called.

    Returns:
        Whatever ``self.roi_head.onnx_export`` returns.

    Raises:
        NotImplementedError: If ``self.roi_head`` has no ``onnx_export``
            attribute. The check happens after the RPN head has run.
    """
    # Keep the spatial shape as a tensor to support ONNX dynamic shapes.
    img_metas[0]['img_shape_for_onnx'] = torch._shape_as_tensor(img)[2:]

    feats = self.extract_feat(img)
    rpn_proposals = self.rpn_head.onnx_export(feats, img_metas)

    if not hasattr(self.roi_head, 'onnx_export'):
        raise NotImplementedError(
            f'{self.__class__.__name__} can not '
            f'be exported to ONNX. Please refer to the '
            f'list of supported models,'
            f'https://mmdetection.readthedocs.io/en/latest/tutorials/pytorch2onnx.html#list-of-supported-models-exportable-to-onnx'  # noqa E501
        )
    return self.roi_head.onnx_export(feats, rpn_proposals, img_metas)
def simple_test(self, img, img_metas, proposals=None, rescale=False):
    """Test without augmentation.

    Args:
        img (torch.Tensor): Input images; dims 2 onwards are read as the
            spatial shape during ONNX export.
        img_metas (list[dict]): List of image information.
        proposals: Precomputed proposals. When None they are obtained from
            ``self.rpn_head.simple_test_rpn``.
        rescale (bool): Forwarded to ``self.roi_head.simple_test``.

    Returns:
        Whatever ``self.roi_head.simple_test`` returns.
    """
    assert self.with_bbox, 'Bbox head must be implemented.'
    feats = self.extract_feat(img)

    if torch.onnx.is_in_onnx_export():
        # Keep the input shape as a tensor for ONNX dynamic input shapes.
        img_metas[0]['img_shape_for_onnx'] = torch._shape_as_tensor(img)[2:]

    proposal_list = (
        self.rpn_head.simple_test_rpn(feats, img_metas)
        if proposals is None else proposals)

    return self.roi_head.simple_test(
        feats, proposal_list, img_metas, rescale=rescale)
def onnx_export(self, img, img_metas):
    """Run the detector for ONNX export, without test-time augmentation.

    Args:
        img (torch.Tensor): input images.
        img_metas (list[dict]): List of image information. The first entry
            gets ``img_shape_for_onnx`` written.

    Returns:
        tuple[Tensor, Tensor]: dets of shape [N, num_det, 5]
            and class labels of shape [N, num_det].
    """
    feats = self.extract_feat(img)
    # The head's forward is invoked directly rather than through __call__.
    head_outs = self.bbox_head.forward(feats)

    # Keep the spatial shape as a tensor to support ONNX dynamic shapes.
    img_metas[0]['img_shape_for_onnx'] = torch._shape_as_tensor(img)[2:]

    det_bboxes, det_labels = self.bbox_head.onnx_export(*head_outs, img_metas)
    return det_bboxes, det_labels
def simple_test(self, img, img_metas, proposals=None, rescale=False):
    """Test without augmentation.

    Args:
        img (torch.Tensor): Input images; dims 2 onwards are read as the
            spatial shape during ONNX export.
        img_metas (list[dict]): List of image information.
        proposals: Precomputed proposals. When None they are obtained from
            ``self.rpn_head.simple_test_rpn``.
        rescale (bool): Forwarded to ``self.roi_head.simple_test``.

    Returns:
        Whatever ``self.roi_head.simple_test`` returns.
    """
    assert self.with_bbox, 'Bbox head must be implemented.'
    # extract_feat yields three values here; the third one is not used.
    x, mask_x, _ = self.extract_feat(img, None)

    if torch.onnx.is_in_onnx_export():
        # Keep the input shape as a tensor for ONNX dynamic input shapes.
        img_metas[0]['img_shape_for_onnx'] = torch._shape_as_tensor(img)[2:]

    # Proposals are computed from the plain features, before any mask
    # features are concatenated below.
    proposal_list = (
        self.rpn_head.simple_test_rpn(x, img_metas)
        if proposals is None else proposals)

    if mask_x is not None:
        # Pairwise concatenation of features and mask features along dim 1.
        x = tuple(
            torch.cat([feat, mask_feat], dim=1)
            for feat, mask_feat in zip(x, mask_x))

    return self.roi_head.simple_test(
        x, proposal_list, img_metas, rescale=rescale)
def _get_bboxes(self,
                cls_score_list,
                bbox_pred_list,
                mlvl_anchors,
                img_shapes,
                scale_factors,
                cfg,
                rescale=False,
                with_nms=True):
    """Transform batched multi-level outputs into bbox predictions.

    Each level is flattened to ``(N, num_priors, ...)``, optionally reduced
    to its ``nms_pre`` best-scoring candidates, decoded through
    ``self.bbox_coder`` and concatenated along dim 1. NMS is then applied
    per image unless ``with_nms`` is False.

    Args:
        cls_score_list (list[Tensor]): Box scores, one tensor per scale
            level, each with shape (N, num_anchors * num_classes, H, W).
        bbox_pred_list (list[Tensor]): Box energies / deltas, one tensor
            per scale level, each with shape (N, num_anchors * 4, H, W).
        mlvl_anchors (list[Tensor]): Box reference, one tensor per scale
            level; each is expanded to the shape of that level's
            flattened ``bbox_pred``.
        img_shapes (list[tuple[int]]): Shape of the batch input image,
            list[(height, width, 3)]. Forwarded unchanged to
            ``self.bbox_coder.decode`` as ``max_shape``.
        scale_factors (list[ndarray]): Scale factor of the batch
            image arranged as list[(w_scale, h_scale, w_scale, h_scale)].
        cfg (mmcv.Config): Test / postprocessing configuration,
            if None, test_cfg would be used. Keys read here: ``nms_pre``,
            ``deploy_nms_pre``, ``score_thr``, ``nms``, ``max_per_img``.
        rescale (bool): If True, return boxes in original image space.
            Default: False.
        with_nms (bool): If True, do nms before return boxes.
            Default: True.

    Returns:
        list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.
            The first item is an (n, 5) tensor, where 5 represent
            (tl_x, tl_y, br_x, br_y, score) and the score between 0 and 1.
            The shape of the second tensor in the tuple is (n,), and
            each element represents the class label of the corresponding
            box. With ``with_nms=False`` each tuple instead holds the
            un-suppressed ``(bboxes, scores)`` of one image.
    """
    cfg = self.test_cfg if cfg is None else cfg
    assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)
    batch_size = cls_score_list[0].shape[0]
    # convert to tensor to keep tracing
    nms_pre_tensor = torch.tensor(
        cfg.get('nms_pre', -1),
        device=cls_score_list[0].device,
        dtype=torch.long)

    mlvl_bboxes = []
    mlvl_scores = []
    for cls_score, bbox_pred, anchors in zip(cls_score_list,
                                             bbox_pred_list, mlvl_anchors):
        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
        # (N, A*C, H, W) -> (N, H*W*A, C)
        cls_score = cls_score.permute(0, 2, 3,
                                      1).reshape(batch_size, -1,
                                                 self.cls_out_channels)
        if self.use_sigmoid_cls:
            scores = cls_score.sigmoid()
        else:
            scores = cls_score.softmax(-1)
        # (N, A*4, H, W) -> (N, H*W*A, 4)
        bbox_pred = bbox_pred.permute(0, 2, 3,
                                      1).reshape(batch_size, -1, 4)
        anchors = anchors.expand_as(bbox_pred)
        # Always keep topk op for dynamic input in onnx
        if nms_pre_tensor > 0 and (torch.onnx.is_in_onnx_export()
                                   or scores.shape[-2] > nms_pre_tensor):
            from torch import _shape_as_tensor
            # keep shape as tensor and get k
            num_anchor = _shape_as_tensor(scores)[-2].to(
                nms_pre_tensor.device)
            # k = min(nms_pre, number of candidates), computed with tensor
            # ops so it is not frozen into the exported graph.
            nms_pre = torch.where(nms_pre_tensor < num_anchor,
                                  nms_pre_tensor, num_anchor)

            # Get maximum scores for foreground classes.
            if self.use_sigmoid_cls:
                max_scores, _ = scores.max(-1)
            else:
                # remind that we set FG labels to [0, num_class-1]
                # since mmdet v2.0
                # BG cat_id: num_class
                max_scores, _ = scores[..., :-1].max(-1)

            _, topk_inds = max_scores.topk(nms_pre)
            # Row indices paired with topk_inds for the batched gather.
            batch_inds = torch.arange(batch_size).view(
                -1, 1).expand_as(topk_inds)
            anchors = anchors[batch_inds, topk_inds, :]
            bbox_pred = bbox_pred[batch_inds, topk_inds, :]
            scores = scores[batch_inds, topk_inds, :]

        bboxes = self.bbox_coder.decode(
            anchors, bbox_pred, max_shape=img_shapes)
        mlvl_bboxes.append(bboxes)
        mlvl_scores.append(scores)

    batch_mlvl_bboxes = torch.cat(mlvl_bboxes, dim=1)
    if rescale:
        batch_mlvl_bboxes /= batch_mlvl_bboxes.new_tensor(
            scale_factors).unsqueeze(1)
    batch_mlvl_scores = torch.cat(mlvl_scores, dim=1)

    # Set max number of box to be feed into nms in deployment
    deploy_nms_pre = cfg.get('deploy_nms_pre', -1)
    if deploy_nms_pre > 0 and torch.onnx.is_in_onnx_export():
        # Get maximum scores for foreground classes.
        if self.use_sigmoid_cls:
            max_scores, _ = batch_mlvl_scores.max(-1)
        else:
            # remind that we set FG labels to [0, num_class-1]
            # since mmdet v2.0
            # BG cat_id: num_class
            max_scores, _ = batch_mlvl_scores[..., :-1].max(-1)
        _, topk_inds = max_scores.topk(deploy_nms_pre)
        batch_inds = torch.arange(batch_size).view(-1,
                                                   1).expand_as(topk_inds)
        batch_mlvl_scores = batch_mlvl_scores[batch_inds, topk_inds]
        batch_mlvl_bboxes = batch_mlvl_bboxes[batch_inds, topk_inds]
    if self.use_sigmoid_cls:
        # Add a dummy background class to the backend when using sigmoid
        # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
        # BG cat_id: num_class
        padding = batch_mlvl_scores.new_zeros(batch_size,
                                              batch_mlvl_scores.shape[1],
                                              1)
        batch_mlvl_scores = torch.cat([batch_mlvl_scores, padding], dim=-1)

    if with_nms:
        det_results = []
        # NOTE: the loop variables reuse the names of the per-level lists
        # built above; those lists are no longer needed at this point.
        for (mlvl_bboxes, mlvl_scores) in zip(batch_mlvl_bboxes,
                                              batch_mlvl_scores):
            det_bbox, det_label = multiclass_nms(mlvl_bboxes, mlvl_scores,
                                                 cfg.score_thr, cfg.nms,
                                                 cfg.max_per_img)
            det_results.append(tuple([det_bbox, det_label]))
    else:
        det_results = [
            tuple(mlvl_bs)
            for mlvl_bs in zip(batch_mlvl_bboxes, batch_mlvl_scores)
        ]
    return det_results
def _get_bboxes(self,
                cls_scores,
                bbox_preds,
                centernesses,
                mlvl_points,
                img_shapes,
                scale_factors,
                cfg,
                rescale=False,
                with_nms=True):
    """Transform batched multi-level outputs into bbox predictions.

    Each level is flattened to ``(N, num_points, ...)``, optionally reduced
    to its ``nms_pre`` best candidates (ranked by score * centerness),
    decoded with ``distance2bbox`` and concatenated along dim 1. NMS is
    then applied per image unless ``with_nms`` is False.

    Args:
        cls_scores (list[Tensor]): Box scores, one tensor per scale level,
            each with shape (N, num_points * num_classes, H, W).
        bbox_preds (list[Tensor]): Box energies / deltas, one tensor per
            scale level, each with shape (N, num_points * 4, H, W).
        centernesses (list[Tensor]): Centerness, one tensor per scale
            level, each with shape (N, num_points, H, W).
        mlvl_points (list[Tensor]): Box reference, one 2-D tensor per scale
            level with one row per location (column layout is whatever
            ``distance2bbox`` expects - TODO confirm).
        img_shapes (list[tuple[int]]): Shape of the input image,
            list[(height, width, 3)]. Forwarded unchanged to
            ``distance2bbox`` as ``max_shape``.
        scale_factors (list[ndarray]): Scale factor of the image arranged
            as (w_scale, h_scale, w_scale, h_scale).
        cfg (mmcv.Config | None): Test / postprocessing configuration,
            if None, test_cfg would be used. Keys read here: ``nms_pre``,
            ``deploy_nms_pre``, ``score_thr``, ``nms``, ``max_per_img``.
        rescale (bool): If True, return boxes in original image space.
            Default: False.
        with_nms (bool): If True, do nms before return boxes.
            Default: True.

    Returns:
        list[tuple]: One tuple per image. With ``with_nms=True`` it is the
            ``(det_bboxes, det_labels)`` pair produced by
            ``multiclass_nms``: det_bboxes in shape (n, 5), where the first
            4 columns are bounding box positions (tl_x, tl_y, br_x, br_y)
            and the 5-th column is a score between 0 and 1, and det_labels
            an (n,) tensor of predicted class labels. With
            ``with_nms=False`` it is the un-suppressed
            ``(bboxes, scores, centerness)`` triple of that image.
    """
    cfg = self.test_cfg if cfg is None else cfg
    assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)
    device = cls_scores[0].device
    batch_size = cls_scores[0].shape[0]
    # convert to tensor to keep tracing
    nms_pre_tensor = torch.tensor(
        cfg.get('nms_pre', -1), device=device, dtype=torch.long)
    mlvl_bboxes = []
    mlvl_scores = []
    mlvl_centerness = []
    for cls_score, bbox_pred, centerness, points in zip(
            cls_scores, bbox_preds, centernesses, mlvl_points):
        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
        # (N, P*C, H, W) -> (N, H*W*P, C)
        scores = cls_score.permute(0, 2, 3, 1).reshape(
            batch_size, -1, self.cls_out_channels).sigmoid()
        centerness = centerness.permute(0, 2, 3,
                                        1).reshape(batch_size,
                                                   -1).sigmoid()

        bbox_pred = bbox_pred.permute(0, 2, 3,
                                      1).reshape(batch_size, -1, 4)
        # Always keep topk op for dynamic input in onnx
        if nms_pre_tensor > 0 and (torch.onnx.is_in_onnx_export()
                                   or scores.shape[-2] > nms_pre_tensor):
            from torch import _shape_as_tensor
            # keep shape as tensor and get k
            num_anchor = _shape_as_tensor(scores)[-2].to(device)
            # k = min(nms_pre, number of candidates), computed with tensor
            # ops so it is not frozen into the exported graph.
            nms_pre = torch.where(nms_pre_tensor < num_anchor,
                                  nms_pre_tensor, num_anchor)

            max_scores, _ = (scores * centerness[..., None]).max(-1)
            _, topk_inds = max_scores.topk(nms_pre)
            # Indexing with (N, k) indices gives per-image points.
            points = points[topk_inds, :]
            batch_inds = torch.arange(batch_size).view(
                -1, 1).expand_as(topk_inds).long()
            bbox_pred = bbox_pred[batch_inds, topk_inds, :]
            scores = scores[batch_inds, topk_inds, :]
            centerness = centerness[batch_inds, topk_inds]

        bboxes = distance2bbox(points, bbox_pred, max_shape=img_shapes)
        mlvl_bboxes.append(bboxes)
        mlvl_scores.append(scores)
        mlvl_centerness.append(centerness)

    batch_mlvl_bboxes = torch.cat(mlvl_bboxes, dim=1)
    if rescale:
        batch_mlvl_bboxes /= batch_mlvl_bboxes.new_tensor(
            scale_factors).unsqueeze(1)
    batch_mlvl_scores = torch.cat(mlvl_scores, dim=1)
    batch_mlvl_centerness = torch.cat(mlvl_centerness, dim=1)

    # Set max number of box to be feed into nms in deployment
    deploy_nms_pre = cfg.get('deploy_nms_pre', -1)
    if deploy_nms_pre > 0 and torch.onnx.is_in_onnx_export():
        # Rank by the centerness-weighted best class score. The ranking
        # values go into their own variable: overwriting
        # batch_mlvl_scores with this 2-D reduction would make the 3-index
        # gather below fail and would discard the per-class scores.
        max_scores, _ = (
            batch_mlvl_scores *
            batch_mlvl_centerness.unsqueeze(2).expand_as(batch_mlvl_scores)
        ).max(-1)
        _, topk_inds = max_scores.topk(deploy_nms_pre)
        batch_inds = torch.arange(batch_mlvl_scores.shape[0]).view(
            -1, 1).expand_as(topk_inds)
        batch_mlvl_scores = batch_mlvl_scores[batch_inds, topk_inds, :]
        batch_mlvl_bboxes = batch_mlvl_bboxes[batch_inds, topk_inds, :]
        batch_mlvl_centerness = batch_mlvl_centerness[batch_inds,
                                                      topk_inds]

    # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
    # BG cat_id: num_class
    padding = batch_mlvl_scores.new_zeros(batch_size,
                                          batch_mlvl_scores.shape[1], 1)
    batch_mlvl_scores = torch.cat([batch_mlvl_scores, padding], dim=-1)

    if with_nms:
        det_results = []
        # NOTE: the loop variables reuse the names of the per-level lists
        # built above; those lists are no longer needed at this point.
        for (mlvl_bboxes, mlvl_scores,
             mlvl_centerness) in zip(batch_mlvl_bboxes, batch_mlvl_scores,
                                     batch_mlvl_centerness):
            det_bbox, det_label = multiclass_nms(
                mlvl_bboxes,
                mlvl_scores,
                cfg.score_thr,
                cfg.nms,
                cfg.max_per_img,
                score_factors=mlvl_centerness)
            det_results.append(tuple([det_bbox, det_label]))
    else:
        det_results = [
            tuple(mlvl_bs)
            for mlvl_bs in zip(batch_mlvl_bboxes, batch_mlvl_scores,
                               batch_mlvl_centerness)
        ]
    return det_results
def _get_bboxes_single(self,
                       cls_score_list,
                       bbox_pred_list,
                       mlvl_anchors,
                       img_shape,
                       scale_factor,
                       cfg,
                       rescale=False,
                       with_nms=True):
    """Transform outputs for a single batch item into bbox predictions.

    Each level is flattened to ``(num_priors, ...)``, optionally reduced to
    its ``nms_pre`` best-scoring candidates, decoded through
    ``self.bbox_coder`` and concatenated. NMS is then applied unless
    ``with_nms`` is False.

    Args:
        cls_score_list (list[Tensor]): Box scores, one tensor per scale
            level, each with shape (num_anchors * num_classes, H, W).
        bbox_pred_list (list[Tensor]): Box energies / deltas, one tensor
            per scale level, each with shape (num_anchors * 4, H, W).
        mlvl_anchors (list[Tensor]): Box reference, one tensor per scale
            level, indexed by row.
        img_shape (tuple[int]): Shape of the input image,
            (height, width, 3). Forwarded unchanged to
            ``self.bbox_coder.decode`` as ``max_shape``.
        scale_factor (ndarray): Scale factor of the image arranged as
            (w_scale, h_scale, w_scale, h_scale).
        cfg (mmcv.Config): Test / postprocessing configuration,
            if None, test_cfg would be used. Keys read here: ``nms_pre``,
            ``deploy_nms_pre``, ``score_thr``, ``nms``, ``max_per_img``.
        rescale (bool): If True, return boxes in original image space.
            Default: False.
        with_nms (bool): If True, do nms before return boxes.
            Default: True.

    Returns:
        tuple[Tensor, Tensor]: With ``with_nms=True`` the
            ``(det_bboxes, det_labels)`` pair from ``multiclass_nms``:
            labeled boxes in shape (n, 5), where the first 4 columns are
            bounding box positions (tl_x, tl_y, br_x, br_y) and the 5-th
            column is a score between 0 and 1, plus their labels. With
            ``with_nms=False`` the un-suppressed ``(bboxes, scores)``.
    """
    cfg = self.test_cfg if cfg is None else cfg
    assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)
    # convert to tensor to keep tracing
    nms_pre_tensor = torch.tensor(
        cfg.get('nms_pre', -1),
        device=cls_score_list[0].device,
        dtype=torch.long)
    mlvl_bboxes = []
    mlvl_scores = []
    for cls_score, bbox_pred, anchors in zip(cls_score_list,
                                             bbox_pred_list, mlvl_anchors):
        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
        # (A*C, H, W) -> (H*W*A, C)
        cls_score = cls_score.permute(1, 2,
                                      0).reshape(-1, self.cls_out_channels)
        if self.use_sigmoid_cls:
            scores = cls_score.sigmoid()
        else:
            scores = cls_score.softmax(-1)
        bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        # Always keep topk op for dynamic input in onnx
        if nms_pre_tensor > 0 and (torch.onnx.is_in_onnx_export()
                                   or scores.shape[-2] > nms_pre_tensor):
            from torch import _shape_as_tensor
            # keep shape as tensor and get k
            num_anchor = _shape_as_tensor(scores)[-2].to(nms_pre_tensor)
            # k = min(nms_pre, number of candidates), computed with tensor
            # ops so it is not frozen into the exported graph.
            nms_pre = torch.where(nms_pre_tensor < num_anchor,
                                  nms_pre_tensor, num_anchor)
            # Get maximum scores for foreground classes.
            if self.use_sigmoid_cls:
                max_scores, _ = scores.max(dim=1)
            else:
                # remind that we set FG labels to [0, num_class-1]
                # since mmdet v2.0
                # BG cat_id: num_class
                max_scores, _ = scores[:, :-1].max(dim=1)
            _, topk_inds = max_scores.topk(nms_pre)
            anchors = anchors[topk_inds, :]
            bbox_pred = bbox_pred[topk_inds, :]
            scores = scores[topk_inds, :]
        bboxes = self.bbox_coder.decode(
            anchors, bbox_pred, max_shape=img_shape)
        mlvl_bboxes.append(bboxes)
        mlvl_scores.append(scores)
    mlvl_bboxes = torch.cat(mlvl_bboxes)
    if rescale:
        mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
    mlvl_scores = torch.cat(mlvl_scores)

    # Set max number of box to be feed into nms in deployment
    deploy_nms_pre = cfg.get('deploy_nms_pre', -1)
    if deploy_nms_pre > 0 and torch.onnx.is_in_onnx_export():
        # Rank by foreground scores only, matching the per-level selection
        # above: with softmax the last column is the background class and
        # must not decide which boxes survive.
        if self.use_sigmoid_cls:
            max_scores, _ = mlvl_scores.max(dim=1)
        else:
            max_scores, _ = mlvl_scores[:, :-1].max(dim=1)
        _, topk_inds = max_scores.topk(deploy_nms_pre)
        mlvl_scores = mlvl_scores[topk_inds, :]
        mlvl_bboxes = mlvl_bboxes[topk_inds, :]

    if self.use_sigmoid_cls:
        # Add a dummy background class to the backend when using sigmoid
        # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
        # BG cat_id: num_class
        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
        mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)

    if with_nms:
        det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,
                                                cfg.score_thr, cfg.nms,
                                                cfg.max_per_img)
        return det_bboxes, det_labels
    else:
        return mlvl_bboxes, mlvl_scores
def forward(self, a):
    """Return the size of dim 0 of ``a`` as a tensor.

    The size is read as a tensor element, converted to a Python int with
    ``aten.Int`` and wrapped back into a tensor with ``prim.NumToTensor``.
    """
    leading_dim = torch._shape_as_tensor(a)[0]  # tensor with empty shape
    as_int = torch.ops.aten.Int(leading_dim)
    return torch.ops.prim.NumToTensor(as_int)
def _get_bboxes(self,
                pred_maps_list,
                scale_factors,
                cfg,
                rescale=False,
                with_nms=True):
    """Transform batched multi-level prediction maps into bbox predictions.

    Each level's map is flattened to ``(N, num_priors, num_attrib)``: the
    first four attributes are decoded into boxes with
    ``self.bbox_coder``, attribute 4 becomes the objectness confidence and
    the remaining ones the class scores. Levels are concatenated along
    dim 1 and NMS is applied per image when requested and configured.

    Args:
        pred_maps_list (list[Tensor]): Prediction maps for different scales
            of each single image in the batch.
        scale_factors (list(ndarray)): Scale factor of the image arranged
            as (w_scale, h_scale, w_scale, h_scale).
        cfg (mmcv.Config | None): Test / postprocessing configuration,
            if None, test_cfg would be used. Keys read here: ``nms_pre``,
            ``deploy_nms_pre``, ``nms``, ``conf_thr``, ``score_thr``,
            ``max_per_img``.
        rescale (bool): If True, return boxes in original image space.
            Default: False.
        with_nms (bool): If True, do nms before return boxes.
            Default: True.

    Returns:
        list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.
            The first item is an (n, 5) tensor, where 5 represent
            (tl_x, tl_y, br_x, br_y, score) and the score between 0 and 1.
            The shape of the second tensor in the tuple is (n,), and
            each element represents the class label of the corresponding
            box. When NMS is skipped (``with_nms=False`` or no ``nms`` entry
            in ``cfg``) each tuple instead holds the un-suppressed
            ``(bboxes, scores, conf_scores)`` of one image.
    """
    cfg = self.test_cfg if cfg is None else cfg
    assert len(pred_maps_list) == self.num_levels

    device = pred_maps_list[0].device
    batch_size = pred_maps_list[0].shape[0]

    featmap_sizes = [
        pred_maps_list[i].shape[-2:] for i in range(self.num_levels)
    ]
    multi_lvl_anchors = self.anchor_generator.grid_anchors(
        featmap_sizes, device)
    # convert to tensor to keep tracing
    nms_pre_tensor = torch.tensor(
        cfg.get('nms_pre', -1), device=device, dtype=torch.long)

    multi_lvl_bboxes = []
    multi_lvl_cls_scores = []
    multi_lvl_conf_scores = []
    for i in range(self.num_levels):
        # get some key info for current scale
        pred_map = pred_maps_list[i]
        stride = self.featmap_strides[i]
        # (b,h, w, num_anchors*num_attrib) ->
        # (b,h*w*num_anchors, num_attrib)
        pred_map = pred_map.permute(0, 2, 3,
                                    1).reshape(batch_size, -1,
                                               self.num_attrib)
        # Inplace operation like
        # ```pred_map[..., :2] = \torch.sigmoid(pred_map[..., :2])```
        # would create constant tensor when exporting to onnx
        pred_map_conf = torch.sigmoid(pred_map[..., :2])
        pred_map_rest = pred_map[..., 2:]
        pred_map = torch.cat([pred_map_conf, pred_map_rest], dim=-1)
        pred_map_boxes = pred_map[..., :4]
        multi_lvl_anchor = multi_lvl_anchors[i]
        multi_lvl_anchor = multi_lvl_anchor.expand_as(pred_map_boxes)
        bbox_pred = self.bbox_coder.decode(multi_lvl_anchor,
                                           pred_map_boxes, stride)
        # conf and cls
        conf_pred = torch.sigmoid(pred_map[..., 4])
        cls_pred = torch.sigmoid(pred_map[..., 5:]).view(
            batch_size, -1, self.num_classes)  # Cls pred one-hot.

        # Get top-k prediction
        # Always keep topk op for dynamic input in onnx
        if nms_pre_tensor > 0 and (torch.onnx.is_in_onnx_export()
                                   or conf_pred.shape[1] > nms_pre_tensor):
            from torch import _shape_as_tensor
            # keep shape as tensor and get k
            num_anchor = _shape_as_tensor(conf_pred)[1].to(device)
            # k = min(nms_pre, number of candidates), computed with tensor
            # ops so it is not frozen into the exported graph.
            nms_pre = torch.where(nms_pre_tensor < num_anchor,
                                  nms_pre_tensor, num_anchor)
            _, topk_inds = conf_pred.topk(nms_pre)
            # Row indices paired with topk_inds for the batched gather.
            batch_inds = torch.arange(batch_size).view(
                -1, 1).expand_as(topk_inds).long()
            bbox_pred = bbox_pred[batch_inds, topk_inds, :]
            cls_pred = cls_pred[batch_inds, topk_inds, :]
            conf_pred = conf_pred[batch_inds, topk_inds]

        # Save the result of current scale
        multi_lvl_bboxes.append(bbox_pred)
        multi_lvl_cls_scores.append(cls_pred)
        multi_lvl_conf_scores.append(conf_pred)

    # Merge the results of different scales together
    batch_mlvl_bboxes = torch.cat(multi_lvl_bboxes, dim=1)
    batch_mlvl_scores = torch.cat(multi_lvl_cls_scores, dim=1)
    batch_mlvl_conf_scores = torch.cat(multi_lvl_conf_scores, dim=1)

    # Set max number of box to be feed into nms in deployment
    deploy_nms_pre = cfg.get('deploy_nms_pre', -1)
    if deploy_nms_pre > 0 and torch.onnx.is_in_onnx_export():
        _, topk_inds = batch_mlvl_conf_scores.topk(deploy_nms_pre)
        batch_inds = torch.arange(batch_size).view(
            -1, 1).expand_as(topk_inds).long()
        batch_mlvl_bboxes = batch_mlvl_bboxes[batch_inds, topk_inds, :]
        batch_mlvl_scores = batch_mlvl_scores[batch_inds, topk_inds, :]
        batch_mlvl_conf_scores = batch_mlvl_conf_scores[batch_inds,
                                                        topk_inds]

    # NOTE(review): size(0) is the batch dimension here, so this early exit
    # only fires for an empty batch, and it returns a bare 2-tuple rather
    # than the list produced by every other path - confirm callers expect
    # that before relying on it.
    if with_nms and (batch_mlvl_conf_scores.size(0) == 0):
        return torch.zeros((0, 5)), torch.zeros((0, ))

    if rescale:
        batch_mlvl_bboxes /= batch_mlvl_bboxes.new_tensor(
            scale_factors).unsqueeze(1)

    # In mmdet 2.x, the class_id for background is num_classes.
    # i.e., the last column.
    padding = batch_mlvl_scores.new_zeros(batch_size,
                                          batch_mlvl_scores.shape[1], 1)
    batch_mlvl_scores = torch.cat([batch_mlvl_scores, padding], dim=-1)

    # Support exporting to onnx without nms
    if with_nms and cfg.get('nms', None) is not None:
        det_results = []
        for (mlvl_bboxes, mlvl_scores,
             mlvl_conf_scores) in zip(batch_mlvl_bboxes, batch_mlvl_scores,
                                      batch_mlvl_conf_scores):
            # Filtering out all predictions with conf < conf_thr
            conf_thr = cfg.get('conf_thr', -1)
            if conf_thr > 0 and (not torch.onnx.is_in_onnx_export()):
                # TensorRT not support NonZero
                # add as_tuple=False for compatibility in Pytorch 1.6
                # flatten would create a Reshape op with constant values,
                # and raise RuntimeError when doing inference in ONNX
                # Runtime with a different input image (#4221).
                conf_inds = mlvl_conf_scores.ge(conf_thr).nonzero(
                    as_tuple=False).squeeze(1)
                mlvl_bboxes = mlvl_bboxes[conf_inds, :]
                mlvl_scores = mlvl_scores[conf_inds, :]
                mlvl_conf_scores = mlvl_conf_scores[conf_inds]

            det_bboxes, det_labels = multiclass_nms(
                mlvl_bboxes,
                mlvl_scores,
                cfg.score_thr,
                cfg.nms,
                cfg.max_per_img,
                score_factors=mlvl_conf_scores)
            det_results.append(tuple([det_bboxes, det_labels]))

    else:
        det_results = [
            tuple(mlvl_bs)
            for mlvl_bs in zip(batch_mlvl_bboxes, batch_mlvl_scores,
                               batch_mlvl_conf_scores)
        ]
    return det_results
def shape_as_tensor(x):
    """Return the shape of ``x`` as a tensor.

    Thin public wrapper around ``torch._shape_as_tensor``.
    """
    dims = torch._shape_as_tensor(x)
    return dims
def _get_bboxes(self,
                cls_scores,
                bbox_preds,
                mlvl_anchors,
                img_shapes,
                scale_factors,
                cfg,
                rescale=False):
    """Transform batched multi-level RPN outputs into proposals.

    Each level is flattened, optionally reduced to its ``nms_pre``
    best-scoring candidates and tagged with its level index. All levels
    are then decoded together through ``self.bbox_coder`` and suppressed
    per image with ``batched_nms``, using the level index as the label so
    that boxes from different levels do not suppress each other.

    Args:
        cls_scores (list[Tensor]): Box scores for each scale level
            Has shape (N, num_anchors * num_classes, H, W).
        bbox_preds (list[Tensor]): Box energies / deltas for each scale
            level with shape (N, num_anchors * 4, H, W).
        mlvl_anchors (list[Tensor]): Box reference for each scale level;
            each is expanded to the shape of that level's flattened
            ``rpn_bbox_pred``.
        img_shapes (list[tuple[int]]): Shape of the input image,
            (height, width, 3). Forwarded unchanged to
            ``self.bbox_coder.decode`` as ``max_shape``.
        scale_factors (list[ndarray]): Scale factor of the image arranged
            as (w_scale, h_scale, w_scale, h_scale). Not read by this
            method.
        cfg (mmcv.Config): Test / postprocessing configuration,
            if None, test_cfg would be used. A deep copy is made, so the
            caller's config is never modified. Keys read here:
            ``nms_pre``, ``nms``, ``nms_thr``, ``max_num``,
            ``max_per_img``, ``min_bbox_size``.
        rescale (bool): If True, return boxes in original image space.
            Not read by this method.

    Returns:
        list[Tensor]: One entry per image: the first output of
            ``batched_nms`` truncated to at most ``cfg.max_per_img`` rows.
    """
    cfg = self.test_cfg if cfg is None else cfg
    cfg = copy.deepcopy(cfg)
    # bboxes from different level should be independent during NMS,
    # level_ids are used as labels for batched NMS to separate them
    level_ids = []
    mlvl_scores = []
    mlvl_bbox_preds = []
    mlvl_valid_anchors = []
    batch_size = cls_scores[0].shape[0]
    nms_pre_tensor = torch.tensor(
        cfg.nms_pre, device=cls_scores[0].device, dtype=torch.long)
    for idx in range(len(cls_scores)):
        rpn_cls_score = cls_scores[idx]
        rpn_bbox_pred = bbox_preds[idx]
        assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
        rpn_cls_score = rpn_cls_score.permute(0, 2, 3, 1)
        if self.use_sigmoid_cls:
            rpn_cls_score = rpn_cls_score.reshape(batch_size, -1)
            scores = rpn_cls_score.sigmoid()
        else:
            rpn_cls_score = rpn_cls_score.reshape(batch_size, -1, 2)
            # We set FG labels to [0, num_class-1] and BG label to
            # num_class in RPN head since mmdet v2.5, which is unified to
            # be consistent with other head since mmdet v2.0. In mmdet v2.0
            # to v2.4 we keep BG label as 0 and FG label as 1 in rpn head.
            scores = rpn_cls_score.softmax(-1)[..., 0]
        rpn_bbox_pred = rpn_bbox_pred.permute(0, 2, 3, 1).reshape(
            batch_size, -1, 4)
        anchors = mlvl_anchors[idx]
        anchors = anchors.expand_as(rpn_bbox_pred)
        if nms_pre_tensor > 0:
            # sort is faster than topk
            # _, topk_inds = scores.topk(cfg.nms_pre)
            # keep topk op for dynamic k in onnx model
            if torch.onnx.is_in_onnx_export():
                # sort op will be converted to TopK in onnx
                # and k<=3480 in TensorRT
                scores_shape = torch._shape_as_tensor(scores)
                # k = min(number of candidates, nms_pre) as a tensor.
                nms_pre = torch.where(scores_shape[1] < nms_pre_tensor,
                                      scores_shape[1], nms_pre_tensor)
                _, topk_inds = scores.topk(nms_pre)
                batch_inds = torch.arange(batch_size).view(
                    -1, 1).expand_as(topk_inds)
                scores = scores[batch_inds, topk_inds]
                rpn_bbox_pred = rpn_bbox_pred[batch_inds, topk_inds, :]
                anchors = anchors[batch_inds, topk_inds, :]

            elif scores.shape[-1] > cfg.nms_pre:
                ranked_scores, rank_inds = scores.sort(descending=True)
                topk_inds = rank_inds[:, :cfg.nms_pre]
                scores = ranked_scores[:, :cfg.nms_pre]
                batch_inds = torch.arange(batch_size).view(
                    -1, 1).expand_as(topk_inds)
                rpn_bbox_pred = rpn_bbox_pred[batch_inds, topk_inds, :]
                anchors = anchors[batch_inds, topk_inds, :]

        mlvl_scores.append(scores)
        mlvl_bbox_preds.append(rpn_bbox_pred)
        mlvl_valid_anchors.append(anchors)
        level_ids.append(
            scores.new_full((
                batch_size,
                scores.size(1),
            ),
                            idx,
                            dtype=torch.long))

    batch_mlvl_scores = torch.cat(mlvl_scores, dim=1)
    batch_mlvl_anchors = torch.cat(mlvl_valid_anchors, dim=1)
    batch_mlvl_rpn_bbox_pred = torch.cat(mlvl_bbox_preds, dim=1)
    batch_mlvl_proposals = self.bbox_coder.decode(
        batch_mlvl_anchors, batch_mlvl_rpn_bbox_pred, max_shape=img_shapes)
    batch_mlvl_ids = torch.cat(level_ids, dim=1)

    # deprecate arguments warning
    if 'nms' not in cfg or 'max_num' in cfg or 'nms_thr' in cfg:
        warnings.warn(
            'In rpn_proposal or test_cfg, '
            'nms_thr has been moved to a dict named nms as '
            'iou_threshold, max_num has been renamed as max_per_img, '
            'name of original arguments and the way to specify '
            'iou_threshold of NMS will be deprecated.')
    if 'nms' not in cfg:
        cfg.nms = ConfigDict(dict(type='nms', iou_threshold=cfg.nms_thr))
    if 'max_num' in cfg:
        if 'max_per_img' in cfg:
            assert cfg.max_num == cfg.max_per_img, f'You ' \
                f'set max_num and ' \
                f'max_per_img at the same time, but get {cfg.max_num} ' \
                f'and {cfg.max_per_img} respectively' \
                'Please delete max_num which will be deprecated.'
        else:
            cfg.max_per_img = cfg.max_num
    if 'nms_thr' in cfg:
        assert cfg.nms.iou_threshold == cfg.nms_thr, f'You set' \
            f' iou_threshold in nms and ' \
            f'nms_thr at the same time, but get' \
            f' {cfg.nms.iou_threshold} and {cfg.nms_thr}' \
            f' respectively. Please delete the nms_thr ' \
            f'which will be deprecated.'

    result_list = []
    for (mlvl_proposals, mlvl_scores,
         mlvl_ids) in zip(batch_mlvl_proposals, batch_mlvl_scores,
                          batch_mlvl_ids):
        # Skip nonzero op while exporting to ONNX
        if cfg.min_bbox_size > 0 and (not torch.onnx.is_in_onnx_export()):
            w = mlvl_proposals[:, 2] - mlvl_proposals[:, 0]
            h = mlvl_proposals[:, 3] - mlvl_proposals[:, 1]
            # Filter with a boolean mask. Comparing the sum of surviving
            # *indices* with the proposal count can skip the filter by
            # coincidence, and squeezing a single surviving index to a
            # 0-d tensor would drop a dimension from the results.
            valid_mask = (w >= cfg.min_bbox_size) & (h >= cfg.min_bbox_size)
            if not valid_mask.all():
                mlvl_proposals = mlvl_proposals[valid_mask]
                mlvl_scores = mlvl_scores[valid_mask]
                mlvl_ids = mlvl_ids[valid_mask]

        dets, keep = batched_nms(mlvl_proposals, mlvl_scores, mlvl_ids,
                                 cfg.nms)
        result_list.append(dets[:cfg.max_per_img])
    return result_list
def forward(self, tensor):
    """Return the shape of ``tensor`` (as a tensor) added to itself."""
    shape = torch._shape_as_tensor(tensor)
    doubled = shape + shape
    return doubled
#print(flattensize) #print(round(imagesize[0]/16.0)) self.Dense1 = nn.Linear(in_features=flattensize, out_features=1024) self.LeakyReLU2 = nn.LeakyReLU(negative_slope=0.2, inplace=True) self.Dense2 = nn.Linear(in_features=1024, out_features=1) self.Sigmoid = nn.Sigmoid() def forward(self, x): x = 2.0 * x - 1.0 #print(torch._shape_as_tensor(x)) out = self.ConvBlock_module(x) #print(torch._shape_as_tensor(out)) out = self.Dense1(out) out = self.LeakyReLU2(out) out = self.Dense2(out) out = self.Sigmoid(out) return out if __name__ == "__main__": test = Discriminator((3, 296, 296)) keys = test.state_dict().keys() for key in keys: if key.find('weight') != -1: print("name of layer : {}".format(key)) print(torch._shape_as_tensor((test.state_dict())[key])) summary(test, (3, 296, 296), device='cpu')