def get_task_detections(self, num_class_with_bg, batch_cls_preds,
                        batch_reg_preds, batch_cls_labels, img_metas):
    """Rotate nms for each task.

    For every sample the predictions are (optionally) score-thresholded,
    passed through BEV NMS and finally restricted to the centre range given
    by ``test_cfg['post_center_limit_range']``.

    Args:
        num_class_with_bg (int): Number of classes for the current task.
        batch_cls_preds (list[torch.Tensor]): Prediction score with the
            shape of [N].
        batch_reg_preds (list[torch.Tensor]): Prediction bbox with the
            shape of [N, 9].
        batch_cls_labels (list[torch.Tensor]): Prediction label with the
            shape of [N].
        img_metas (list[dict]): Meta information of each sample. The
            ``'box_type_3d'`` entry is called to build the boxes fed to NMS.

    Returns:
        list[dict[str: torch.Tensor]]: One dict per sample with the keys:

            - bboxes (torch.Tensor): Prediction bboxes after nms with the
              shape of [N, 9].
            - scores (torch.Tensor): Prediction scores after nms with the
              shape of [N].
            - labels (torch.Tensor): Prediction labels after nms with the
              shape of [N].
    """
    # Normalise the centre range once: an empty sequence or ``None`` both
    # mean "no range filtering". Previously ``None`` crashed in ``len()``
    # and an empty list slipped past the ``is not None`` check further down
    # and was compared against the box centres.
    post_center_range = self.test_cfg['post_center_limit_range']
    if post_center_range is not None and len(post_center_range) > 0:
        post_center_range = torch.tensor(
            post_center_range,
            dtype=batch_reg_preds[0].dtype,
            device=batch_reg_preds[0].device)
    else:
        post_center_range = None

    predictions_dicts = []
    for i, (box_preds, cls_preds, cls_labels) in enumerate(
            zip(batch_reg_preds, batch_cls_preds, batch_cls_labels)):
        # Apply NMS in birdeye view: take the score of each prediction,
        # then apply nms to remove overlapped boxes.
        if num_class_with_bg == 1:
            top_labels = torch.zeros(
                cls_preds.shape[0],
                device=cls_preds.device,
                dtype=torch.long)
        else:
            top_labels = cls_labels.long()
        top_scores = cls_preds.squeeze(-1)

        score_threshold = self.test_cfg['score_threshold']
        if score_threshold > 0.0:
            thresh = torch.tensor(
                [score_threshold],
                device=cls_preds.device).type_as(cls_preds)
            keep = top_scores >= thresh
            top_scores = top_scores.masked_select(keep)
            box_preds = box_preds[keep]
            top_labels = top_labels[keep]

        if top_scores.shape[0] != 0:
            boxes_for_nms = xywhr2xyxyr(img_metas[i]['box_type_3d'](
                box_preds[:, :], self.bbox_coder.code_size).bev)
            # the nms in 3d detection just remove overlap boxes.
            selected = nms_gpu(
                boxes_for_nms,
                top_scores,
                thresh=self.test_cfg['nms_thr'],
                pre_maxsize=self.test_cfg['pre_max_size'],
                post_max_size=self.test_cfg['post_max_size'])
            box_preds = box_preds[selected]
            top_labels = top_labels[selected]
            top_scores = top_scores[selected]

        # finally generate predictions.
        if box_preds.shape[0] == 0:
            # Nothing survived: emit correctly-typed empty tensors.
            dtype = batch_reg_preds[0].dtype
            device = batch_reg_preds[0].device
            predictions_dict = dict(
                bboxes=torch.zeros([0, self.bbox_coder.code_size],
                                   dtype=dtype,
                                   device=device),
                scores=torch.zeros([0], dtype=dtype, device=device),
                labels=torch.zeros([0],
                                   dtype=top_labels.dtype,
                                   device=device))
        elif post_center_range is not None:
            # Keep boxes whose first three coordinates lie inside the
            # [min_xyz, max_xyz] range.
            centers = box_preds[:, :3]
            mask = (centers >= post_center_range[:3]).all(1)
            mask &= (centers <= post_center_range[3:]).all(1)
            predictions_dict = dict(
                bboxes=box_preds[mask],
                scores=top_scores[mask],
                labels=top_labels[mask])
        else:
            predictions_dict = dict(
                bboxes=box_preds, scores=top_scores, labels=top_labels)

        predictions_dicts.append(predictions_dict)
    return predictions_dicts
def _format_bbox(self, results, jsonfile_prefix=None):
    """Convert the results to the standard format.

    Per-view detections are accumulated ``CAM_NUM`` at a time, merged into
    one frame, de-duplicated with a 3D NMS across the views and written out
    as a nuScenes-style submission json.

    Args:
        results (list[dict]): Testing results of the dataset, one entry per
            view. Views are consumed in groups of ``CAM_NUM``; a trailing
            incomplete group is never flushed to the output.
        jsonfile_prefix (str): The prefix of the output jsonfile.
            You can specify the output directory/filename by
            modifying the jsonfile_prefix. Default: None.
            NOTE: the value is used directly as the output directory, so
            the caller is expected to pass an actual path here.

    Returns:
        str: Path of the output json file.
    """
    # Kept as a function-scope import (as in the original code), but it and
    # the constant NMS settings are now set up once instead of once per
    # frame inside the loop.
    from mmcv import Config

    CAM_NUM = 6
    # box nms 3d over 6 images in a frame
    # TODO: move this global setting into config
    nms_cfg = Config(
        dict(
            use_rotate_nms=True,
            nms_across_levels=False,
            nms_pre=4096,
            nms_thr=0.05,
            score_thr=0.01,
            min_bbox_size=0,
            max_per_frame=500))

    nusc_annos = {}
    mapped_class_names = self.CLASSES

    print('Start to convert detection format...')
    for sample_id, det in enumerate(mmcv.track_iter_progress(results)):
        if sample_id % CAM_NUM == 0:
            # first view of a new frame: start fresh accumulators
            boxes_per_frame = []
            attrs_per_frame = []

        # need to merge results from images of the same sample
        boxes, attrs = output_to_nusc_box(det)
        sample_token = self.data_infos[sample_id]['token']
        boxes, attrs = cam_nusc_box_to_global(self.data_infos[sample_id],
                                              boxes, attrs,
                                              mapped_class_names,
                                              self.eval_detection_configs,
                                              self.eval_version)
        boxes_per_frame.extend(boxes)
        attrs_per_frame.extend(attrs)

        # Remove redundant predictions caused by overlap of images
        if (sample_id + 1) % CAM_NUM != 0:
            continue

        # All views of the frame are in: express every box relative to the
        # frame's first view so they can be compared by one NMS pass.
        first_view_info = self.data_infos[sample_id + 1 - CAM_NUM]
        boxes = global_nusc_box_to_cam(first_view_info, boxes_per_frame,
                                       mapped_class_names,
                                       self.eval_detection_configs,
                                       self.eval_version)
        cam_boxes3d, scores, labels = nusc_box_to_cam_box3d(boxes)
        cam_boxes3d_for_nms = xywhr2xyxyr(cam_boxes3d.bev)
        boxes3d = cam_boxes3d.tensor
        # generate attr scores from attr labels
        attrs = labels.new_tensor(attrs_per_frame)
        boxes3d, scores, labels, attrs = box3d_multiclass_nms(
            boxes3d,
            cam_boxes3d_for_nms,
            scores,
            nms_cfg.score_thr,
            nms_cfg.max_per_frame,
            nms_cfg,
            mlvl_attr_scores=attrs)
        cam_boxes3d = CameraInstance3DBoxes(boxes3d, box_dim=9)
        det = bbox3d2result(cam_boxes3d, scores, labels, attrs)
        boxes, attrs = output_to_nusc_box(det)
        boxes, attrs = cam_nusc_box_to_global(first_view_info, boxes, attrs,
                                              mapped_class_names,
                                              self.eval_detection_configs,
                                              self.eval_version)

        annos = []
        for i, box in enumerate(boxes):
            name = mapped_class_names[box.label]
            attr = self.get_attr_name(attrs[i], name)
            annos.append(
                dict(
                    sample_token=sample_token,
                    translation=box.center.tolist(),
                    size=box.wlh.tolist(),
                    rotation=box.orientation.elements.tolist(),
                    velocity=box.velocity[:2].tolist(),
                    detection_name=name,
                    detection_score=box.score,
                    attribute_name=attr))

        # other views results of the same frame should be concatenated
        nusc_annos.setdefault(sample_token, []).extend(annos)

    nusc_submissions = {
        'meta': self.modality,
        'results': nusc_annos,
    }

    mmcv.mkdir_or_exist(jsonfile_prefix)
    res_path = osp.join(jsonfile_prefix, 'results_nusc.json')
    print('Results writes to', res_path)
    mmcv.dump(nusc_submissions, res_path)
    return res_path
def get_bboxes_single(self,
                      cls_scores,
                      bbox_preds,
                      dir_cls_preds,
                      mlvl_anchors,
                      input_meta,
                      cfg,
                      rescale=False):
    """Get bboxes of single branch.

    Decodes the per-level predictions of one sample, optionally keeps only
    the ``nms_pre`` best anchors per level, and hands the concatenated
    candidates to ``self.class_agnostic_nms``.

    Args:
        cls_scores (torch.Tensor): Class score in single batch.
        bbox_preds (torch.Tensor): Bbox prediction in single batch.
        dir_cls_preds (torch.Tensor): Predictions of direction class
            in single batch.
        mlvl_anchors (List[torch.Tensor]): Multi-level anchors
            in single batch.
        input_meta (dict): Meta info of the sample; its ``'box_type_3d'``
            entry is called to build the boxes used for NMS.
        cfg (:obj:`ConfigDict`): Training or testing config. ``nms_pre``,
            ``score_thr`` and ``nms_post`` are read from it.
        rescale (bool): Not used by this method.

    Returns:
        The value returned by ``self.class_agnostic_nms``. It was
        previously documented as a dict with the keys:

            - boxes_3d (:obj:`BaseInstance3DBoxes`): Predicted 3d bboxes.
            - scores_3d (torch.Tensor): Score of each bbox.
            - labels_3d (torch.Tensor): Label of each bbox.
            - cls_preds (torch.Tensor): Class score of each bbox.
    """
    assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)

    level_boxes = []
    level_max_scores = []
    level_labels = []
    level_dir_scores = []
    level_cls_scores = []

    for cls_score, bbox_pred, dir_cls_pred, anchors in zip(
            cls_scores, bbox_preds, dir_cls_preds, mlvl_anchors):
        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
        assert cls_score.size()[-2:] == dir_cls_pred.size()[-2:]

        # Index of the stronger of the two direction bins per anchor.
        dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)
        _, dir_cls_score = dir_cls_pred.max(dim=-1)

        cls_score = cls_score.permute(1, 2, 0).reshape(-1, self.num_classes)
        scores = (
            cls_score.sigmoid()
            if self.use_sigmoid_cls else cls_score.softmax(-1))
        bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, self.box_code_size)

        nms_pre = cfg.get('nms_pre', -1)
        # With softmax the last column is left out of the per-anchor max.
        candidate_scores = scores if self.use_sigmoid_cls else scores[:, :-1]
        max_scores, pred_labels = candidate_scores.max(dim=1)

        # get topk
        if nms_pre > 0 and scores.shape[0] > nms_pre:
            max_scores, topk_inds = max_scores.topk(nms_pre)
            anchors = anchors[topk_inds, :]
            bbox_pred = bbox_pred[topk_inds, :]
            # NOTE(review): here cls_score is rebound to the *activated*
            # scores, whereas without top-k it stays as the pre-activation
            # values. Behaviour is kept as-is; confirm which one the
            # consumer of ``mlvl_cls_score`` expects.
            cls_score = scores[topk_inds, :]
            dir_cls_score = dir_cls_score[topk_inds]
            pred_labels = pred_labels[topk_inds]

        level_boxes.append(self.bbox_coder.decode(anchors, bbox_pred))
        level_max_scores.append(max_scores)
        level_cls_scores.append(cls_score)
        level_labels.append(pred_labels)
        level_dir_scores.append(dir_cls_score)

    mlvl_bboxes = torch.cat(level_boxes)
    mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](
        mlvl_bboxes, box_dim=self.box_code_size).bev)
    mlvl_max_scores = torch.cat(level_max_scores)
    mlvl_label_pred = torch.cat(level_labels)
    mlvl_dir_scores = torch.cat(level_dir_scores)
    # PartA2 needs to keep the classification score because the bbox head
    # in the second stage does not have a classification branch; the roi
    # head uses this score as its classification score.
    mlvl_cls_score = torch.cat(level_cls_scores)

    score_thr = cfg.get('score_thr', 0)
    return self.class_agnostic_nms(mlvl_bboxes, mlvl_bboxes_for_nms,
                                   mlvl_max_scores, mlvl_label_pred,
                                   mlvl_cls_score, mlvl_dir_scores,
                                   score_thr, cfg.nms_post, cfg, input_meta)
def get_bboxes_single(self,
                      cls_scores,
                      bbox_preds,
                      dir_cls_preds,
                      mlvl_anchors,
                      input_meta,
                      cfg=None,
                      rescale=False):
    """Get bboxes of single branch.

    Decodes the per-level predictions of one sample, optionally keeps only
    the ``nms_pre`` best anchors per level, runs multi-class 3D NMS and
    folds the predicted direction bin back into the yaw of each box.

    Args:
        cls_scores (torch.Tensor): Class score in single batch.
        bbox_preds (torch.Tensor): Bbox prediction in single batch.
        dir_cls_preds (torch.Tensor): Predictions of direction class
            in single batch.
        mlvl_anchors (List[torch.Tensor]): Multi-level anchors
            in single batch.
        input_meta (dict): Meta info of the sample; its ``'box_type_3d'``
            entry is called to build the box objects.
        cfg (None | :obj:`ConfigDict`): Training or testing config.
            ``self.test_cfg`` is used when None.
        rescale (bool): Not used by this method.

    Returns:
        tuple: Contain predictions of single batch.

            - bboxes (:obj:`BaseInstance3DBoxes`): Predicted 3d bboxes.
            - scores (torch.Tensor): Class score of each bbox.
            - labels (torch.Tensor): Label of each bbox.
    """
    if cfg is None:
        cfg = self.test_cfg
    assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)

    level_boxes, level_scores, level_dir_scores = [], [], []
    for cls_score, bbox_pred, dir_cls_pred, anchors in zip(
            cls_scores, bbox_preds, dir_cls_preds, mlvl_anchors):
        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
        assert cls_score.size()[-2:] == dir_cls_pred.size()[-2:]

        # Index of the stronger of the two direction bins per anchor.
        dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)
        _, dir_cls_score = dir_cls_pred.max(dim=-1)

        cls_score = cls_score.permute(1, 2, 0).reshape(-1, self.num_classes)
        scores = (
            cls_score.sigmoid()
            if self.use_sigmoid_cls else cls_score.softmax(-1))
        bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, self.box_code_size)

        nms_pre = cfg.get('nms_pre', -1)
        if nms_pre > 0 and scores.shape[0] > nms_pre:
            # With softmax the last column is left out of the ranking.
            ranking_scores = scores if self.use_sigmoid_cls else scores[:, :-1]
            best_per_anchor, _ = ranking_scores.max(dim=1)
            _, topk_inds = best_per_anchor.topk(nms_pre)
            anchors = anchors[topk_inds, :]
            bbox_pred = bbox_pred[topk_inds, :]
            scores = scores[topk_inds, :]
            dir_cls_score = dir_cls_score[topk_inds]

        level_boxes.append(self.bbox_coder.decode(anchors, bbox_pred))
        level_scores.append(scores)
        level_dir_scores.append(dir_cls_score)

    mlvl_bboxes = torch.cat(level_boxes)
    mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](
        mlvl_bboxes, box_dim=self.box_code_size).bev)
    mlvl_scores = torch.cat(level_scores)
    mlvl_dir_scores = torch.cat(level_dir_scores)

    if self.use_sigmoid_cls:
        # Sigmoid scores carry no background column, so a zero-filled
        # dummy one is appended as the last column.
        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
        mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)

    score_thr = cfg.get('score_thr', 0)
    bboxes, scores, labels, dir_scores = box3d_multiclass_nms(
        mlvl_bboxes, mlvl_bboxes_for_nms, mlvl_scores, score_thr,
        cfg.max_num, cfg, mlvl_dir_scores)

    if bboxes.shape[0] > 0:
        # Wrap the regressed yaw into one period, then add pi for boxes
        # whose direction bin is 1.
        dir_rot = limit_period(bboxes[..., 6] - self.dir_offset,
                               self.dir_limit_offset, np.pi)
        bboxes[..., 6] = (
            dir_rot + self.dir_offset +
            np.pi * dir_scores.to(bboxes.dtype))

    bboxes = input_meta['box_type_3d'](bboxes, box_dim=self.box_code_size)
    return bboxes, scores, labels
def _get_bboxes_single(self,
                       cls_scores,
                       bbox_preds,
                       dir_cls_preds,
                       attr_preds,
                       centernesses,
                       mlvl_points,
                       input_meta,
                       cfg,
                       rescale=False):
    """Transform outputs for a single batch item into bbox predictions.

    Args:
        cls_scores (list[Tensor]): Box scores for a single scale level
            Has shape (num_points * num_classes, H, W).
        bbox_preds (list[Tensor]): Box energies / deltas for a single scale
            level with shape (num_points * bbox_code_size, H, W).
        dir_cls_preds (list[Tensor]): Box scores for direction class
            predictions on a single scale level with shape
            (num_points * 2, H, W)
        attr_preds (list[Tensor]): Attribute scores for each scale level
            Has shape (num_points * num_attrs, H, W)
        centernesses (list[Tensor]): Centerness for a single scale level
            with shape (num_points, H, W).
        mlvl_points (list[Tensor]): Box reference for a single scale level
            with shape (num_total_points, 2).
        input_meta (dict): Metadata of input image. The entries
            ``'cam2img'``, ``'scale_factor'`` and ``'box_type_3d'`` are
            read.
        cfg (mmcv.Config): Test / postprocessing configuration,
            if None, test_cfg would be used.
        rescale (bool): If True, the predicted 2D centers are divided by
            ``input_meta['scale_factor']``.

    Returns:
        tuples[Tensor]: Predicted 3D boxes, scores, labels and attributes.
            Attributes are None when ``self.pred_attrs`` is falsy.
    """
    view = np.array(input_meta['cam2img'])
    scale_factor = input_meta['scale_factor']
    if cfg is None:
        cfg = self.test_cfg
    assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)

    level_centers2d = []
    level_boxes = []
    level_scores = []
    level_dir_scores = []
    level_attr_scores = []
    level_centerness = []

    per_level_inputs = zip(cls_scores, bbox_preds, dir_cls_preds,
                           attr_preds, centernesses, mlvl_points)
    for (cls_score, bbox_pred, dir_cls_pred, attr_pred, centerness,
         points) in per_level_inputs:
        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]

        scores = cls_score.permute(1, 2, 0).reshape(
            -1, self.cls_out_channels).sigmoid()
        dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)
        _, dir_cls_score = dir_cls_pred.max(dim=-1)
        attr_pred = attr_pred.permute(1, 2, 0).reshape(-1, self.num_attrs)
        _, attr_score = attr_pred.max(dim=-1)
        centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()

        # Flatten all regression groups, then keep the box-code channels.
        bbox_pred = bbox_pred.permute(1, 2, 0).reshape(
            -1, sum(self.group_reg_dims))
        bbox_pred = bbox_pred[:, :self.bbox_code_size]

        nms_pre = cfg.get('nms_pre', -1)
        if nms_pre > 0 and scores.shape[0] > nms_pre:
            # Rank points by their best centerness-weighted class score.
            weighted_best, _ = (scores * centerness[:, None]).max(dim=1)
            _, topk_inds = weighted_best.topk(nms_pre)
            points = points[topk_inds, :]
            bbox_pred = bbox_pred[topk_inds, :]
            scores = scores[topk_inds, :]
            # (dir_cls_pred is not read again after this re-indexing)
            dir_cls_pred = dir_cls_pred[topk_inds, :]
            centerness = centerness[topk_inds]
            dir_cls_score = dir_cls_score[topk_inds]
            attr_score = attr_score[topk_inds]

        # change the offset to actual center predictions
        bbox_pred[:, :2] = points - bbox_pred[:, :2]
        if rescale:
            bbox_pred[:, :2] /= bbox_pred[:, :2].new_tensor(scale_factor)
        # Snapshot the first three channels before they are overwritten by
        # the projection below.
        pred_center2d = bbox_pred[:, :3].clone()
        bbox_pred[:, :3] = self.pts2Dto3D(bbox_pred[:, :3], view)

        level_centers2d.append(pred_center2d)
        level_boxes.append(bbox_pred)
        level_scores.append(scores)
        level_dir_scores.append(dir_cls_score)
        level_attr_scores.append(attr_score)
        level_centerness.append(centerness)

    mlvl_centers2d = torch.cat(level_centers2d)
    mlvl_bboxes = torch.cat(level_boxes)
    mlvl_dir_scores = torch.cat(level_dir_scores)

    # change local yaw to global yaw for 3D nms
    if mlvl_bboxes.shape[0] > 0:
        dir_rot = limit_period(mlvl_bboxes[..., 6] - self.dir_offset, 0,
                               np.pi)
        mlvl_bboxes[..., 6] = (
            dir_rot + self.dir_offset +
            np.pi * mlvl_dir_scores.to(mlvl_bboxes.dtype))

    # Embed cam2img into a zero-initialised 4x4 matrix.
    cam_intrinsic = mlvl_centers2d.new_zeros((4, 4))
    cam_intrinsic[:view.shape[0], :view.shape[1]] = \
        mlvl_centers2d.new_tensor(view)
    # Angle derived from the 2D center x-coordinate and the intrinsics
    # entries [0, 2] and [0, 0] (presumably principal point and focal
    # length -- confirm), added on top of the yaw.
    ray_angle = torch.atan2(mlvl_centers2d[:, 0] - cam_intrinsic[0, 2],
                            cam_intrinsic[0, 0])
    mlvl_bboxes[:, 6] = ray_angle + mlvl_bboxes[:, 6]

    mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](
        mlvl_bboxes, box_dim=self.bbox_code_size,
        origin=(0.5, 0.5, 0.5)).bev)

    mlvl_scores = torch.cat(level_scores)
    # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
    # BG cat_id: num_class -> append a zero-filled background column
    padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
    mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
    mlvl_attr_scores = torch.cat(level_attr_scores)
    mlvl_centerness = torch.cat(level_centerness)

    # no scale_factors in box3d_multiclass_nms
    # Then we multiply it from outside
    mlvl_nms_scores = mlvl_scores * mlvl_centerness[:, None]
    bboxes, scores, labels, dir_scores, attrs = box3d_multiclass_nms(
        mlvl_bboxes, mlvl_bboxes_for_nms, mlvl_nms_scores, cfg.score_thr,
        cfg.max_per_img, cfg, mlvl_dir_scores, mlvl_attr_scores)
    attrs = attrs.to(labels.dtype)  # change data type to int

    # Note that the predictions use origin (0.5, 0.5, 0.5)
    # Due to the ground truth centers2d are the gravity center of objects
    # v0.10.0 fix inplace operation to the input tensor of cam_box3d
    # So here we also need to add origin=(0.5, 0.5, 0.5)
    bboxes = input_meta['box_type_3d'](
        bboxes, box_dim=self.bbox_code_size, origin=(0.5, 0.5, 0.5))

    if not self.pred_attrs:
        attrs = None

    return bboxes, scores, labels, attrs