def test_groupfree3dnet():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
    _setup_seed(0)
    groupfree3d_cfg = _get_detector_cfg(
        'groupfree3d/groupfree3d_8x4_scannet-3d-18class-L6-O256.py')
    self = build_detector(groupfree3d_cfg).cuda()
    points_0 = torch.rand([50000, 3], device='cuda')
    points_1 = torch.rand([50000, 3], device='cuda')
    points = [points_0, points_1]
    img_meta_0 = dict(box_type_3d=DepthInstance3DBoxes)
    img_meta_1 = dict(box_type_3d=DepthInstance3DBoxes)
    img_metas = [img_meta_0, img_meta_1]
    gt_bbox_0 = DepthInstance3DBoxes(torch.rand([10, 7], device='cuda'))
    gt_bbox_1 = DepthInstance3DBoxes(torch.rand([10, 7], device='cuda'))
    gt_bboxes = [gt_bbox_0, gt_bbox_1]
    gt_labels_0 = torch.randint(0, 18, [10], device='cuda')
    gt_labels_1 = torch.randint(0, 18, [10], device='cuda')
    gt_labels = [gt_labels_0, gt_labels_1]
    pts_instance_mask_1 = torch.randint(0, 10, [50000], device='cuda')
    pts_instance_mask_2 = torch.randint(0, 10, [50000], device='cuda')
    pts_instance_mask = [pts_instance_mask_1, pts_instance_mask_2]
    pts_semantic_mask_1 = torch.randint(0, 19, [50000], device='cuda')
    pts_semantic_mask_2 = torch.randint(0, 19, [50000], device='cuda')
    pts_semantic_mask = [pts_semantic_mask_1, pts_semantic_mask_2]

    # test forward_train
    losses = self.forward_train(points, img_metas, gt_bboxes, gt_labels,
                                pts_semantic_mask, pts_instance_mask)
    assert losses['sampling_objectness_loss'] >= 0
    assert losses['s5.objectness_loss'] >= 0
    assert losses['s5.semantic_loss'] >= 0
    assert losses['s5.center_loss'] >= 0
    assert losses['s5.dir_class_loss'] >= 0
    assert losses['s5.dir_res_loss'] >= 0
    assert losses['s5.size_class_loss'] >= 0
    assert losses['s5.size_res_loss'] >= 0

    # test simple_test
    with torch.no_grad():
        results = self.simple_test(points, img_metas)
    boxes_3d = results[0]['boxes_3d']
    scores_3d = results[0]['scores_3d']
    labels_3d = results[0]['labels_3d']
    assert boxes_3d.tensor.shape[0] >= 0
    assert boxes_3d.tensor.shape[1] == 7
    assert scores_3d.shape[0] >= 0
    assert labels_3d.shape[0] >= 0
def get_ann_info(self, index):
    """Get annotation info according to the given index.

    Args:
        index (int): Index of the annotation data to get.

    Returns:
        dict: Annotation information consisting of the following keys:

            - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): \
                3D ground truth bboxes.
            - gt_labels_3d (np.ndarray): Labels of ground truths.
    """
    # Use index to get the annos, thus the evalhook could also use this api
    info = self.data_infos[index]
    if info['annos']['gt_num'] != 0:
        gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype(
            np.float32)  # k, 7
        gt_labels_3d = info['annos']['class'].astype(np.int64)
    else:
        gt_bboxes_3d = np.zeros((0, 7), dtype=np.float32)
        gt_labels_3d = np.zeros((0, ), dtype=np.int64)

    # convert to the target box structure
    gt_bboxes_3d = DepthInstance3DBoxes(
        gt_bboxes_3d, origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)

    anns_results = dict(
        gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels_3d)
    return anns_results
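# A minimal sketch of the origin handling in get_ann_info above (assumes
# mmdet3d is installed): boxes annotated with a gravity-center origin
# (0.5, 0.5, 0.5) are shifted to the bottom-center origin that
# DepthInstance3DBoxes stores internally, i.e. z is reduced by half the box
# height. _example_origin_shift is an illustrative helper, not dataset API.
def _example_origin_shift():
    raw = np.array([[1.0, 2.0, 0.5, 0.8, 0.6, 1.0, 0.0]], dtype=np.float32)
    boxes = DepthInstance3DBoxes(raw, origin=(0.5, 0.5, 0.5))
    # stored z equals 0.5 - 1.0 / 2 = 0.0 after the shift
    assert abs(float(boxes.tensor[0, 2])) < 1e-6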
def show(self, results, out_dir, show=True, pipeline=None):
    """Results visualization.

    Args:
        results (list[dict]): List of bounding boxes results.
        out_dir (str): Output directory of visualization result.
        show (bool): Whether to visualize the results online.
        pipeline (list[dict], optional): Raw data loading pipeline used
            for showing. Default: None.
    """
    assert out_dir is not None, 'Expect out_dir, got none.'
    pipeline = self._get_pipeline(pipeline)
    for i, result in enumerate(results):
        data_info = self.data_infos[i]
        pts_path = data_info['pts_path']
        file_name = osp.split(pts_path)[-1].split('.')[0]
        points, img_metas, img, calib = self._extract_data(
            i, pipeline, ['points', 'img_metas', 'img', 'calib'])
        # scale colors to [0, 255]
        points = points.numpy()
        points[:, 3:] *= 255
        gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
        pred_bboxes = result['boxes_3d'].tensor.numpy()
        show_result(points, gt_bboxes.copy(), pred_bboxes.copy(), out_dir,
                    file_name, show)
        # multi-modality visualization
        if self.modality['use_camera'] and 'calib' in data_info.keys():
            img = img.numpy()
            # need to transpose the channel to the last dim
            img = img.transpose(1, 2, 0)
            pred_bboxes = DepthInstance3DBoxes(
                pred_bboxes, origin=(0.5, 0.5, 0))
            gt_bboxes = DepthInstance3DBoxes(
                gt_bboxes, origin=(0.5, 0.5, 0))
            show_multi_modality_result(
                img,
                gt_bboxes,
                pred_bboxes,
                calib,
                out_dir,
                file_name,
                depth_bbox=True,
                img_metas=img_metas,
                show=show)
def show(self, results, out_dir, show=True):
    """Results visualization.

    Args:
        results (list[dict]): List of bounding boxes results.
        out_dir (str): Output directory of visualization result.
        show (bool): Whether to visualize the results online.
    """
    assert out_dir is not None, 'Expect out_dir, got none.'
    for i, result in enumerate(results):
        data_info = self.data_infos[i]
        pts_path = data_info['pts_path']
        file_name = osp.split(pts_path)[-1].split('.')[0]
        if hasattr(self, 'pipeline'):
            example = self.prepare_test_data(i)
        else:
            example = None
        points = np.fromfile(
            osp.join(self.data_root, pts_path),
            dtype=np.float32).reshape(-1, 6)
        # scale colors to [0, 255]
        points[:, 3:] *= 255
        gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
        pred_bboxes = result['boxes_3d'].tensor.numpy()
        show_result(points, gt_bboxes.copy(), pred_bboxes.copy(), out_dir,
                    file_name, show)
        # multi-modality visualization
        if self.modality['use_camera'] and example is not None and \
                'calib' in data_info.keys():
            img = mmcv.imread(example['img_metas']._data['filename'])
            pred_bboxes = DepthInstance3DBoxes(
                pred_bboxes, origin=(0.5, 0.5, 0))
            gt_bboxes = DepthInstance3DBoxes(
                gt_bboxes, origin=(0.5, 0.5, 0))
            show_multi_modality_result(
                img,
                gt_bboxes,
                pred_bboxes,
                example['calib'],
                out_dir,
                file_name,
                depth_bbox=True,
                img_metas=example['img_metas']._data,
                show=show)
def test_vote_net():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
    _setup_seed(0)
    vote_net_cfg = _get_detector_cfg(
        'votenet/votenet_16x8_sunrgbd-3d-10class.py')
    self = build_detector(vote_net_cfg).cuda()
    points_0 = torch.rand([2000, 4], device='cuda')
    points_1 = torch.rand([2000, 4], device='cuda')
    points = [points_0, points_1]
    img_meta_0 = dict(box_type_3d=DepthInstance3DBoxes)
    img_meta_1 = dict(box_type_3d=DepthInstance3DBoxes)
    img_metas = [img_meta_0, img_meta_1]
    gt_bbox_0 = DepthInstance3DBoxes(torch.rand([10, 7], device='cuda'))
    gt_bbox_1 = DepthInstance3DBoxes(torch.rand([10, 7], device='cuda'))
    gt_bboxes = [gt_bbox_0, gt_bbox_1]
    gt_labels_0 = torch.randint(0, 10, [10], device='cuda')
    gt_labels_1 = torch.randint(0, 10, [10], device='cuda')
    gt_labels = [gt_labels_0, gt_labels_1]

    # test forward_train
    losses = self.forward_train(points, img_metas, gt_bboxes, gt_labels)
    assert losses['vote_loss'] >= 0
    assert losses['objectness_loss'] >= 0
    assert losses['semantic_loss'] >= 0
    assert losses['center_loss'] >= 0
    assert losses['dir_class_loss'] >= 0
    assert losses['dir_res_loss'] >= 0
    assert losses['size_class_loss'] >= 0
    assert losses['size_res_loss'] >= 0

    # test simple_test
    with torch.no_grad():
        results = self.simple_test(points, img_metas)
    boxes_3d = results[0]['boxes_3d']
    scores_3d = results[0]['scores_3d']
    labels_3d = results[0]['labels_3d']
    assert boxes_3d.tensor.shape[0] >= 0
    assert boxes_3d.tensor.shape[1] == 7
    assert scores_3d.shape[0] >= 0
    assert labels_3d.shape[0] >= 0
def test_merge_aug_bboxes_3d():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
    img_meta_0 = dict(
        pcd_horizontal_flip=False,
        pcd_vertical_flip=True,
        pcd_scale_factor=1.0)
    img_meta_1 = dict(
        pcd_horizontal_flip=True,
        pcd_vertical_flip=False,
        pcd_scale_factor=1.0)
    img_meta_2 = dict(
        pcd_horizontal_flip=False,
        pcd_vertical_flip=False,
        pcd_scale_factor=0.5)
    img_metas = [[img_meta_0], [img_meta_1], [img_meta_2]]
    boxes_3d = DepthInstance3DBoxes(
        torch.tensor(
            [[1.0473, 4.1687, -1.2317, 2.3021, 1.8876, 1.9696, 1.6956],
             [2.5831, 4.8117, -1.2733, 0.5852, 0.8832, 0.9733, 1.6500],
             [-1.0864, 1.9045, -1.2000, 0.7128, 1.5631, 2.1045, 0.1022]],
            device='cuda'))
    labels_3d = torch.tensor([0, 7, 6])
    scores_3d = torch.tensor([0.5, 1.0, 1.0])
    aug_result = dict(
        boxes_3d=boxes_3d, labels_3d=labels_3d, scores_3d=scores_3d)
    aug_results = [aug_result, aug_result, aug_result]
    test_cfg = mmcv.ConfigDict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_thr=0.01,
        score_thr=0.1,
        min_bbox_size=0,
        nms_pre=100,
        max_num=50)
    results = merge_aug_bboxes_3d(aug_results, img_metas, test_cfg)
    expected_boxes_3d = torch.tensor(
        [[-1.0864, -1.9045, -1.2000, 0.7128, 1.5631, 2.1045, -0.1022],
         [1.0864, 1.9045, -1.2000, 0.7128, 1.5631, 2.1045, 3.0394],
         [-2.1728, 3.8090, -2.4000, 1.4256, 3.1262, 4.2090, 0.1022],
         [2.5831, -4.8117, -1.2733, 0.5852, 0.8832, 0.9733, -1.6500],
         [-2.5831, 4.8117, -1.2733, 0.5852, 0.8832, 0.9733, 1.4916],
         [5.1662, 9.6234, -2.5466, 1.1704, 1.7664, 1.9466, 1.6500],
         [1.0473, -4.1687, -1.2317, 2.3021, 1.8876, 1.9696, -1.6956],
         [-1.0473, 4.1687, -1.2317, 2.3021, 1.8876, 1.9696, 1.4460],
         [2.0946, 8.3374, -2.4634, 4.6042, 3.7752, 3.9392, 1.6956]])
    expected_scores_3d = torch.tensor([
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.5000, 0.5000,
        0.5000
    ])
    expected_labels_3d = torch.tensor([6, 6, 6, 7, 7, 7, 0, 0, 0])
    assert torch.allclose(results['boxes_3d'].tensor, expected_boxes_3d)
    assert torch.allclose(results['scores_3d'], expected_scores_3d)
    assert torch.all(results['labels_3d'] == expected_labels_3d)
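# A hedged sketch of the inverse test-time-augmentation mapping that
# merge_aug_bboxes_3d relies on: each augmented prediction is mapped back to
# the original frame by undoing the flips and the scale recorded in img_meta
# (mirroring mmdet3d's bbox3d_mapping_back). _example_map_back is an
# illustrative helper, not a library function.
def _example_map_back(boxes, img_meta):
    mapped = boxes.clone()
    if img_meta['pcd_horizontal_flip']:
        mapped.flip('horizontal')
    if img_meta['pcd_vertical_flip']:
        mapped.flip('vertical')
    # img_meta_2 above used pcd_scale_factor=0.5, so its boxes are doubled
    mapped.scale(1.0 / img_meta['pcd_scale_factor'])
    return mapped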
def test_3dssd():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
    _setup_seed(0)
    ssd3d_cfg = _get_detector_cfg('3dssd/3dssd_4x4_kitti-3d-car.py')
    self = build_detector(ssd3d_cfg).cuda()
    points_0 = torch.rand([2000, 4], device='cuda')
    points_1 = torch.rand([2000, 4], device='cuda')
    points = [points_0, points_1]
    img_meta_0 = dict(box_type_3d=DepthInstance3DBoxes)
    img_meta_1 = dict(box_type_3d=DepthInstance3DBoxes)
    img_metas = [img_meta_0, img_meta_1]
    gt_bbox_0 = DepthInstance3DBoxes(torch.rand([10, 7], device='cuda'))
    gt_bbox_1 = DepthInstance3DBoxes(torch.rand([10, 7], device='cuda'))
    gt_bboxes = [gt_bbox_0, gt_bbox_1]
    gt_labels_0 = torch.zeros([10], device='cuda').long()
    gt_labels_1 = torch.zeros([10], device='cuda').long()
    gt_labels = [gt_labels_0, gt_labels_1]

    # test forward_train
    losses = self.forward_train(points, img_metas, gt_bboxes, gt_labels)
    assert losses['vote_loss'] >= 0
    assert losses['centerness_loss'] >= 0
    assert losses['center_loss'] >= 0
    assert losses['dir_class_loss'] >= 0
    assert losses['dir_res_loss'] >= 0
    assert losses['corner_loss'] >= 0
    assert losses['size_res_loss'] >= 0

    # test simple_test
    with torch.no_grad():
        results = self.simple_test(points, img_metas)
    boxes_3d = results[0]['boxes_3d']
    scores_3d = results[0]['scores_3d']
    labels_3d = results[0]['labels_3d']
    assert boxes_3d.tensor.shape[0] >= 0
    assert boxes_3d.tensor.shape[1] == 7
    assert scores_3d.shape[0] >= 0
    assert labels_3d.shape[0] >= 0
def test_load_annotations3D():
    # Test scannet LoadAnnotations3D
    scannet_info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')[0]
    scannet_load_annotations3D = LoadAnnotations3D(
        with_bbox_3d=True,
        with_label_3d=True,
        with_mask_3d=True,
        with_seg_3d=True)
    scannet_results = dict()
    data_path = './tests/data/scannet'

    if scannet_info['annos']['gt_num'] != 0:
        scannet_gt_bboxes_3d = scannet_info['annos']['gt_boxes_upright_depth']
        scannet_gt_labels_3d = scannet_info['annos']['class']
    else:
        scannet_gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)
        scannet_gt_labels_3d = np.zeros((1, ))

    # prepare input of loading pipeline
    scannet_results['ann_info'] = dict()
    scannet_results['ann_info']['pts_instance_mask_path'] = osp.join(
        data_path, scannet_info['pts_instance_mask_path'])
    scannet_results['ann_info']['pts_semantic_mask_path'] = osp.join(
        data_path, scannet_info['pts_semantic_mask_path'])
    scannet_results['ann_info']['gt_bboxes_3d'] = DepthInstance3DBoxes(
        scannet_gt_bboxes_3d, box_dim=6, with_yaw=False)
    scannet_results['ann_info']['gt_labels_3d'] = scannet_gt_labels_3d
    scannet_results['bbox3d_fields'] = []
    scannet_results['pts_mask_fields'] = []
    scannet_results['pts_seg_fields'] = []

    scannet_results = scannet_load_annotations3D(scannet_results)
    scannet_gt_boxes = scannet_results['gt_bboxes_3d']
    scannet_gt_labels = scannet_results['gt_labels_3d']
    scannet_pts_instance_mask = scannet_results['pts_instance_mask']
    scannet_pts_semantic_mask = scannet_results['pts_semantic_mask']
    repr_str = repr(scannet_load_annotations3D)
    expected_repr_str = 'LoadAnnotations3D(\n    with_bbox_3d=True, ' \
                        'with_label_3d=True, with_attr_label=False, ' \
                        'with_mask_3d=True, with_seg_3d=True, ' \
                        'with_bbox=False, with_label=False, ' \
                        'with_mask=False, with_seg=False, ' \
                        'with_bbox_depth=False, poly2mask=True)'
    assert repr_str == expected_repr_str
    assert scannet_gt_boxes.tensor.shape == (27, 7)
    assert scannet_gt_labels.shape == (27, )
    assert scannet_pts_instance_mask.shape == (100, )
    assert scannet_pts_semantic_mask.shape == (100, )
def get_ann_info(self, index):
    """Get annotation info according to the given index.

    Args:
        index (int): Index of the annotation data to get.

    Returns:
        dict: Annotation information consisting of the following keys:

            - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): \
                3D ground truth bboxes.
            - gt_labels_3d (np.ndarray): Labels of ground truths.
            - pts_instance_mask_path (str): Path of instance masks.
            - pts_semantic_mask_path (str): Path of semantic masks.
            - axis_align_matrix (np.ndarray): Transformation matrix for \
                global scene alignment.
    """
    # Use index to get the annos, thus the evalhook could also use this api
    info = self.data_infos[index]
    if info['annos']['gt_num'] != 0:
        gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype(
            np.float32)  # k, 6
        gt_labels_3d = info['annos']['class'].astype(np.int64)
    else:
        gt_bboxes_3d = np.zeros((0, 6), dtype=np.float32)
        gt_labels_3d = np.zeros((0, ), dtype=np.int64)

    # convert to the target box structure
    gt_bboxes_3d = DepthInstance3DBoxes(
        gt_bboxes_3d,
        box_dim=gt_bboxes_3d.shape[-1],
        with_yaw=False,
        origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)

    pts_instance_mask_path = osp.join(self.data_root,
                                      info['pts_instance_mask_path'])
    pts_semantic_mask_path = osp.join(self.data_root,
                                      info['pts_semantic_mask_path'])
    axis_align_matrix = self._get_axis_align_matrix(info)

    anns_results = dict(
        gt_bboxes_3d=gt_bboxes_3d,
        gt_labels_3d=gt_labels_3d,
        pts_instance_mask_path=pts_instance_mask_path,
        pts_semantic_mask_path=pts_semantic_mask_path,
        axis_align_matrix=axis_align_matrix)
    return anns_results
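# Sketch of how the axis_align_matrix returned above is typically consumed
# (the GlobalAlignment transform applies it inside the data pipeline): a 4x4
# homogeneous transform acting on the xyz columns of the point cloud.
# _example_align_points is an illustrative helper, not dataset API.
def _example_align_points(points, axis_align_matrix):
    # points: (N, >=3) float array; axis_align_matrix: (4, 4) float array
    pts_h = np.concatenate(
        [points[:, :3],
         np.ones((points.shape[0], 1), dtype=points.dtype)], axis=1)
    aligned = points.copy()
    aligned[:, :3] = (pts_h @ axis_align_matrix.T)[:, :3]
    return aligned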
def test_scannet_pipeline():
    class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door',
                   'window', 'bookshelf', 'picture', 'counter', 'desk',
                   'curtain', 'refrigerator', 'showercurtrain', 'toilet',
                   'sink', 'bathtub', 'garbagebin')
    np.random.seed(0)
    pipelines = [
        dict(
            type='LoadPointsFromFile',
            coord_type='DEPTH',
            shift_height=True,
            load_dim=6,
            use_dim=[0, 1, 2]),
        dict(
            type='LoadAnnotations3D',
            with_bbox_3d=True,
            with_label_3d=True,
            with_mask_3d=True,
            with_seg_3d=True),
        dict(type='IndoorPointSample', num_points=5),
        dict(
            type='RandomFlip3D',
            sync_2d=False,
            flip_ratio_bev_horizontal=1.0,
            flip_ratio_bev_vertical=1.0),
        dict(
            type='GlobalRotScaleTrans',
            rot_range=[-0.087266, 0.087266],
            scale_ratio_range=[1.0, 1.0],
            shift_height=True),
        dict(type='DefaultFormatBundle3D', class_names=class_names),
        dict(
            type='Collect3D',
            keys=[
                'points', 'gt_bboxes_3d', 'gt_labels_3d',
                'pts_semantic_mask', 'pts_instance_mask'
            ]),
    ]
    pipeline = Compose(pipelines)
    info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')[0]
    results = dict()
    data_path = './tests/data/scannet'
    results['pts_filename'] = osp.join(data_path, info['pts_path'])
    if info['annos']['gt_num'] != 0:
        scannet_gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype(
            np.float32)
        scannet_gt_labels_3d = info['annos']['class'].astype(np.int64)
    else:
        scannet_gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)
        scannet_gt_labels_3d = np.zeros((1, ), dtype=np.int64)
    results['ann_info'] = dict()
    results['ann_info']['pts_instance_mask_path'] = osp.join(
        data_path, info['pts_instance_mask_path'])
    results['ann_info']['pts_semantic_mask_path'] = osp.join(
        data_path, info['pts_semantic_mask_path'])
    results['ann_info']['gt_bboxes_3d'] = DepthInstance3DBoxes(
        scannet_gt_bboxes_3d, box_dim=6, with_yaw=False)
    results['ann_info']['gt_labels_3d'] = scannet_gt_labels_3d

    results['img_fields'] = []
    results['bbox3d_fields'] = []
    results['pts_mask_fields'] = []
    results['pts_seg_fields'] = []

    results = pipeline(results)

    points = results['points']._data
    gt_bboxes_3d = results['gt_bboxes_3d']._data
    gt_labels_3d = results['gt_labels_3d']._data
    pts_semantic_mask = results['pts_semantic_mask']._data
    pts_instance_mask = results['pts_instance_mask']._data
    expected_points = torch.tensor([[-2.7231, -2.2068, 2.3543, 2.3895],
                                    [-0.4065, -3.4857, 2.1330, 2.1682],
                                    [-1.4578, 1.3510, -0.0441, -0.0089],
                                    [2.2428, -1.1323, -0.0288, 0.0064],
                                    [0.7052, -2.9752, 1.5560, 1.5912]])
    expected_gt_bboxes_3d = torch.tensor(
        [[-1.1835, -3.6317, 1.8565, 1.7577, 0.3761, 0.5724, 0.0000],
         [-3.1832, 3.2269, 1.5268, 0.6727, 0.2251, 0.6715, 0.0000],
         [-0.9598, -2.2864, 0.6165, 0.7506, 2.5709, 1.2145, 0.0000],
         [-2.6988, -2.7354, 0.9722, 0.7680, 1.8877, 0.2870, 0.0000],
         [3.2989, 0.2885, 1.0712, 0.7600, 3.8814, 2.1603, 0.0000]])
    expected_gt_labels_3d = np.array([
        6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0,
        0, 0, 0, 0, 5, 5, 5
    ])
    expected_pts_semantic_mask = np.array([3, 1, 2, 2, 15])
    expected_pts_instance_mask = np.array([44, 22, 10, 10, 57])
    assert torch.allclose(points, expected_points, 1e-2)
    assert torch.allclose(gt_bboxes_3d.tensor[:5, :], expected_gt_bboxes_3d,
                          1e-2)
    assert np.all(gt_labels_3d.numpy() == expected_gt_labels_3d)
    assert np.all(pts_semantic_mask.numpy() == expected_pts_semantic_mask)
    assert np.all(pts_instance_mask.numpy() == expected_pts_instance_mask)
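# What shift_height=True contributes in the pipeline above, as a small
# sketch: a height channel measured from an estimated floor is appended to
# each point, which is the fourth column checked in expected_points
# (mirroring LoadPointsFromFile's behavior; _example_shift_height is an
# illustrative helper, not pipeline API).
def _example_shift_height(points):
    # estimate the floor as a low percentile of z, then store z - floor
    floor_height = np.percentile(points[:, 2], 0.99)
    height = points[:, 2] - floor_height
    return np.concatenate([points, height[:, None]], axis=1)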
def test_sunrgbd_pipeline():
    class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk',
                   'dresser', 'night_stand', 'bookshelf', 'bathtub')
    np.random.seed(0)
    pipelines = [
        dict(
            type='LoadPointsFromFile',
            coord_type='DEPTH',
            shift_height=True,
            load_dim=6,
            use_dim=[0, 1, 2]),
        dict(type='LoadAnnotations3D'),
        dict(
            type='RandomFlip3D',
            sync_2d=False,
            flip_ratio_bev_horizontal=1.0,
        ),
        dict(
            type='GlobalRotScaleTrans',
            rot_range=[-0.523599, 0.523599],
            scale_ratio_range=[0.85, 1.15],
            shift_height=True),
        dict(type='IndoorPointSample', num_points=5),
        dict(type='DefaultFormatBundle3D', class_names=class_names),
        dict(
            type='Collect3D',
            keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
    ]
    pipeline = Compose(pipelines)
    results = dict()
    info = mmcv.load('./tests/data/sunrgbd/sunrgbd_infos.pkl')[0]
    data_path = './tests/data/sunrgbd'
    results['pts_filename'] = osp.join(data_path, info['pts_path'])

    if info['annos']['gt_num'] != 0:
        gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype(
            np.float32)
        gt_labels_3d = info['annos']['class'].astype(np.int64)
    else:
        gt_bboxes_3d = np.zeros((1, 7), dtype=np.float32)
        gt_labels_3d = np.zeros((1, ), dtype=np.int64)

    # prepare input of pipeline
    results['ann_info'] = dict()
    results['ann_info']['gt_bboxes_3d'] = DepthInstance3DBoxes(gt_bboxes_3d)
    results['ann_info']['gt_labels_3d'] = gt_labels_3d
    results['img_fields'] = []
    results['bbox3d_fields'] = []
    results['pts_mask_fields'] = []
    results['pts_seg_fields'] = []

    results = pipeline(results)
    points = results['points']._data
    gt_bboxes_3d = results['gt_bboxes_3d']._data
    gt_labels_3d = results['gt_labels_3d']._data
    expected_points = torch.tensor([[0.8678, 1.3470, 0.1105, 0.0905],
                                    [0.8707, 1.3635, 0.0437, 0.0238],
                                    [0.8636, 1.3511, 0.0504, 0.0304],
                                    [0.8690, 1.3461, 0.1265, 0.1065],
                                    [0.8668, 1.3434, 0.1216, 0.1017]])
    expected_gt_bboxes_3d = torch.tensor(
        [[-1.2136, 4.0206, -0.2412, 2.2493, 1.8444, 1.9245, 1.3989],
         [-2.7420, 4.5777, -0.7686, 0.5718, 0.8629, 0.9510, 1.4446],
         [0.9729, 1.9087, -0.1443, 0.6965, 1.5273, 2.0563, 2.9924]])
    expected_gt_labels_3d = np.array([0, 7, 6])
    assert torch.allclose(gt_bboxes_3d.tensor, expected_gt_bboxes_3d, 1e-3)
    assert np.allclose(gt_labels_3d.flatten(), expected_gt_labels_3d)
    assert torch.allclose(points, expected_points, 1e-2)
def test_partial_bin_based_box_coder():
    box_coder_cfg = dict(
        type='PartialBinBasedBBoxCoder',
        num_sizes=10,
        num_dir_bins=12,
        with_rot=True,
        mean_sizes=[[2.114256, 1.620300, 0.927272],
                    [0.791118, 1.279516, 0.718182],
                    [0.923508, 1.867419, 0.845495],
                    [0.591958, 0.552978, 0.827272],
                    [0.699104, 0.454178, 0.75625],
                    [0.69519, 1.346299, 0.736364],
                    [0.528526, 1.002642, 1.172878],
                    [0.500618, 0.632163, 0.683424],
                    [0.404671, 1.071108, 1.688889],
                    [0.76584, 1.398258, 0.472728]])
    box_coder = build_bbox_coder(box_coder_cfg)

    # test encode
    gt_bboxes = DepthInstance3DBoxes(
        [[0.8308, 4.1168, -1.2035, 2.2493, 1.8444, 1.9245, 1.6486],
         [2.3002, 4.8149, -1.2442, 0.5718, 0.8629, 0.9510, 1.6030],
         [-1.1477, 1.8090, -1.1725, 0.6965, 1.5273, 2.0563, 0.0552]])
    gt_labels = torch.tensor([0, 1, 2])
    center_target, size_class_target, size_res_target, dir_class_target, \
        dir_res_target = box_coder.encode(gt_bboxes, gt_labels)
    expected_center_target = torch.tensor([[0.8308, 4.1168, -0.2413],
                                           [2.3002, 4.8149, -0.7687],
                                           [-1.1477, 1.8090, -0.1444]])
    expected_size_class_target = torch.tensor([0, 1, 2])
    expected_size_res_target = torch.tensor([[0.1350, 0.2241, 0.9972],
                                             [-0.2193, -0.4166, 0.2328],
                                             [-0.2270, -0.3401, 1.2108]])
    expected_dir_class_target = torch.tensor([3, 3, 0])
    expected_dir_res_target = torch.tensor([0.0778, 0.0322, 0.0552])
    assert torch.allclose(center_target, expected_center_target, atol=1e-4)
    assert torch.all(size_class_target == expected_size_class_target)
    assert torch.allclose(
        size_res_target, expected_size_res_target, atol=1e-4)
    assert torch.all(dir_class_target == expected_dir_class_target)
    assert torch.allclose(dir_res_target, expected_dir_res_target, atol=1e-4)

    # test decode
    center = torch.tensor([[[0.8014, 3.4134, -0.6133],
                            [2.6375, 8.4191, 2.0438],
                            [4.2017, 5.2504, -0.7851],
                            [-1.0088, 5.4107, 1.6293],
                            [1.4837, 4.0268, 0.6222]]])
    size_class = torch.tensor(
        [[[-1.0061, -2.2788, 1.1322, -4.4380, -11.0526, -2.8113, -2.0642,
           -7.5886, -4.8627, -5.0437],
          [-2.2058, -0.3527, -1.9976, 0.8815, -2.7980, -1.9053, -0.5097,
           -2.0232, -1.4242, -4.1192],
          [-1.4783, -0.1009, -1.1537, 0.3052, -4.3147, -2.6529, 0.2729,
           -0.3755, -2.6479, -3.7548],
          [-6.1809, -3.5024, -8.3273, 1.1252, -4.3315, -7.8288, -4.6091,
           -5.8153, 0.7480, -10.1396],
          [-9.0424, -3.7883, -6.0788, -1.8855, -10.2493, -9.7164, -1.0658,
           -4.1713, 1.1173, -10.6204]]])
    size_res = torch.tensor(
        [[[[-9.8976e-02, -5.2152e-01, -7.6421e-02],
           [1.4593e-01, 5.6099e-01, 8.9421e-02],
           [5.1481e-02, 3.9280e-01, 1.2705e-01],
           [3.6869e-01, 7.0558e-01, 1.4647e-01],
           [4.7683e-01, 3.3644e-01, 2.3481e-01],
           [8.7346e-02, 8.4987e-01, 3.3265e-01],
           [2.1393e-01, 8.5585e-01, 9.8948e-02],
           [7.8530e-02, 5.9694e-02, -8.7211e-02],
           [1.8551e-01, 1.1308e+00, -5.1864e-01],
           [3.6485e-01, 7.3757e-01, 1.5264e-01]],
          [[-9.5593e-01, -5.0455e-01, 1.9554e-01],
           [-1.0870e-01, 1.8025e-01, 1.0228e-01],
           [-8.2882e-02, -4.3771e-01, 9.2135e-02],
           [-4.0840e-02, -5.9841e-02, 1.1982e-01],
           [7.3448e-02, 5.2045e-02, 1.7301e-01],
           [-4.0440e-02, 4.9532e-02, 1.1266e-01],
           [3.5857e-02, 1.3564e-02, 1.0212e-01],
           [-1.0407e-01, -5.9321e-02, 9.2622e-02],
           [7.4691e-03, 9.3080e-02, -4.4077e-01],
           [-6.0121e-02, -1.3381e-01, -6.8083e-02]],
          [[-9.3970e-01, -9.7823e-01, -5.1075e-02],
           [-1.2843e-01, -1.8381e-01, 7.1327e-02],
           [-1.2247e-01, -8.1115e-01, 3.6495e-02],
           [4.9154e-02, -4.5440e-02, 8.9520e-02],
           [1.5653e-01, 3.5990e-02, 1.6414e-01],
           [-5.9621e-02, 4.9357e-03, 1.4264e-01],
           [8.5235e-04, -1.0030e-01, -3.0712e-02],
           [-3.7255e-02, 2.8996e-02, 5.5545e-02],
           [3.9298e-02, -4.7420e-02, -4.9147e-01],
           [-1.1548e-01, -1.5895e-01, -3.9155e-02]],
          [[-1.8725e+00, -7.4102e-01, 1.0524e+00],
           [-3.3210e-01, 4.7828e-02, -3.2666e-02],
           [-2.7949e-01, 5.5541e-02, -1.0059e-01],
           [-8.5533e-02, 1.4870e-01, -1.6709e-01],
           [3.8283e-01, 2.6609e-01, 2.1361e-01],
           [-4.2156e-01, 3.2455e-01, 6.7309e-01],
           [-2.4336e-02, -8.3366e-02, 3.9913e-01],
           [8.2142e-03, 4.8323e-02, -1.5247e-01],
           [-4.8142e-02, -3.0074e-01, -1.6829e-01],
           [1.3274e-01, -2.3825e-01, -1.8127e-01]],
          [[-1.2576e+00, -6.1550e-01, 7.9430e-01],
           [-4.7222e-01, 1.5634e+00, -5.9460e-02],
           [-3.5367e-01, 1.3616e+00, -1.6421e-01],
           [-1.6611e-02, 2.4231e-01, -9.6188e-02],
           [5.4486e-01, 4.6833e-01, 5.1151e-01],
           [-6.1755e-01, 1.0292e+00, 1.2458e+00],
           [-6.8152e-02, 2.4786e-01, 9.5088e-01],
           [-4.8745e-02, 1.5134e-01, -9.9962e-02],
           [2.4485e-03, -7.5991e-02, 1.3545e-01],
           [4.1608e-01, -1.2093e-01, -3.1643e-01]]]])
    dir_class = torch.tensor(
        [[[-1.0230, -5.1965, -5.2195, 2.4030, -2.7661, -7.3399, -1.1640,
           -4.0630, -5.2940, 0.8245, -3.1869, -6.1743],
          [-1.9503, -1.6940, -0.8716, -1.1494, -0.8196, 0.2862, -0.2921,
           -0.7894, -0.2481, -0.9916, -1.4304, -1.2466],
          [-1.7435, -1.2043, -0.1265, 0.5083, -0.0717, -0.9560, -1.6171,
           -2.6463, -2.3863, -2.1358, -1.8812, -2.3117],
          [-1.9282, 0.3792, -1.8426, -1.4587, -0.8582, -3.4639, -3.2133,
           -3.7867, -7.6781, -6.4459, -6.2455, -5.4797],
          [-3.1869, 0.4456, -0.5824, 0.9994, -1.0554, -8.4232, -7.7019,
           -7.1382, -10.2724, -7.8229, -8.1860, -8.6194]]])
    dir_res = torch.tensor(
        [[[1.1022e-01, -2.3750e-01, 2.0381e-01, 1.2177e-01, -2.8501e-01,
           1.5351e-01, 1.2218e-01, -2.0677e-01, 1.4468e-01, 1.1593e-01,
           -2.6864e-01, 1.1290e-01],
          [-1.5788e-02, 4.1538e-02, -2.2857e-04, -1.4011e-02, 4.2560e-02,
           -3.1186e-03, -5.0343e-02, 6.8110e-03, -2.6728e-02, -3.2781e-02,
           3.6889e-02, -1.5609e-03],
          [1.9004e-02, 5.7105e-03, 6.0329e-02, 1.3074e-02, -2.5546e-02,
           -1.1456e-02, -3.2484e-02, -3.3487e-02, 1.6609e-03, 1.7095e-02,
           1.2647e-05, 2.4814e-02],
          [1.4482e-01, -6.3083e-02, 5.8307e-02, 9.1396e-02, -8.4571e-02,
           4.5890e-02, 5.6243e-02, -1.2448e-01, -9.5244e-02, 4.5746e-02,
           -1.7390e-02, 9.0267e-02],
          [1.8065e-01, -2.0078e-02, 8.5401e-02, 1.0784e-01, -1.2495e-01,
           2.2796e-02, 1.1310e-01, -8.4364e-02, -1.1904e-01, 6.1180e-02,
           -1.8109e-02, 1.1229e-01]]])
    bbox_out = dict(
        center=center,
        size_class=size_class,
        size_res=size_res,
        dir_class=dir_class,
        dir_res=dir_res)

    bbox3d = box_coder.decode(bbox_out)
    expected_bbox3d = torch.tensor(
        [[[0.8014, 3.4134, -0.6133, 0.9750, 2.2602, 0.9725, 1.6926],
          [2.6375, 8.4191, 2.0438, 0.5511, 0.4931, 0.9471, 2.6149],
          [4.2017, 5.2504, -0.7851, 0.6411, 0.5075, 0.9168, 1.5839],
          [-1.0088, 5.4107, 1.6293, 0.5064, 0.7017, 0.6602, 0.4605],
          [1.4837, 4.0268, 0.6222, 0.4071, 0.9951, 1.8243, 1.6786]]])
    assert torch.allclose(bbox3d, expected_bbox3d, atol=1e-4)

    # test split_pred
    box_preds = torch.rand(2, 79, 256)
    base_xyz = torch.rand(2, 256, 3)
    results = box_coder.split_pred(box_preds, base_xyz)
    obj_scores = results['obj_scores']
    center = results['center']
    dir_class = results['dir_class']
    dir_res_norm = results['dir_res_norm']
    dir_res = results['dir_res']
    size_class = results['size_class']
    size_res_norm = results['size_res_norm']
    size_res = results['size_res']
    sem_scores = results['sem_scores']
    assert obj_scores.shape == torch.Size([2, 256, 2])
    assert center.shape == torch.Size([2, 256, 3])
    assert dir_class.shape == torch.Size([2, 256, 12])
    assert dir_res_norm.shape == torch.Size([2, 256, 12])
    assert dir_res.shape == torch.Size([2, 256, 12])
    assert size_class.shape == torch.Size([2, 256, 10])
    assert size_res_norm.shape == torch.Size([2, 256, 10, 3])
    assert size_res.shape == torch.Size([2, 256, 10, 3])
    assert sem_scores.shape == torch.Size([2, 256, 10])
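# The bin-based direction encoding tested above, reduced to plain arithmetic
# (a sketch of the angle2class logic in PartialBinBasedBBoxCoder): the yaw
# circle is cut into num_dir_bins equal bins and each angle is stored as a
# bin index plus a residual from the bin center. _example_angle_to_bin is an
# illustrative helper, not coder API.
def _example_angle_to_bin(yaw, num_dir_bins=12):
    angle_per_bin = 2 * np.pi / num_dir_bins
    shifted = (yaw % (2 * np.pi) + angle_per_bin / 2) % (2 * np.pi)
    dir_class = int(shifted // angle_per_bin)
    dir_res = shifted - (dir_class * angle_per_bin + angle_per_bin / 2)
    return dir_class, dir_res

# e.g. the first GT yaw above: _example_angle_to_bin(1.6486) -> (3, ~0.0778),
# matching expected_dir_class_target[0] and expected_dir_res_target[0].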
def test_show_result_meshlab():
    pcd = 'tests/data/nuscenes/samples/LIDAR_TOP/n015-2018-08-02-17-16-37+' \
          '0800__LIDAR_TOP__1533201470948018.pcd.bin'
    box_3d = LiDARInstance3DBoxes(
        torch.tensor(
            [[8.7314, -1.8559, -1.5997, 0.4800, 1.2000, 1.8900, 0.0100]]))
    labels_3d = torch.tensor([0])
    scores_3d = torch.tensor([0.5])
    points = np.random.rand(100, 4)
    img_meta = dict(
        pts_filename=pcd, boxes_3d=box_3d, box_mode_3d=Box3DMode.LIDAR)
    data = dict(points=[[torch.tensor(points)]], img_metas=[[img_meta]])
    result = [
        dict(
            pts_bbox=dict(
                boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d))
    ]
    tmp_dir = tempfile.TemporaryDirectory()
    temp_out_dir = tmp_dir.name
    out_dir, file_name = show_result_meshlab(data, result, temp_out_dir)
    expected_outfile_pred = file_name + '_pred.obj'
    expected_outfile_pts = file_name + '_points.obj'
    expected_outfile_pred_path = os.path.join(out_dir, file_name,
                                              expected_outfile_pred)
    expected_outfile_pts_path = os.path.join(out_dir, file_name,
                                             expected_outfile_pts)
    assert os.path.exists(expected_outfile_pred_path)
    assert os.path.exists(expected_outfile_pts_path)
    tmp_dir.cleanup()

    # test multi-modality show
    # indoor scene
    pcd = 'tests/data/sunrgbd/points/000001.bin'
    filename = 'tests/data/sunrgbd/sunrgbd_trainval/image/000001.jpg'
    box_3d = DepthInstance3DBoxes(
        torch.tensor(
            [[-1.1580, 3.3041, -0.9961, 0.3829, 0.4647, 0.5574, 1.1213]]))
    img = np.random.randn(1, 3, 608, 832)
    k_mat = np.array([[529.5000, 0.0000, 365.0000],
                      [0.0000, 529.5000, 265.0000],
                      [0.0000, 0.0000, 1.0000]])
    rt_mat = np.array([[0.9980, 0.0058, -0.0634], [0.0058, 0.9835, 0.1808],
                       [0.0634, -0.1808, 0.9815]])
    rt_mat = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]]) @ rt_mat.transpose(
        1, 0)
    depth2img = k_mat @ rt_mat
    img_meta = dict(
        filename=filename,
        depth2img=depth2img,
        pcd_horizontal_flip=False,
        pcd_vertical_flip=False,
        box_mode_3d=Box3DMode.DEPTH,
        box_type_3d=DepthInstance3DBoxes,
        pcd_trans=np.array([0., 0., 0.]),
        pcd_scale_factor=1.0,
        pts_filename=pcd,
        transformation_3d_flow=['R', 'S', 'T'])
    data = dict(
        points=[[torch.tensor(points)]], img_metas=[[img_meta]], img=[img])
    result = [dict(boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d)]
    tmp_dir = tempfile.TemporaryDirectory()
    temp_out_dir = tmp_dir.name
    out_dir, file_name = show_result_meshlab(
        data, result, temp_out_dir, 0.3, task='multi_modality-det')
    expected_outfile_pred = file_name + '_pred.obj'
    expected_outfile_pts = file_name + '_points.obj'
    expected_outfile_png = file_name + '_img.png'
    expected_outfile_proj = file_name + '_pred.png'
    expected_outfile_pred_path = os.path.join(out_dir, file_name,
                                              expected_outfile_pred)
    expected_outfile_pts_path = os.path.join(out_dir, file_name,
                                             expected_outfile_pts)
    expected_outfile_png_path = os.path.join(out_dir, file_name,
                                             expected_outfile_png)
    expected_outfile_proj_path = os.path.join(out_dir, file_name,
                                              expected_outfile_proj)
    assert os.path.exists(expected_outfile_pred_path)
    assert os.path.exists(expected_outfile_pts_path)
    assert os.path.exists(expected_outfile_png_path)
    assert os.path.exists(expected_outfile_proj_path)
    tmp_dir.cleanup()

    # outdoor scene
    pcd = 'tests/data/kitti/training/velodyne_reduced/000000.bin'
    filename = 'tests/data/kitti/training/image_2/000000.png'
    box_3d = LiDARInstance3DBoxes(
        torch.tensor(
            [[6.4495, -3.9097, -1.7409, 1.5063, 3.1819, 1.4716, 1.8782]]))
    img = np.random.randn(1, 3, 384, 1280)
    lidar2img = np.array(
        [[6.09695435e+02, -7.21421631e+02, -1.25125790e+00, -1.23041824e+02],
         [1.80384201e+02, 7.64479828e+00, -7.19651550e+02, -1.01016693e+02],
         [9.99945343e-01, 1.24365499e-04, 1.04513029e-02, -2.69386917e-01],
         [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])
    img_meta = dict(
        filename=filename,
        pcd_horizontal_flip=False,
        pcd_vertical_flip=False,
        box_mode_3d=Box3DMode.LIDAR,
        box_type_3d=LiDARInstance3DBoxes,
        pcd_trans=np.array([0., 0., 0.]),
        pcd_scale_factor=1.0,
        pts_filename=pcd,
        lidar2img=lidar2img)
    data = dict(
        points=[[torch.tensor(points)]], img_metas=[[img_meta]], img=[img])
    result = [
        dict(
            pts_bbox=dict(
                boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d))
    ]
    tmp_dir = tempfile.TemporaryDirectory()
    temp_out_dir = tmp_dir.name
    out_dir, file_name = show_result_meshlab(
        data, result, temp_out_dir, 0.1, task='multi_modality-det')
    expected_outfile_pred = file_name + '_pred.obj'
    expected_outfile_pts = file_name + '_points.obj'
    expected_outfile_png = file_name + '_img.png'
    expected_outfile_proj = file_name + '_pred.png'
    expected_outfile_pred_path = os.path.join(out_dir, file_name,
                                              expected_outfile_pred)
    expected_outfile_pts_path = os.path.join(out_dir, file_name,
                                             expected_outfile_pts)
    expected_outfile_png_path = os.path.join(out_dir, file_name,
                                             expected_outfile_png)
    expected_outfile_proj_path = os.path.join(out_dir, file_name,
                                              expected_outfile_proj)
    assert os.path.exists(expected_outfile_pred_path)
    assert os.path.exists(expected_outfile_pts_path)
    assert os.path.exists(expected_outfile_png_path)
    assert os.path.exists(expected_outfile_proj_path)
    tmp_dir.cleanup()

    # test mono-3d show
    filename = 'tests/data/nuscenes/samples/CAM_BACK_LEFT/n015-2018-' \
               '07-18-11-07-57+0800__CAM_BACK_LEFT__1531883530447423.jpg'
    box_3d = CameraInstance3DBoxes(
        torch.tensor(
            [[6.4495, -3.9097, -1.7409, 1.5063, 3.1819, 1.4716, 1.8782]]))
    img = np.random.randn(1, 3, 384, 1280)
    cam2img = np.array([[100.0, 0.0, 50.0], [0.0, 100.0, 50.0],
                        [0.0, 0.0, 1.0]])
    img_meta = dict(
        filename=filename,
        pcd_horizontal_flip=False,
        pcd_vertical_flip=False,
        box_mode_3d=Box3DMode.CAM,
        box_type_3d=CameraInstance3DBoxes,
        pcd_trans=np.array([0., 0., 0.]),
        pcd_scale_factor=1.0,
        cam2img=cam2img)
    data = dict(
        points=[[torch.tensor(points)]], img_metas=[[img_meta]], img=[img])
    result = [
        dict(
            img_bbox=dict(
                boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d))
    ]
    tmp_dir = tempfile.TemporaryDirectory()
    temp_out_dir = tmp_dir.name
    out_dir, file_name = show_result_meshlab(
        data, result, temp_out_dir, 0.1, task='mono-det')
    expected_outfile_png = file_name + '_img.png'
    expected_outfile_proj = file_name + '_pred.png'
    expected_outfile_png_path = os.path.join(out_dir, file_name,
                                             expected_outfile_png)
    expected_outfile_proj_path = os.path.join(out_dir, file_name,
                                              expected_outfile_proj)
    assert os.path.exists(expected_outfile_png_path)
    assert os.path.exists(expected_outfile_proj_path)
    tmp_dir.cleanup()

    # test seg show
    pcd = 'tests/data/scannet/points/scene0000_00.bin'
    points = np.random.rand(100, 6)
    img_meta = dict(pts_filename=pcd)
    data = dict(points=[[torch.tensor(points)]], img_metas=[[img_meta]])
    pred_seg = torch.randint(0, 20, (100, ))
    result = [dict(semantic_mask=pred_seg)]
    tmp_dir = tempfile.TemporaryDirectory()
    temp_out_dir = tmp_dir.name
    out_dir, file_name = show_result_meshlab(
        data, result, temp_out_dir, task='seg')
    expected_outfile_pred = file_name + '_pred.obj'
    expected_outfile_pts = file_name + '_points.obj'
    expected_outfile_pred_path = os.path.join(out_dir, file_name,
                                              expected_outfile_pred)
    expected_outfile_pts_path = os.path.join(out_dir, file_name,
                                             expected_outfile_pts)
    assert os.path.exists(expected_outfile_pred_path)
    assert os.path.exists(expected_outfile_pts_path)
    tmp_dir.cleanup()
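# A hedged sketch of how the depth2img matrix assembled above maps a 3D
# point into the image plane for the multi-modality drawing: apply the 3x3
# intrinsics-times-rotation matrix, then divide by the projected depth (the
# usual pinhole model; _example_project is an illustrative helper).
def _example_project(depth2img, point_xyz):
    uvw = depth2img @ point_xyz  # point_xyz in depth coordinates, shape (3,)
    return uvw[0] / uvw[2], uvw[1] / uvw[2]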
def loss(self,
         bbox_preds,
         points,
         gt_bboxes_3d,
         gt_labels_3d,
         pts_semantic_mask=None,
         pts_instance_mask=None,
         img_metas=None,
         rpn_targets=None,
         gt_bboxes_ignore=None):
    """Compute loss.

    Args:
        bbox_preds (dict): Predictions from forward of h3d bbox head.
        points (list[torch.Tensor]): Input points.
        gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
            bboxes of each sample.
        gt_labels_3d (list[torch.Tensor]): Labels of each sample.
        pts_semantic_mask (None | list[torch.Tensor]): Point-wise
            semantic mask.
        pts_instance_mask (None | list[torch.Tensor]): Point-wise
            instance mask.
        img_metas (list[dict]): Contain pcd and img's meta info.
        rpn_targets (tuple): Targets generated by rpn head.
        gt_bboxes_ignore (None | list[torch.Tensor]): Specify which
            bounding boxes to ignore.

    Returns:
        dict: Losses of H3DNet.
    """
    (vote_targets, vote_target_masks, size_class_targets, size_res_targets,
     dir_class_targets, dir_res_targets, center_targets, mask_targets,
     valid_gt_masks, objectness_targets, objectness_weights,
     box_loss_weights, valid_gt_weights) = rpn_targets

    losses = {}

    # calculate refined proposal loss
    refined_proposal_loss = self.get_proposal_stage_loss(
        bbox_preds,
        size_class_targets,
        size_res_targets,
        dir_class_targets,
        dir_res_targets,
        center_targets,
        mask_targets,
        objectness_targets,
        objectness_weights,
        box_loss_weights,
        valid_gt_weights,
        suffix='_optimized')
    for key in refined_proposal_loss.keys():
        losses[key + '_optimized'] = refined_proposal_loss[key]

    bbox3d_optimized = self.bbox_coder.decode(
        bbox_preds, suffix='_optimized')

    targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d,
                               pts_semantic_mask, pts_instance_mask,
                               bbox_preds)
    (cues_objectness_label, cues_sem_label, proposal_objectness_label,
     cues_mask, cues_match_mask, proposal_objectness_mask,
     cues_matching_label, obj_surface_line_center) = targets

    # match scores for each geometric primitive
    objectness_scores = bbox_preds['matching_score']
    # match scores for the semantics of primitives
    objectness_scores_sem = bbox_preds['semantic_matching_score']

    primitive_objectness_loss = self.cues_objectness_loss(
        objectness_scores.transpose(2, 1),
        cues_objectness_label,
        weight=cues_mask,
        avg_factor=cues_mask.sum() + 1e-6)
    primitive_sem_loss = self.cues_semantic_loss(
        objectness_scores_sem.transpose(2, 1),
        cues_sem_label,
        weight=cues_mask,
        avg_factor=cues_mask.sum() + 1e-6)

    objectness_scores = bbox_preds['obj_scores_optimized']
    objectness_loss_refine = self.proposal_objectness_loss(
        objectness_scores.transpose(2, 1), proposal_objectness_label)
    primitive_matching_loss = (objectness_loss_refine *
                               cues_match_mask).sum() / (
                                   cues_match_mask.sum() + 1e-6) * 0.5
    primitive_sem_matching_loss = (
        objectness_loss_refine * proposal_objectness_mask).sum() / (
            proposal_objectness_mask.sum() + 1e-6) * 0.5

    # get the object surface centers here
    batch_size, object_proposal = bbox3d_optimized.shape[:2]
    refined_bbox = DepthInstance3DBoxes(
        bbox3d_optimized.reshape(-1, 7).clone(),
        box_dim=bbox3d_optimized.shape[-1],
        with_yaw=self.with_angle,
        origin=(0.5, 0.5, 0.5))

    pred_obj_surface_center, pred_obj_line_center = \
        refined_bbox.get_surface_line_center()
    pred_obj_surface_center = pred_obj_surface_center.reshape(
        batch_size, -1, 6, 3).transpose(1, 2).reshape(batch_size, -1, 3)
    pred_obj_line_center = pred_obj_line_center.reshape(
        batch_size, -1, 12, 3).transpose(1, 2).reshape(batch_size, -1, 3)
    pred_surface_line_center = torch.cat(
        (pred_obj_surface_center, pred_obj_line_center), 1)

    square_dist = self.primitive_center_loss(pred_surface_line_center,
                                             obj_surface_line_center)
    match_dist = torch.sqrt(square_dist.sum(dim=-1) + 1e-6)
    primitive_centroid_reg_loss = torch.sum(
        match_dist * cues_matching_label) / (
            cues_matching_label.sum() + 1e-6)

    refined_loss = dict(
        primitive_objectness_loss=primitive_objectness_loss,
        primitive_sem_loss=primitive_sem_loss,
        primitive_matching_loss=primitive_matching_loss,
        primitive_sem_matching_loss=primitive_sem_matching_loss,
        primitive_centroid_reg_loss=primitive_centroid_reg_loss)

    losses.update(refined_loss)
    return losses
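# The masked reductions in the loss above all follow one pattern; a minimal
# numeric sketch (with a hypothetical per-proposal loss and validity mask):
def _example_masked_mean():
    loss = torch.tensor([0.2, 0.8, 0.5])
    mask = torch.tensor([1.0, 0.0, 1.0])
    # the 1e-6 guard keeps the average finite when no proposal is valid
    return (loss * mask).sum() / (mask.sum() + 1e-6)  # -> ~0.35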
def forward(self, feats_dict, sample_mod):
    """Forward pass.

    Args:
        feats_dict (dict): Feature dict from backbone.
        sample_mod (str): Sample mode for vote aggregation layer.
            Valid modes are "vote", "seed" and "random".

    Returns:
        dict: Predictions of vote head.
    """
    ret_dict = {}
    aggregated_points = feats_dict['aggregated_points']
    original_feature = feats_dict['aggregated_features']
    batch_size = original_feature.shape[0]
    object_proposal = original_feature.shape[2]

    # extract surface centers, features and semantic predictions
    z_center = feats_dict['pred_z_center']
    xy_center = feats_dict['pred_xy_center']
    z_semantic = feats_dict['sem_cls_scores_z']
    xy_semantic = feats_dict['sem_cls_scores_xy']
    z_feature = feats_dict['aggregated_features_z']
    xy_feature = feats_dict['aggregated_features_xy']
    # extract line points and features
    line_center = feats_dict['pred_line_center']
    line_feature = feats_dict['aggregated_features_line']

    surface_center_pred = torch.cat((z_center, xy_center), dim=1)
    ret_dict['surface_center_pred'] = surface_center_pred
    ret_dict['surface_sem_pred'] = torch.cat((z_semantic, xy_semantic),
                                             dim=1)

    # extract the surface and line centers of rpn proposals
    rpn_proposals = feats_dict['proposal_list']
    rpn_proposals_bbox = DepthInstance3DBoxes(
        rpn_proposals.reshape(-1, 7).clone(),
        box_dim=rpn_proposals.shape[-1],
        with_yaw=self.with_angle,
        origin=(0.5, 0.5, 0.5))

    obj_surface_center, obj_line_center = \
        rpn_proposals_bbox.get_surface_line_center()
    obj_surface_center = obj_surface_center.reshape(
        batch_size, -1, 6, 3).transpose(1, 2).reshape(batch_size, -1, 3)
    obj_line_center = obj_line_center.reshape(
        batch_size, -1, 12, 3).transpose(1, 2).reshape(batch_size, -1, 3)
    ret_dict['surface_center_object'] = obj_surface_center
    ret_dict['line_center_object'] = obj_line_center

    # aggregate primitive z and xy features to rpn proposals
    surface_center_feature_pred = torch.cat((z_feature, xy_feature), dim=2)
    surface_center_feature_pred = torch.cat(
        (surface_center_feature_pred.new_zeros(
            (batch_size, 6, surface_center_feature_pred.shape[2])),
         surface_center_feature_pred),
        dim=1)

    surface_xyz, surface_features, _ = self.surface_center_matcher(
        surface_center_pred,
        surface_center_feature_pred,
        target_xyz=obj_surface_center)

    # aggregate primitive line features to rpn proposals
    line_feature = torch.cat(
        (line_feature.new_zeros(
            (batch_size, 12, line_feature.shape[2])), line_feature),
        dim=1)
    line_xyz, line_features, _ = self.line_center_matcher(
        line_center, line_feature, target_xyz=obj_line_center)

    # combine the surface and line features
    combine_features = torch.cat((surface_features, line_features), dim=2)

    matching_features = self.matching_conv(combine_features)
    matching_score = self.matching_pred(matching_features)
    ret_dict['matching_score'] = matching_score.transpose(2, 1)

    semantic_matching_features = self.semantic_matching_conv(
        combine_features)
    semantic_matching_score = self.semantic_matching_pred(
        semantic_matching_features)
    ret_dict['semantic_matching_score'] = \
        semantic_matching_score.transpose(2, 1)

    surface_features = self.surface_feats_aggregation(surface_features)
    line_features = self.line_feats_aggregation(line_features)

    # combine all surface and line features
    surface_features = surface_features.view(batch_size, -1,
                                             object_proposal)
    line_features = line_features.view(batch_size, -1, object_proposal)
    combine_feature = torch.cat((surface_features, line_features), dim=1)

    # final bbox predictions
    bbox_predictions = self.bbox_pred[0](combine_feature)
    bbox_predictions += original_feature
    for conv_module in self.bbox_pred[1:]:
        bbox_predictions = conv_module(bbox_predictions)

    refine_decode_res = self.bbox_coder.split_pred(
        bbox_predictions[:, :self.num_classes + 2],
        bbox_predictions[:, self.num_classes + 2:], aggregated_points)
    for key in refine_decode_res.keys():
        ret_dict[key + '_optimized'] = refine_decode_res[key]
    return ret_dict
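# Sketch of the geometric quantity used above: for each proposal,
# get_surface_line_center yields 6 face centers and 12 edge centers. For a
# yaw-free box the face centers follow directly from its center and dims
# (_example_surface_centers is an illustrative helper, not head API):
def _example_surface_centers(center, dims):
    # center: (3,) gravity center; dims: (3,) box sizes (dx, dy, dz)
    offsets = torch.tensor([[0.5, 0., 0.], [-0.5, 0., 0.], [0., 0.5, 0.],
                            [0., -0.5, 0.], [0., 0., 0.5], [0., 0., -0.5]])
    return center + offsets * dims  # (6, 3)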
def test_boxes_conversion():
    # test CAM to LIDAR and DEPTH
    cam_boxes = CameraInstance3DBoxes(
        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
         [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
    convert_lidar_boxes = Coord3DMode.convert(cam_boxes, Coord3DMode.CAM,
                                              Coord3DMode.LIDAR)
    expected_tensor = torch.tensor(
        [[-1.7501, -1.7802, -2.5162, 1.6500, 1.7500, 3.3900, 1.4800],
         [-1.6357, -8.9594, -2.4567, 1.5700, 1.5400, 4.0100, 1.6200],
         [-1.3033, -28.2967, 0.5558, 1.4800, 1.4700, 2.2300, -1.5700],
         [-1.7361, -26.6690, -21.8230, 1.4000, 1.5600, 3.4800, -1.6900],
         [-1.6218, -31.3198, -8.1621, 1.4800, 1.7400, 3.7700, 2.7900]])
    assert torch.allclose(expected_tensor, convert_lidar_boxes.tensor, 1e-3)

    convert_depth_boxes = Coord3DMode.convert(cam_boxes, Coord3DMode.CAM,
                                              Coord3DMode.DEPTH)
    expected_tensor = torch.tensor(
        [[1.7802, 1.7501, 2.5162, 1.7500, 1.6500, 3.3900, 1.4800],
         [8.9594, 1.6357, 2.4567, 1.5400, 1.5700, 4.0100, 1.6200],
         [28.2967, 1.3033, -0.5558, 1.4700, 1.4800, 2.2300, -1.5700],
         [26.6690, 1.7361, 21.8230, 1.5600, 1.4000, 3.4800, -1.6900],
         [31.3198, 1.6218, 8.1621, 1.7400, 1.4800, 3.7700, 2.7900]])
    assert torch.allclose(expected_tensor, convert_depth_boxes.tensor, 1e-3)

    # test LIDAR to CAM and DEPTH
    lidar_boxes = LiDARInstance3DBoxes(
        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
         [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
    convert_cam_boxes = Coord3DMode.convert(lidar_boxes, Coord3DMode.LIDAR,
                                            Coord3DMode.CAM)
    expected_tensor = torch.tensor(
        [[-2.5162, 1.7501, 1.7802, 3.3900, 1.6500, 1.7500, 1.4800],
         [-2.4567, 1.6357, 8.9594, 4.0100, 1.5700, 1.5400, 1.6200],
         [0.5558, 1.3033, 28.2967, 2.2300, 1.4800, 1.4700, -1.5700],
         [-21.8230, 1.7361, 26.6690, 3.4800, 1.4000, 1.5600, -1.6900],
         [-8.1621, 1.6218, 31.3198, 3.7700, 1.4800, 1.7400, 2.7900]])
    assert torch.allclose(expected_tensor, convert_cam_boxes.tensor, 1e-3)

    convert_depth_boxes = Coord3DMode.convert(lidar_boxes, Coord3DMode.LIDAR,
                                              Coord3DMode.DEPTH)
    expected_tensor = torch.tensor(
        [[-2.5162, 1.7802, -1.7501, 3.3900, 1.7500, 1.6500, 1.4800],
         [-2.4567, 8.9594, -1.6357, 4.0100, 1.5400, 1.5700, 1.6200],
         [0.5558, 28.2967, -1.3033, 2.2300, 1.4700, 1.4800, -1.5700],
         [-21.8230, 26.6690, -1.7361, 3.4800, 1.5600, 1.4000, -1.6900],
         [-8.1621, 31.3198, -1.6218, 3.7700, 1.7400, 1.4800, 2.7900]])
    assert torch.allclose(expected_tensor, convert_depth_boxes.tensor, 1e-3)

    # test DEPTH to CAM and LIDAR
    depth_boxes = DepthInstance3DBoxes(
        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
         [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
    convert_cam_boxes = Coord3DMode.convert(depth_boxes, Coord3DMode.DEPTH,
                                            Coord3DMode.CAM)
    expected_tensor = torch.tensor(
        [[1.7802, -1.7501, -2.5162, 1.7500, 1.6500, 3.3900, 1.4800],
         [8.9594, -1.6357, -2.4567, 1.5400, 1.5700, 4.0100, 1.6200],
         [28.2967, -1.3033, 0.5558, 1.4700, 1.4800, 2.2300, -1.5700],
         [26.6690, -1.7361, -21.8230, 1.5600, 1.4000, 3.4800, -1.6900],
         [31.3198, -1.6218, -8.1621, 1.7400, 1.4800, 3.7700, 2.7900]])
    assert torch.allclose(expected_tensor, convert_cam_boxes.tensor, 1e-3)

    convert_lidar_boxes = Coord3DMode.convert(depth_boxes, Coord3DMode.DEPTH,
                                              Coord3DMode.LIDAR)
    expected_tensor = torch.tensor(
        [[2.5162, -1.7802, -1.7501, 3.3900, 1.7500, 1.6500, 1.4800],
         [2.4567, -8.9594, -1.6357, 4.0100, 1.5400, 1.5700, 1.6200],
         [-0.5558, -28.2967, -1.3033, 2.2300, 1.4700, 1.4800, -1.5700],
         [21.8230, -26.6690, -1.7361, 3.4800, 1.5600, 1.4000, -1.6900],
         [8.1621, -31.3198, -1.6218, 3.7700, 1.7400, 1.4800, 2.7900]])
    assert torch.allclose(expected_tensor, convert_lidar_boxes.tensor, 1e-3)
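# The DEPTH -> LIDAR case above is a fixed axis permutation; a sketch that
# reproduces the first expected row by hand: x_lidar = y_depth,
# y_lidar = -x_depth, z is kept, and the first two box dims swap with the
# axes (_example_depth_to_lidar is an illustrative helper, not Coord3DMode):
def _example_depth_to_lidar(box):
    x, y, z, dx, dy, dz, yaw = box
    return [y, -x, z, dy, dx, dz, yaw]

# _example_depth_to_lidar([1.7802, 2.5162, -1.7501, 1.75, 3.39, 1.65, 1.48])
# -> [2.5162, -1.7802, -1.7501, 3.39, 1.75, 1.65, 1.48]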
def test_h3d_head():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
    _setup_seed(0)
    h3d_head_cfg = _get_roi_head_cfg(
        'h3dnet/h3dnet_3x8_scannet-3d-18class.py')

    num_point = 128
    num_proposal = 64
    h3d_head_cfg.primitive_list[0].vote_aggregation_cfg.num_point = num_point
    h3d_head_cfg.primitive_list[1].vote_aggregation_cfg.num_point = num_point
    h3d_head_cfg.primitive_list[2].vote_aggregation_cfg.num_point = num_point
    h3d_head_cfg.bbox_head.num_proposal = num_proposal
    self = build_head(h3d_head_cfg).cuda()

    # prepare roi outputs
    fp_xyz = [torch.rand([1, num_point, 3], dtype=torch.float32).cuda()]
    hd_features = torch.rand([1, 256, num_point],
                             dtype=torch.float32).cuda()
    fp_indices = [torch.randint(0, 128, [1, num_point]).cuda()]
    aggregated_points = torch.rand([1, num_proposal, 3],
                                   dtype=torch.float32).cuda()
    aggregated_features = torch.rand([1, 128, num_proposal],
                                     dtype=torch.float32).cuda()
    proposal_list = torch.cat([
        torch.rand([1, num_proposal, 3], dtype=torch.float32).cuda() * 4 - 2,
        torch.rand([1, num_proposal, 3], dtype=torch.float32).cuda() * 4,
        torch.zeros([1, num_proposal, 1]).cuda()
    ],
                              dim=-1)
    input_dict = dict(
        fp_xyz_net0=fp_xyz,
        hd_feature=hd_features,
        aggregated_points=aggregated_points,
        aggregated_features=aggregated_features,
        seed_points=fp_xyz[0],
        seed_indices=fp_indices[0],
        proposal_list=proposal_list)

    # prepare gt label
    from mmdet3d.core.bbox import DepthInstance3DBoxes
    gt_bboxes_3d = [
        DepthInstance3DBoxes(torch.rand([4, 7], dtype=torch.float32).cuda()),
        DepthInstance3DBoxes(torch.rand([4, 7], dtype=torch.float32).cuda())
    ]
    gt_labels_3d = torch.randint(0, 18, [1, 4]).cuda()
    gt_labels_3d = [gt_labels_3d[0]]
    pts_semantic_mask = torch.randint(0, 19, [1, num_point]).cuda()
    pts_semantic_mask = [pts_semantic_mask[0]]
    pts_instance_mask = torch.randint(0, 4, [1, num_point]).cuda()
    pts_instance_mask = [pts_instance_mask[0]]
    points = torch.rand([1, num_point, 3], dtype=torch.float32).cuda()

    # prepare rpn targets
    vote_targets = torch.rand([1, num_point, 9], dtype=torch.float32).cuda()
    vote_target_masks = torch.rand([1, num_point],
                                   dtype=torch.float32).cuda()
    size_class_targets = torch.rand([1, num_proposal],
                                    dtype=torch.float32).cuda().long()
    size_res_targets = torch.rand([1, num_proposal, 3],
                                  dtype=torch.float32).cuda()
    dir_class_targets = torch.rand([1, num_proposal],
                                   dtype=torch.float32).cuda().long()
    dir_res_targets = torch.rand([1, num_proposal],
                                 dtype=torch.float32).cuda()
    center_targets = torch.rand([1, 4, 3], dtype=torch.float32).cuda()
    mask_targets = torch.rand([1, num_proposal],
                              dtype=torch.float32).cuda().long()
    valid_gt_masks = torch.rand([1, 4], dtype=torch.float32).cuda()
    objectness_targets = torch.rand([1, num_proposal],
                                    dtype=torch.float32).cuda().long()
    objectness_weights = torch.rand([1, num_proposal],
                                    dtype=torch.float32).cuda()
    box_loss_weights = torch.rand([1, num_proposal],
                                  dtype=torch.float32).cuda()
    valid_gt_weights = torch.rand([1, 4], dtype=torch.float32).cuda()

    targets = (vote_targets, vote_target_masks, size_class_targets,
               size_res_targets, dir_class_targets, dir_res_targets,
               center_targets, mask_targets, valid_gt_masks,
               objectness_targets, objectness_weights, box_loss_weights,
               valid_gt_weights)
    input_dict['targets'] = targets

    # train forward
    ret_dict = self.forward_train(
        input_dict,
        points=points,
        gt_bboxes_3d=gt_bboxes_3d,
        gt_labels_3d=gt_labels_3d,
        pts_semantic_mask=pts_semantic_mask,
        pts_instance_mask=pts_instance_mask,
        img_metas=None)
    assert ret_dict['flag_loss_z'] >= 0
    assert ret_dict['vote_loss_z'] >= 0
    assert ret_dict['center_loss_z'] >= 0
    assert ret_dict['size_loss_z'] >= 0
    assert ret_dict['sem_loss_z'] >= 0
    assert ret_dict['objectness_loss_optimized'] >= 0
    assert ret_dict['primitive_sem_matching_loss'] >= 0
def test_primitive_head():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
    _setup_seed(0)

    primitive_head_cfg = dict(
        type='PrimitiveHead',
        num_dims=2,
        num_classes=18,
        primitive_mode='z',
        vote_module_cfg=dict(
            in_channels=256,
            vote_per_seed=1,
            gt_per_seed=1,
            conv_channels=(256, 256),
            conv_cfg=dict(type='Conv1d'),
            norm_cfg=dict(type='BN1d'),
            norm_feats=True,
            vote_loss=dict(
                type='ChamferDistance',
                mode='l1',
                reduction='none',
                loss_dst_weight=10.0)),
        vote_aggregation_cfg=dict(
            type='PointSAModule',
            num_point=64,
            radius=0.3,
            num_sample=16,
            mlp_channels=[256, 128, 128, 128],
            use_xyz=True,
            normalize_xyz=True),
        feat_channels=(128, 128),
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        objectness_loss=dict(
            type='CrossEntropyLoss',
            class_weight=[0.4, 0.6],
            reduction='mean',
            loss_weight=1.0),
        center_loss=dict(
            type='ChamferDistance',
            mode='l1',
            reduction='sum',
            loss_src_weight=1.0,
            loss_dst_weight=1.0),
        semantic_reg_loss=dict(
            type='ChamferDistance',
            mode='l1',
            reduction='sum',
            loss_src_weight=1.0,
            loss_dst_weight=1.0),
        semantic_cls_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        train_cfg=dict(
            dist_thresh=0.2,
            var_thresh=1e-2,
            lower_thresh=1e-6,
            num_point=100,
            num_point_line=10,
            line_thresh=0.2))

    self = build_head(primitive_head_cfg).cuda()
    fp_xyz = [torch.rand([2, 64, 3], dtype=torch.float32).cuda()]
    hd_features = torch.rand([2, 256, 64], dtype=torch.float32).cuda()
    fp_indices = [torch.randint(0, 64, [2, 64]).cuda()]
    input_dict = dict(
        fp_xyz_net0=fp_xyz,
        hd_feature=hd_features,
        fp_indices_net0=fp_indices)

    # test forward
    ret_dict = self(input_dict, 'vote')
    assert ret_dict['center_z'].shape == torch.Size([2, 64, 3])
    assert ret_dict['size_residuals_z'].shape == torch.Size([2, 64, 2])
    assert ret_dict['sem_cls_scores_z'].shape == torch.Size([2, 64, 18])
    assert ret_dict['aggregated_points_z'].shape == torch.Size([2, 64, 3])

    # test loss
    points = torch.rand([2, 1024, 3], dtype=torch.float32).cuda()
    ret_dict['seed_points'] = fp_xyz[0]
    ret_dict['seed_indices'] = fp_indices[0]

    from mmdet3d.core.bbox import DepthInstance3DBoxes
    gt_bboxes_3d = [
        DepthInstance3DBoxes(torch.rand([4, 7], dtype=torch.float32).cuda()),
        DepthInstance3DBoxes(torch.rand([4, 7], dtype=torch.float32).cuda())
    ]
    gt_labels_3d = torch.randint(0, 18, [2, 4]).cuda()
    gt_labels_3d = [gt_labels_3d[0], gt_labels_3d[1]]
    pts_semantic_mask = torch.randint(0, 19, [2, 1024]).cuda()
    pts_semantic_mask = [pts_semantic_mask[0], pts_semantic_mask[1]]
    pts_instance_mask = torch.randint(0, 4, [2, 1024]).cuda()
    pts_instance_mask = [pts_instance_mask[0], pts_instance_mask[1]]

    loss_input_dict = dict(
        bbox_preds=ret_dict,
        points=points,
        gt_bboxes_3d=gt_bboxes_3d,
        gt_labels_3d=gt_labels_3d,
        pts_semantic_mask=pts_semantic_mask,
        pts_instance_mask=pts_instance_mask)
    losses_dict = self.loss(**loss_input_dict)
    assert losses_dict['flag_loss_z'] >= 0
    assert losses_dict['vote_loss_z'] >= 0
    assert losses_dict['center_loss_z'] >= 0
    assert losses_dict['size_loss_z'] >= 0
    assert losses_dict['sem_loss_z'] >= 0

    # 'primitive_mode' should be one of ['z', 'xy', 'line']
    with pytest.raises(AssertionError):
        primitive_head_cfg['primitive_mode'] = 'xyz'
        build_head(primitive_head_cfg)
def test_scannet_pipeline():
    class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door',
                   'window', 'bookshelf', 'picture', 'counter', 'desk',
                   'curtain', 'refrigerator', 'showercurtrain', 'toilet',
                   'sink', 'bathtub', 'garbagebin')
    np.random.seed(0)
    pipelines = [
        dict(
            type='LoadPointsFromFile',
            coord_type='DEPTH',
            shift_height=True,
            load_dim=6,
            use_dim=[0, 1, 2]),
        dict(
            type='LoadAnnotations3D',
            with_bbox_3d=True,
            with_label_3d=True,
            with_mask_3d=True,
            with_seg_3d=True),
        dict(type='GlobalAlignment', rotation_axis=2),
        dict(
            type='PointSegClassMapping',
            valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
                           33, 34, 36, 39)),
        dict(type='IndoorPointSample', num_points=5),
        dict(
            type='RandomFlip3D',
            sync_2d=False,
            flip_ratio_bev_horizontal=1.0,
            flip_ratio_bev_vertical=1.0),
        dict(
            type='GlobalRotScaleTrans',
            rot_range=[-0.087266, 0.087266],
            scale_ratio_range=[1.0, 1.0],
            shift_height=True),
        dict(type='DefaultFormatBundle3D', class_names=class_names),
        dict(
            type='Collect3D',
            keys=[
                'points', 'gt_bboxes_3d', 'gt_labels_3d',
                'pts_semantic_mask', 'pts_instance_mask'
            ]),
    ]
    pipeline = Compose(pipelines)
    info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')[0]
    results = dict()
    data_path = './tests/data/scannet'
    results['pts_filename'] = osp.join(data_path, info['pts_path'])
    if info['annos']['gt_num'] != 0:
        scannet_gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype(
            np.float32)
        scannet_gt_labels_3d = info['annos']['class'].astype(np.int64)
    else:
        scannet_gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)
        scannet_gt_labels_3d = np.zeros((1, ), dtype=np.int64)
    results['ann_info'] = dict()
    results['ann_info']['pts_instance_mask_path'] = osp.join(
        data_path, info['pts_instance_mask_path'])
    results['ann_info']['pts_semantic_mask_path'] = osp.join(
        data_path, info['pts_semantic_mask_path'])
    results['ann_info']['gt_bboxes_3d'] = DepthInstance3DBoxes(
        scannet_gt_bboxes_3d, box_dim=6, with_yaw=False)
    results['ann_info']['gt_labels_3d'] = scannet_gt_labels_3d
    results['ann_info']['axis_align_matrix'] = \
        info['annos']['axis_align_matrix']

    results['img_fields'] = []
    results['bbox3d_fields'] = []
    results['pts_mask_fields'] = []
    results['pts_seg_fields'] = []

    results = pipeline(results)

    points = results['points']._data
    gt_bboxes_3d = results['gt_bboxes_3d']._data
    gt_labels_3d = results['gt_labels_3d']._data
    pts_semantic_mask = results['pts_semantic_mask']._data
    pts_instance_mask = results['pts_instance_mask']._data
    expected_points = torch.tensor(
        [[1.8339e+00, 2.1093e+00, 2.2900e+00, 2.3895e+00],
         [3.6079e+00, 1.4592e-01, 2.0687e+00, 2.1682e+00],
         [4.1886e+00, 5.0614e+00, -1.0841e-01, -8.8736e-03],
         [6.8790e+00, 1.5086e+00, -9.3154e-02, 6.3816e-03],
         [4.8253e+00, 2.6668e-01, 1.4917e+00, 1.5912e+00]])
    expected_gt_bboxes_3d = torch.tensor(
        [[-1.1835, -3.6317, 1.8565, 1.7577, 0.3761, 0.5724, 0.0000],
         [-3.1832, 3.2269, 1.5268, 0.6727, 0.2251, 0.6715, 0.0000],
         [-0.9598, -2.2864, 0.6165, 0.7506, 2.5709, 1.2145, 0.0000],
         [-2.6988, -2.7354, 0.9722, 0.7680, 1.8877, 0.2870, 0.0000],
         [3.2989, 0.2885, 1.0712, 0.7600, 3.8814, 2.1603, 0.0000]])
    expected_gt_labels_3d = np.array([
        6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0,
        0, 0, 0, 0, 5, 5, 5
    ])
    expected_pts_semantic_mask = np.array([0, 18, 18, 18, 18])
    expected_pts_instance_mask = np.array([44, 22, 10, 10, 57])
    assert torch.allclose(points, expected_points, 1e-2)
    assert torch.allclose(gt_bboxes_3d.tensor[:5, :], expected_gt_bboxes_3d,
                          1e-2)
    assert np.all(gt_labels_3d.numpy() == expected_gt_labels_3d)
    assert np.all(pts_semantic_mask.numpy() == expected_pts_semantic_mask)
    assert np.all(pts_instance_mask.numpy() == expected_pts_instance_mask)
def test_groupfree3d_head():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
    _setup_seed(0)
    vote_head_cfg = _get_vote_head_cfg(
        'groupfree3d/groupfree3d_8x4_scannet-3d-18class-L6-O256.py')
    self = build_head(vote_head_cfg).cuda()
    fp_xyz = [torch.rand([2, 256, 3], dtype=torch.float32).cuda()]
    fp_features = [torch.rand([2, 288, 256], dtype=torch.float32).cuda()]
    fp_indices = [torch.randint(0, 128, [2, 256]).cuda()]

    input_dict = dict(
        fp_xyz=fp_xyz, fp_features=fp_features, fp_indices=fp_indices)

    # test forward
    ret_dict = self(input_dict, 'kps')
    assert ret_dict['seeds_obj_cls_logits'].shape == torch.Size([2, 1, 256])
    assert ret_dict['s5.center'].shape == torch.Size([2, 256, 3])
    assert ret_dict['s5.dir_class'].shape == torch.Size([2, 256, 1])
    assert ret_dict['s5.dir_res'].shape == torch.Size([2, 256, 1])
    assert ret_dict['s5.size_class'].shape == torch.Size([2, 256, 18])
    assert ret_dict['s5.size_res'].shape == torch.Size([2, 256, 18, 3])
    assert ret_dict['s5.obj_scores'].shape == torch.Size([2, 256, 1])
    assert ret_dict['s5.sem_scores'].shape == torch.Size([2, 256, 18])

    # test losses
    points = [torch.rand([50000, 4], device='cuda') for _ in range(2)]
    gt_bbox1 = torch.rand([10, 7], dtype=torch.float32).cuda()
    gt_bbox2 = torch.rand([10, 7], dtype=torch.float32).cuda()
    gt_bbox1 = DepthInstance3DBoxes(gt_bbox1)
    gt_bbox2 = DepthInstance3DBoxes(gt_bbox2)
    gt_bboxes = [gt_bbox1, gt_bbox2]

    pts_instance_mask_1 = torch.randint(0, 10, [50000], device='cuda')
    pts_instance_mask_2 = torch.randint(0, 10, [50000], device='cuda')
    pts_instance_mask = [pts_instance_mask_1, pts_instance_mask_2]

    pts_semantic_mask_1 = torch.randint(0, 19, [50000], device='cuda')
    pts_semantic_mask_2 = torch.randint(0, 19, [50000], device='cuda')
    pts_semantic_mask = [pts_semantic_mask_1, pts_semantic_mask_2]

    labels_1 = torch.randint(0, 18, [10], device='cuda')
    labels_2 = torch.randint(0, 18, [10], device='cuda')
    gt_labels = [labels_1, labels_2]

    losses = self.loss(ret_dict, points, gt_bboxes, gt_labels,
                       pts_semantic_mask, pts_instance_mask)
    assert losses['s5.objectness_loss'] >= 0
    assert losses['s5.semantic_loss'] >= 0
    assert losses['s5.center_loss'] >= 0
    assert losses['s5.dir_class_loss'] >= 0
    assert losses['s5.dir_res_loss'] >= 0
    assert losses['s5.size_class_loss'] >= 0
    assert losses['s5.size_res_loss'] >= 0

    # test multiclass_nms_single
    obj_scores = torch.rand([256], device='cuda')
    sem_scores = torch.rand([256, 18], device='cuda')
    points = torch.rand([50000, 3], device='cuda')
    bbox = torch.rand([256, 7], device='cuda')
    input_meta = dict(box_type_3d=DepthInstance3DBoxes)
    bbox_selected, score_selected, labels = \
        self.multiclass_nms_single(obj_scores, sem_scores, bbox, points,
                                   input_meta)
    assert bbox_selected.shape[0] >= 0
    assert bbox_selected.shape[1] == 7
    assert score_selected.shape[0] >= 0
    assert labels.shape[0] >= 0

    # test get_bboxes
    points = torch.rand([1, 50000, 3], device='cuda')
    seed_points = torch.rand([1, 1024, 3], device='cuda')
    seed_indices = torch.randint(0, 50000, [1, 1024], device='cuda')
    obj_scores = torch.rand([1, 256, 1], device='cuda')
    center = torch.rand([1, 256, 3], device='cuda')
    dir_class = torch.rand([1, 256, 1], device='cuda')
    dir_res_norm = torch.rand([1, 256, 1], device='cuda')
    dir_res = torch.rand([1, 256, 1], device='cuda')
    size_class = torch.rand([1, 256, 18], device='cuda')
    size_res = torch.rand([1, 256, 18, 3], device='cuda')
    sem_scores = torch.rand([1, 256, 18], device='cuda')
    bbox_preds = dict()
    bbox_preds['seed_points'] = seed_points
    bbox_preds['seed_indices'] = seed_indices
    bbox_preds['s5.obj_scores'] = obj_scores
    bbox_preds['s5.center'] = center
    bbox_preds['s5.dir_class'] = dir_class
    bbox_preds['s5.dir_res_norm'] = dir_res_norm
    bbox_preds['s5.dir_res'] = dir_res
    bbox_preds['s5.size_class'] = size_class
    bbox_preds['s5.size_res'] = size_res
    bbox_preds['s5.sem_scores'] = sem_scores

    self.test_cfg['prediction_stages'] = 'last'
    results = self.get_bboxes(points, bbox_preds, [input_meta])
    assert results[0][0].tensor.shape[0] >= 0
    assert results[0][0].tensor.shape[1] == 7
    assert results[0][1].shape[0] >= 0
    assert results[0][2].shape[0] >= 0