def test_fcos3d():
    """Smoke-test the FCOS3D detector built from its mono-3D config.

    Runs one ``forward_train`` pass on random inputs and checks that every
    reported loss is non-negative, then runs ``simple_test`` and checks the
    layout of the returned predictions. Skipped when CUDA is unavailable.
    """
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')

    _setup_seed(0)
    detector_cfg = _get_detector_cfg(
        'fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d.py')
    detector = build_detector(detector_cfg).cuda()

    # Random inputs; the creation order is kept fixed so that the seeded
    # random streams are consumed in a stable sequence.
    imgs = torch.rand([1, 3, 928, 1600], dtype=torch.float32).cuda()
    gt_bboxes = [torch.rand([3, 4], dtype=torch.float32).cuda()]
    gt_bboxes_3d = CameraInstance3DBoxes(
        torch.rand([3, 9], device='cuda'), box_dim=9)
    gt_labels = [torch.randint(0, 10, [3], device='cuda')]
    gt_labels_3d = gt_labels
    centers2d = [torch.rand([3, 2], dtype=torch.float32).cuda()]
    depths = [torch.rand([3], dtype=torch.float32).cuda()]
    attr_labels = [torch.randint(0, 9, [3], device='cuda')]

    intrinsics = [[1260.8474446004698, 0.0, 807.968244525554],
                  [0.0, 1260.8474446004698, 495.3344268742088],
                  [0.0, 0.0, 1.0]]
    img_metas = [
        dict(
            cam2img=intrinsics,
            scale_factor=np.array([1., 1., 1., 1.], dtype=np.float32),
            box_type_3d=CameraInstance3DBoxes)
    ]

    # test forward_train
    losses = detector.forward_train(imgs, img_metas, gt_bboxes, gt_labels,
                                    gt_bboxes_3d, gt_labels_3d, centers2d,
                                    depths, attr_labels)
    loss_names = ('loss_cls', 'loss_offset', 'loss_depth', 'loss_size',
                  'loss_rotsin', 'loss_centerness', 'loss_velo', 'loss_dir',
                  'loss_attr')
    for loss_name in loss_names:
        assert losses[loss_name] >= 0

    # test simple_test
    with torch.no_grad():
        results = detector.simple_test(imgs, img_metas)
    img_bbox = results[0]['img_bbox']
    boxes_3d = img_bbox['boxes_3d']
    scores_3d = img_bbox['scores_3d']
    labels_3d = img_bbox['labels_3d']
    attrs_3d = img_bbox['attrs_3d']

    assert boxes_3d.tensor.shape[0] >= 0
    assert boxes_3d.tensor.shape[1] == 9
    for prediction in (scores_3d, labels_3d, attrs_3d):
        assert prediction.shape[0] >= 0
def test_show_result_meshlab():
    """Check that ``show_result_meshlab`` writes the expected output files.

    Five scenarios are exercised: plain detection, multi-modality detection
    on an indoor (depth) and an outdoor (LiDAR) scene, mono-3D detection and
    segmentation. Each scenario writes into its own temporary directory that
    is created *before* ``show_result_meshlab`` is called and is removed
    afterwards even when an assertion fails.

    The previous version of the outdoor and mono-3D scenarios called
    ``show_result_meshlab`` with the path of a temporary directory that had
    already been cleaned up and only created a fresh one afterwards, so the
    output was not written into a managed temporary directory.
    """

    def _assert_outputs_exist(out_dir, file_name, suffixes):
        # Every output is expected at <out_dir>/<file_name>/<file_name><suffix>.
        for suffix in suffixes:
            expected_path = os.path.join(out_dir, file_name,
                                         file_name + suffix)
            assert os.path.exists(expected_path)

    det_suffixes = ('_pred.obj', '_points.obj')
    img_suffixes = ('_img.png', '_pred.png')

    # test plain detection show
    pcd = 'tests/data/nuscenes/samples/LIDAR_TOP/n015-2018-08-02-17-16-37+' \
        '0800__LIDAR_TOP__1533201470948018.pcd.bin'
    box_3d = LiDARInstance3DBoxes(
        torch.tensor(
            [[8.7314, -1.8559, -1.5997, 0.4800, 1.2000, 1.8900, 0.0100]]))
    labels_3d = torch.tensor([0])
    scores_3d = torch.tensor([0.5])
    points = np.random.rand(100, 4)
    img_meta = dict(
        pts_filename=pcd, boxes_3d=box_3d, box_mode_3d=Box3DMode.LIDAR)
    data = dict(points=[[torch.tensor(points)]], img_metas=[[img_meta]])
    result = [
        dict(
            pts_bbox=dict(
                boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d))
    ]
    with tempfile.TemporaryDirectory() as temp_out_dir:
        out_dir, file_name = show_result_meshlab(data, result, temp_out_dir)
        _assert_outputs_exist(out_dir, file_name, det_suffixes)

    # test multi-modality show
    # indoor scene
    pcd = 'tests/data/sunrgbd/points/000001.bin'
    filename = 'tests/data/sunrgbd/sunrgbd_trainval/image/000001.jpg'
    box_3d = DepthInstance3DBoxes(
        torch.tensor(
            [[-1.1580, 3.3041, -0.9961, 0.3829, 0.4647, 0.5574, 1.1213]]))
    img = np.random.randn(1, 3, 608, 832)
    k_mat = np.array([[529.5000, 0.0000, 365.0000],
                      [0.0000, 529.5000, 265.0000],
                      [0.0000, 0.0000, 1.0000]])
    rt_mat = np.array([[0.9980, 0.0058, -0.0634],
                       [0.0058, 0.9835, 0.1808],
                       [0.0634, -0.1808, 0.9815]])
    rt_mat = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]
                       ]) @ rt_mat.transpose(1, 0)
    depth2img = k_mat @ rt_mat
    img_meta = dict(
        filename=filename,
        depth2img=depth2img,
        pcd_horizontal_flip=False,
        pcd_vertical_flip=False,
        box_mode_3d=Box3DMode.DEPTH,
        box_type_3d=DepthInstance3DBoxes,
        pcd_trans=np.array([0., 0., 0.]),
        pcd_scale_factor=1.0,
        pts_filename=pcd,
        transformation_3d_flow=['R', 'S', 'T'])
    data = dict(
        points=[[torch.tensor(points)]], img_metas=[[img_meta]], img=[img])
    result = [dict(boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d)]
    with tempfile.TemporaryDirectory() as temp_out_dir:
        out_dir, file_name = show_result_meshlab(
            data, result, temp_out_dir, 0.3, task='multi_modality-det')
        _assert_outputs_exist(out_dir, file_name,
                              det_suffixes + img_suffixes)

    # outdoor scene
    pcd = 'tests/data/kitti/training/velodyne_reduced/000000.bin'
    filename = 'tests/data/kitti/training/image_2/000000.png'
    box_3d = LiDARInstance3DBoxes(
        torch.tensor(
            [[6.4495, -3.9097, -1.7409, 1.5063, 3.1819, 1.4716, 1.8782]]))
    img = np.random.randn(1, 3, 384, 1280)
    lidar2img = np.array(
        [[6.09695435e+02, -7.21421631e+02, -1.25125790e+00, -1.23041824e+02],
         [1.80384201e+02, 7.64479828e+00, -7.19651550e+02, -1.01016693e+02],
         [9.99945343e-01, 1.24365499e-04, 1.04513029e-02, -2.69386917e-01],
         [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])
    img_meta = dict(
        filename=filename,
        pcd_horizontal_flip=False,
        pcd_vertical_flip=False,
        box_mode_3d=Box3DMode.LIDAR,
        box_type_3d=LiDARInstance3DBoxes,
        pcd_trans=np.array([0., 0., 0.]),
        pcd_scale_factor=1.0,
        pts_filename=pcd,
        lidar2img=lidar2img)
    data = dict(
        points=[[torch.tensor(points)]], img_metas=[[img_meta]], img=[img])
    result = [
        dict(
            pts_bbox=dict(
                boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d))
    ]
    # The temporary directory must exist before the call that writes into it.
    with tempfile.TemporaryDirectory() as temp_out_dir:
        out_dir, file_name = show_result_meshlab(
            data, result, temp_out_dir, 0.1, task='multi_modality-det')
        _assert_outputs_exist(out_dir, file_name,
                              det_suffixes + img_suffixes)

    # test mono-3d show
    filename = 'tests/data/nuscenes/samples/CAM_BACK_LEFT/n015-2018-' \
        '07-18-11-07-57+0800__CAM_BACK_LEFT__1531883530447423.jpg'
    box_3d = CameraInstance3DBoxes(
        torch.tensor(
            [[6.4495, -3.9097, -1.7409, 1.5063, 3.1819, 1.4716, 1.8782]]))
    img = np.random.randn(1, 3, 384, 1280)
    cam2img = np.array([[100.0, 0.0, 50.0], [0.0, 100.0, 50.0],
                        [0.0, 0.0, 1.0]])
    img_meta = dict(
        filename=filename,
        pcd_horizontal_flip=False,
        pcd_vertical_flip=False,
        box_mode_3d=Box3DMode.CAM,
        box_type_3d=CameraInstance3DBoxes,
        pcd_trans=np.array([0., 0., 0.]),
        pcd_scale_factor=1.0,
        cam2img=cam2img)
    data = dict(
        points=[[torch.tensor(points)]], img_metas=[[img_meta]], img=[img])
    result = [
        dict(
            img_bbox=dict(
                boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d))
    ]
    # Same ordering fix as for the outdoor scene above.
    with tempfile.TemporaryDirectory() as temp_out_dir:
        out_dir, file_name = show_result_meshlab(
            data, result, temp_out_dir, 0.1, task='mono-det')
        _assert_outputs_exist(out_dir, file_name, img_suffixes)

    # test seg show
    pcd = 'tests/data/scannet/points/scene0000_00.bin'
    points = np.random.rand(100, 6)
    img_meta = dict(pts_filename=pcd)
    data = dict(points=[[torch.tensor(points)]], img_metas=[[img_meta]])
    pred_seg = torch.randint(0, 20, (100, ))
    result = [dict(semantic_mask=pred_seg)]
    with tempfile.TemporaryDirectory() as temp_out_dir:
        out_dir, file_name = show_result_meshlab(
            data, result, temp_out_dir, task='seg')
        _assert_outputs_exist(out_dir, file_name, det_suffixes)
def test_boxes_conversion():
    """Check ``Coord3DMode.convert`` between CAM, LIDAR and DEPTH boxes.

    The same five raw 7-value boxes are wrapped in each box type in turn and
    converted to the two other coordinate modes; every result is compared
    with hard-coded reference values via ``torch.allclose`` using ``1e-3``
    as the relative tolerance.
    """
    raw_boxes = [
        [1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
        [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
        [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
        [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
        [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79],
    ]

    def _assert_conversion(src_boxes, src_mode, dst_mode, reference_rows):
        # Convert, then compare against the reference within tolerance.
        converted = Coord3DMode.convert(src_boxes, src_mode, dst_mode)
        reference = torch.tensor(reference_rows)
        assert torch.allclose(reference, converted.tensor, 1e-3)

    # test CAM to LIDAR and DEPTH
    cam_boxes = CameraInstance3DBoxes(raw_boxes)
    _assert_conversion(
        cam_boxes, Coord3DMode.CAM, Coord3DMode.LIDAR,
        [[-1.7501, -1.7802, -2.5162, 1.6500, 1.7500, 3.3900, 1.4800],
         [-1.6357, -8.9594, -2.4567, 1.5700, 1.5400, 4.0100, 1.6200],
         [-1.3033, -28.2967, 0.5558, 1.4800, 1.4700, 2.2300, -1.5700],
         [-1.7361, -26.6690, -21.8230, 1.4000, 1.5600, 3.4800, -1.6900],
         [-1.6218, -31.3198, -8.1621, 1.4800, 1.7400, 3.7700, 2.7900]])
    _assert_conversion(
        cam_boxes, Coord3DMode.CAM, Coord3DMode.DEPTH,
        [[1.7802, 1.7501, 2.5162, 1.7500, 1.6500, 3.3900, 1.4800],
         [8.9594, 1.6357, 2.4567, 1.5400, 1.5700, 4.0100, 1.6200],
         [28.2967, 1.3033, -0.5558, 1.4700, 1.4800, 2.2300, -1.5700],
         [26.6690, 1.7361, 21.8230, 1.5600, 1.4000, 3.4800, -1.6900],
         [31.3198, 1.6218, 8.1621, 1.7400, 1.4800, 3.7700, 2.7900]])

    # test LIDAR to CAM and DEPTH
    lidar_boxes = LiDARInstance3DBoxes(raw_boxes)
    _assert_conversion(
        lidar_boxes, Coord3DMode.LIDAR, Coord3DMode.CAM,
        [[-2.5162, 1.7501, 1.7802, 3.3900, 1.6500, 1.7500, 1.4800],
         [-2.4567, 1.6357, 8.9594, 4.0100, 1.5700, 1.5400, 1.6200],
         [0.5558, 1.3033, 28.2967, 2.2300, 1.4800, 1.4700, -1.5700],
         [-21.8230, 1.7361, 26.6690, 3.4800, 1.4000, 1.5600, -1.6900],
         [-8.1621, 1.6218, 31.3198, 3.7700, 1.4800, 1.7400, 2.7900]])
    _assert_conversion(
        lidar_boxes, Coord3DMode.LIDAR, Coord3DMode.DEPTH,
        [[-2.5162, 1.7802, -1.7501, 3.3900, 1.7500, 1.6500, 1.4800],
         [-2.4567, 8.9594, -1.6357, 4.0100, 1.5400, 1.5700, 1.6200],
         [0.5558, 28.2967, -1.3033, 2.2300, 1.4700, 1.4800, -1.5700],
         [-21.8230, 26.6690, -1.7361, 3.4800, 1.5600, 1.4000, -1.6900],
         [-8.1621, 31.3198, -1.6218, 3.7700, 1.7400, 1.4800, 2.7900]])

    # test DEPTH to CAM and LIDAR
    depth_boxes = DepthInstance3DBoxes(raw_boxes)
    _assert_conversion(
        depth_boxes, Coord3DMode.DEPTH, Coord3DMode.CAM,
        [[1.7802, -1.7501, -2.5162, 1.7500, 1.6500, 3.3900, 1.4800],
         [8.9594, -1.6357, -2.4567, 1.5400, 1.5700, 4.0100, 1.6200],
         [28.2967, -1.3033, 0.5558, 1.4700, 1.4800, 2.2300, -1.5700],
         [26.6690, -1.7361, -21.8230, 1.5600, 1.4000, 3.4800, -1.6900],
         [31.3198, -1.6218, -8.1621, 1.7400, 1.4800, 3.7700, 2.7900]])
    _assert_conversion(
        depth_boxes, Coord3DMode.DEPTH, Coord3DMode.LIDAR,
        [[2.5162, -1.7802, -1.7501, 3.3900, 1.7500, 1.6500, 1.4800],
         [2.4567, -8.9594, -1.6357, 4.0100, 1.5400, 1.5700, 1.6200],
         [-0.5558, -28.2967, -1.3033, 2.2300, 1.4700, 1.4800, -1.5700],
         [21.8230, -26.6690, -1.7361, 3.4800, 1.5600, 1.4000, -1.6900],
         [8.1621, -31.3198, -1.6218, 3.7700, 1.7400, 1.4800, 2.7900]])
def test_fcos_mono3d_head():
    """Smoke-test the FCOS mono-3D head: forward, loss and box decoding.

    Feeds five random feature maps (batch size 2) through the head built
    from the FCOS3D config, checks the structure of the forward output,
    checks that every loss term is non-negative, and finally checks the
    shapes returned by ``get_bboxes``. Skipped when CUDA is unavailable.
    """
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')

    _setup_seed(0)
    head_cfg = _get_head_cfg(
        'fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d.py')
    head = build_head(head_cfg).cuda()

    batch_size = 2
    level_sizes = [(116, 200), (58, 100), (29, 50), (15, 25), (8, 13)]
    feats = [
        torch.rand([2, 256, height, width], dtype=torch.float32).cuda()
        for height, width in level_sizes
    ]

    # test forward
    ret_dict = head(feats)
    assert len(ret_dict) == 5
    assert len(ret_dict[0]) == 5
    assert ret_dict[0][0].shape == torch.Size([2, 10, 116, 200])

    # test loss
    # Targets are created in a fixed order so that the seeded random
    # streams are consumed in a stable sequence.
    gt_bboxes = [
        torch.rand([3, 4], dtype=torch.float32).cuda()
        for _ in range(batch_size)
    ]
    gt_bboxes_3d = CameraInstance3DBoxes(
        torch.rand([3, 9], device='cuda'), box_dim=9)
    gt_labels = [
        torch.randint(0, 10, [3], device='cuda') for _ in range(batch_size)
    ]
    gt_labels_3d = gt_labels
    centers2d = [
        torch.rand([3, 2], dtype=torch.float32).cuda()
        for _ in range(batch_size)
    ]
    depths = [
        torch.rand([3], dtype=torch.float32).cuda()
        for _ in range(batch_size)
    ]
    attr_labels = [
        torch.randint(0, 9, [3], device='cuda') for _ in range(batch_size)
    ]
    img_metas = [
        dict(
            cam2img=[[1260.8474446004698, 0.0, 807.968244525554],
                     [0.0, 1260.8474446004698, 495.3344268742088],
                     [0.0, 0.0, 1.0]],
            scale_factor=np.array([1., 1., 1., 1.], dtype=np.float32),
            box_type_3d=CameraInstance3DBoxes) for _ in range(batch_size)
    ]

    losses = head.loss(*ret_dict, gt_bboxes, gt_labels, gt_bboxes_3d,
                       gt_labels_3d, centers2d, depths, attr_labels,
                       img_metas)
    loss_names = ('loss_cls', 'loss_offset', 'loss_depth', 'loss_size',
                  'loss_rotsin', 'loss_centerness', 'loss_velo', 'loss_dir',
                  'loss_attr')
    for loss_name in loss_names:
        assert losses[loss_name] >= 0

    # test get_boxes
    results = head.get_bboxes(*ret_dict, img_metas)
    assert len(results) == 2
    first_result = results[0]
    assert len(first_result) == 4
    assert first_result[0].tensor.shape == torch.Size([200, 9])
    for idx in (1, 2, 3):
        assert first_result[idx].shape == torch.Size([200])