Example #1
def test_groupfree3dnet():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')

    _setup_seed(0)
    groupfree3d_cfg = _get_detector_cfg(
        'groupfree3d/groupfree3d_8x4_scannet-3d-18class-L6-O256.py')
    self = build_detector(groupfree3d_cfg).cuda()

    points_0 = torch.rand([50000, 3], device='cuda')
    points_1 = torch.rand([50000, 3], device='cuda')
    points = [points_0, points_1]
    img_meta_0 = dict(box_type_3d=DepthInstance3DBoxes)
    img_meta_1 = dict(box_type_3d=DepthInstance3DBoxes)
    img_metas = [img_meta_0, img_meta_1]
    gt_bbox_0 = DepthInstance3DBoxes(torch.rand([10, 7], device='cuda'))
    gt_bbox_1 = DepthInstance3DBoxes(torch.rand([10, 7], device='cuda'))
    gt_bboxes = [gt_bbox_0, gt_bbox_1]
    gt_labels_0 = torch.randint(0, 18, [10], device='cuda')
    gt_labels_1 = torch.randint(0, 18, [10], device='cuda')
    gt_labels = [gt_labels_0, gt_labels_1]
    pts_instance_mask_1 = torch.randint(0, 10, [50000], device='cuda')
    pts_instance_mask_2 = torch.randint(0, 10, [50000], device='cuda')
    pts_instance_mask = [pts_instance_mask_1, pts_instance_mask_2]
    pts_semantic_mask_1 = torch.randint(0, 19, [50000], device='cuda')
    pts_semantic_mask_2 = torch.randint(0, 19, [50000], device='cuda')
    pts_semantic_mask = [pts_semantic_mask_1, pts_semantic_mask_2]

    # test forward_train
    losses = self.forward_train(points, img_metas, gt_bboxes, gt_labels,
                                pts_semantic_mask, pts_instance_mask)

    assert losses['sampling_objectness_loss'] >= 0
    assert losses['s5.objectness_loss'] >= 0
    assert losses['s5.semantic_loss'] >= 0
    assert losses['s5.center_loss'] >= 0
    assert losses['s5.dir_class_loss'] >= 0
    assert losses['s5.dir_res_loss'] >= 0
    assert losses['s5.size_class_loss'] >= 0
    assert losses['s5.size_res_loss'] >= 0

    # test simple_test
    with torch.no_grad():
        results = self.simple_test(points, img_metas)
    boxes_3d = results[0]['boxes_3d']
    scores_3d = results[0]['scores_3d']
    labels_3d = results[0]['labels_3d']
    assert boxes_3d.tensor.shape[0] >= 0
    assert boxes_3d.tensor.shape[1] == 7
    assert scores_3d.shape[0] >= 0
    assert labels_3d.shape[0] >= 0
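The `_setup_seed` and `_get_detector_cfg` helpers used above are not part of this excerpt. A minimal sketch of what they are assumed to do (seed all RNGs; load a detector config from the repository's configs directory and merge in train/test cfg) is shown below; the config path handling and merging are assumptions, not the library code.

import random

import numpy as np
import torch
from mmcv import Config


def _setup_seed(seed):
    # Make the random test inputs reproducible across runs.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True


def _get_detector_cfg(fname):
    # Load the full config and return a model cfg usable by build_detector.
    cfg = Config.fromfile(f'configs/{fname}')
    model = cfg.model
    model.update(train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))
    return model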
Example #2
    def get_ann_info(self, index):
        """Get annotation info according to the given index.

        Args:
            index (int): Index of the annotation data to get.

        Returns:
            dict: Annotation information, which consists of the following keys:

                - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): \
                    3D ground truth bboxes
                - gt_labels_3d (np.ndarray): Labels of ground truths.
        """
        # Use index to get the annos so that the evalhook can also use this API
        info = self.data_infos[index]
        if info['annos']['gt_num'] != 0:
            gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype(
                np.float32)  # k, 7
            gt_labels_3d = info['annos']['class'].astype(np.int64)
        else:
            gt_bboxes_3d = np.zeros((0, 7), dtype=np.float32)
            gt_labels_3d = np.zeros((0, ), dtype=np.int64)

        # to target box structure
        gt_bboxes_3d = DepthInstance3DBoxes(gt_bboxes_3d,
                                            origin=(0.5, 0.5, 0.5)).convert_to(
                                                self.box_mode_3d)

        anns_results = dict(gt_bboxes_3d=gt_bboxes_3d,
                            gt_labels_3d=gt_labels_3d)
        return anns_results

    def show(self, results, out_dir, show=True, pipeline=None):
        """Results visualization.

        Args:
            results (list[dict]): List of bounding boxes results.
            out_dir (str): Output directory of visualization result.
            show (bool): Whether to visualize the results online.
            pipeline (list[dict], optional): Raw data loading pipeline used
                for showing. Default: None.
        """
        assert out_dir is not None, 'Expect out_dir, got none.'
        pipeline = self._get_pipeline(pipeline)
        for i, result in enumerate(results):
            data_info = self.data_infos[i]
            pts_path = data_info['pts_path']
            file_name = osp.split(pts_path)[-1].split('.')[0]
            points, img_metas, img, calib = self._extract_data(
                i, pipeline, ['points', 'img_metas', 'img', 'calib'])
            # scale colors to [0, 255]
            points = points.numpy()
            points[:, 3:] *= 255

            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
            pred_bboxes = result['boxes_3d'].tensor.numpy()
            show_result(points, gt_bboxes.copy(), pred_bboxes.copy(), out_dir,
                        file_name, show)

            # multi-modality visualization
            if self.modality['use_camera'] and 'calib' in data_info.keys():
                img = img.numpy()
                # need to transpose channel to first dim
                img = img.transpose(1, 2, 0)
                pred_bboxes = DepthInstance3DBoxes(
                    pred_bboxes, origin=(0.5, 0.5, 0))
                gt_bboxes = DepthInstance3DBoxes(
                    gt_bboxes, origin=(0.5, 0.5, 0))
                show_multi_modality_result(
                    img,
                    gt_bboxes,
                    pred_bboxes,
                    calib,
                    out_dir,
                    file_name,
                    depth_bbox=True,
                    img_metas=img_metas,
                    show=show)
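A hypothetical call site for this visualization method (the names below are placeholders, not from the source) would look like:

# dataset = build_dataset(cfg.data.test)          # e.g. an indoor depth dataset
# outputs = single_gpu_test(model, data_loader)   # list[dict] with 'boxes_3d' etc.
# dataset.show(outputs, out_dir='work_dirs/vis', show=False, pipeline=eval_pipeline)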
Example #4
    def show(self, results, out_dir, show=True):
        """Results visualization.

        Args:
            results (list[dict]): List of bounding boxes results.
            out_dir (str): Output directory of visualization result.
            show (bool): Whether to visualize the results online.
        """
        assert out_dir is not None, 'Expect out_dir, got none.'
        for i, result in enumerate(results):
            data_info = self.data_infos[i]
            pts_path = data_info['pts_path']
            file_name = osp.split(pts_path)[-1].split('.')[0]
            if hasattr(self, 'pipeline'):
                example = self.prepare_test_data(i)
            else:
                example = None
            points = np.fromfile(osp.join(self.data_root, pts_path),
                                 dtype=np.float32).reshape(-1, 6)
            points[:, 3:] *= 255

            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
            pred_bboxes = result['boxes_3d'].tensor.numpy()
            show_result(points, gt_bboxes.copy(), pred_bboxes.copy(), out_dir,
                        file_name, show)

            # multi-modality visualization
            if self.modality['use_camera'] and example is not None and \
                    'calib' in data_info.keys():
                img = mmcv.imread(example['img_metas']._data['filename'])
                pred_bboxes = DepthInstance3DBoxes(pred_bboxes,
                                                   origin=(0.5, 0.5, 0))
                gt_bboxes = DepthInstance3DBoxes(gt_bboxes,
                                                 origin=(0.5, 0.5, 0))
                show_multi_modality_result(
                    img,
                    gt_bboxes,
                    pred_bboxes,
                    example['calib'],
                    out_dir,
                    file_name,
                    depth_bbox=True,
                    img_metas=example['img_metas']._data,
                    show=show)
Example #5
def test_vote_net():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')

    _setup_seed(0)
    vote_net_cfg = _get_detector_cfg(
        'votenet/votenet_16x8_sunrgbd-3d-10class.py')
    self = build_detector(vote_net_cfg).cuda()
    points_0 = torch.rand([2000, 4], device='cuda')
    points_1 = torch.rand([2000, 4], device='cuda')
    points = [points_0, points_1]
    img_meta_0 = dict(box_type_3d=DepthInstance3DBoxes)
    img_meta_1 = dict(box_type_3d=DepthInstance3DBoxes)
    img_metas = [img_meta_0, img_meta_1]
    gt_bbox_0 = DepthInstance3DBoxes(torch.rand([10, 7], device='cuda'))
    gt_bbox_1 = DepthInstance3DBoxes(torch.rand([10, 7], device='cuda'))
    gt_bboxes = [gt_bbox_0, gt_bbox_1]
    gt_labels_0 = torch.randint(0, 10, [10], device='cuda')
    gt_labels_1 = torch.randint(0, 10, [10], device='cuda')
    gt_labels = [gt_labels_0, gt_labels_1]

    # test forward_train
    losses = self.forward_train(points, img_metas, gt_bboxes, gt_labels)
    assert losses['vote_loss'] >= 0
    assert losses['objectness_loss'] >= 0
    assert losses['semantic_loss'] >= 0
    assert losses['center_loss'] >= 0
    assert losses['dir_class_loss'] >= 0
    assert losses['dir_res_loss'] >= 0
    assert losses['size_class_loss'] >= 0
    assert losses['size_res_loss'] >= 0

    # test simple_test
    with torch.no_grad():
        results = self.simple_test(points, img_metas)
    boxes_3d = results[0]['boxes_3d']
    scores_3d = results[0]['scores_3d']
    labels_3d = results[0]['labels_3d']
    assert boxes_3d.tensor.shape[0] >= 0
    assert boxes_3d.tensor.shape[1] == 7
    assert scores_3d.shape[0] >= 0
    assert labels_3d.shape[0] >= 0
Example #6
def test_merge_aug_bboxes_3d():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
    img_meta_0 = dict(
        pcd_horizontal_flip=False,
        pcd_vertical_flip=True,
        pcd_scale_factor=1.0)
    img_meta_1 = dict(
        pcd_horizontal_flip=True,
        pcd_vertical_flip=False,
        pcd_scale_factor=1.0)
    img_meta_2 = dict(
        pcd_horizontal_flip=False,
        pcd_vertical_flip=False,
        pcd_scale_factor=0.5)
    img_metas = [[img_meta_0], [img_meta_1], [img_meta_2]]
    boxes_3d = DepthInstance3DBoxes(
        torch.tensor(
            [[1.0473, 4.1687, -1.2317, 2.3021, 1.8876, 1.9696, 1.6956],
             [2.5831, 4.8117, -1.2733, 0.5852, 0.8832, 0.9733, 1.6500],
             [-1.0864, 1.9045, -1.2000, 0.7128, 1.5631, 2.1045, 0.1022]],
            device='cuda'))
    labels_3d = torch.tensor([0, 7, 6])
    scores_3d = torch.tensor([0.5, 1.0, 1.0])
    aug_result = dict(
        boxes_3d=boxes_3d, labels_3d=labels_3d, scores_3d=scores_3d)
    aug_results = [aug_result, aug_result, aug_result]
    test_cfg = mmcv.ConfigDict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_thr=0.01,
        score_thr=0.1,
        min_bbox_size=0,
        nms_pre=100,
        max_num=50)
    results = merge_aug_bboxes_3d(aug_results, img_metas, test_cfg)
    expected_boxes_3d = torch.tensor(
        [[-1.0864, -1.9045, -1.2000, 0.7128, 1.5631, 2.1045, -0.1022],
         [1.0864, 1.9045, -1.2000, 0.7128, 1.5631, 2.1045, 3.0394],
         [-2.1728, 3.8090, -2.4000, 1.4256, 3.1262, 4.2090, 0.1022],
         [2.5831, -4.8117, -1.2733, 0.5852, 0.8832, 0.9733, -1.6500],
         [-2.5831, 4.8117, -1.2733, 0.5852, 0.8832, 0.9733, 1.4916],
         [5.1662, 9.6234, -2.5466, 1.1704, 1.7664, 1.9466, 1.6500],
         [1.0473, -4.1687, -1.2317, 2.3021, 1.8876, 1.9696, -1.6956],
         [-1.0473, 4.1687, -1.2317, 2.3021, 1.8876, 1.9696, 1.4460],
         [2.0946, 8.3374, -2.4634, 4.6042, 3.7752, 3.9392, 1.6956]])
    expected_scores_3d = torch.tensor([
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.5000, 0.5000, 0.5000
    ])
    expected_labels_3d = torch.tensor([6, 6, 6, 7, 7, 7, 0, 0, 0])
    assert torch.allclose(results['boxes_3d'].tensor, expected_boxes_3d)
    assert torch.allclose(results['scores_3d'], expected_scores_3d)
    assert torch.all(results['labels_3d'] == expected_labels_3d)
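The expected tensors above are consistent with undoing each test-time augmentation before merging. A small illustrative sketch of that inverse mapping follows (the helper name is hypothetical and this is not the library implementation):

import math


def _undo_aug(box, horizontal_flip, vertical_flip, scale_factor):
    # box: [x, y, z, dx, dy, dz, yaw] in depth coordinates.
    x, y, z, dx, dy, dz, yaw = box
    if vertical_flip:      # undo vertical flip: negate y and yaw
        y, yaw = -y, -yaw
    if horizontal_flip:    # undo horizontal flip: negate x, mirror yaw about pi
        x, yaw = -x, math.pi - yaw
    s = scale_factor       # undo scaling of both coordinates and sizes
    return [x / s, y / s, z / s, dx / s, dy / s, dz / s, yaw]


# e.g. the third box under img_meta_2 (scale 0.5):
# _undo_aug([-1.0864, 1.9045, -1.2000, 0.7128, 1.5631, 2.1045, 0.1022],
#           False, False, 0.5)
# -> [-2.1728, 3.8090, -2.4000, 1.4256, 3.1262, 4.2090, 0.1022]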
Example #7
def test_3dssd():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
    _setup_seed(0)
    ssd3d_cfg = _get_detector_cfg('3dssd/3dssd_4x4_kitti-3d-car.py')
    self = build_detector(ssd3d_cfg).cuda()
    points_0 = torch.rand([2000, 4], device='cuda')
    points_1 = torch.rand([2000, 4], device='cuda')
    points = [points_0, points_1]
    img_meta_0 = dict(box_type_3d=DepthInstance3DBoxes)
    img_meta_1 = dict(box_type_3d=DepthInstance3DBoxes)
    img_metas = [img_meta_0, img_meta_1]
    gt_bbox_0 = DepthInstance3DBoxes(torch.rand([10, 7], device='cuda'))
    gt_bbox_1 = DepthInstance3DBoxes(torch.rand([10, 7], device='cuda'))
    gt_bboxes = [gt_bbox_0, gt_bbox_1]
    gt_labels_0 = torch.zeros([10], device='cuda').long()
    gt_labels_1 = torch.zeros([10], device='cuda').long()
    gt_labels = [gt_labels_0, gt_labels_1]

    # test forward_train
    losses = self.forward_train(points, img_metas, gt_bboxes, gt_labels)
    assert losses['vote_loss'] >= 0
    assert losses['centerness_loss'] >= 0
    assert losses['center_loss'] >= 0
    assert losses['dir_class_loss'] >= 0
    assert losses['dir_res_loss'] >= 0
    assert losses['corner_loss'] >= 0
    assert losses['size_res_loss'] >= 0

    # test simple_test
    with torch.no_grad():
        results = self.simple_test(points, img_metas)
    boxes_3d = results[0]['boxes_3d']
    scores_3d = results[0]['scores_3d']
    labels_3d = results[0]['labels_3d']
    assert boxes_3d.tensor.shape[0] >= 0
    assert boxes_3d.tensor.shape[1] == 7
    assert scores_3d.shape[0] >= 0
    assert labels_3d.shape[0] >= 0
Example #8
def test_load_annotations3D():
    # Test scannet LoadAnnotations3D
    scannet_info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')[0]
    scannet_load_annotations3D = LoadAnnotations3D(
        with_bbox_3d=True,
        with_label_3d=True,
        with_mask_3d=True,
        with_seg_3d=True)
    scannet_results = dict()
    data_path = './tests/data/scannet'

    if scannet_info['annos']['gt_num'] != 0:
        scannet_gt_bboxes_3d = scannet_info['annos']['gt_boxes_upright_depth']
        scannet_gt_labels_3d = scannet_info['annos']['class']
    else:
        scannet_gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)
        scannet_gt_labels_3d = np.zeros((1, ))

    # prepare input of loading pipeline
    scannet_results['ann_info'] = dict()
    scannet_results['ann_info']['pts_instance_mask_path'] = osp.join(
        data_path, scannet_info['pts_instance_mask_path'])
    scannet_results['ann_info']['pts_semantic_mask_path'] = osp.join(
        data_path, scannet_info['pts_semantic_mask_path'])
    scannet_results['ann_info']['gt_bboxes_3d'] = DepthInstance3DBoxes(
        scannet_gt_bboxes_3d, box_dim=6, with_yaw=False)
    scannet_results['ann_info']['gt_labels_3d'] = scannet_gt_labels_3d

    scannet_results['bbox3d_fields'] = []
    scannet_results['pts_mask_fields'] = []
    scannet_results['pts_seg_fields'] = []

    scannet_results = scannet_load_annotations3D(scannet_results)
    scannet_gt_boxes = scannet_results['gt_bboxes_3d']
    scannet_gt_labels = scannet_results['gt_labels_3d']

    scannet_pts_instance_mask = scannet_results['pts_instance_mask']
    scannet_pts_semantic_mask = scannet_results['pts_semantic_mask']
    repr_str = repr(scannet_load_annotations3D)
    expected_repr_str = 'LoadAnnotations3D(\n    with_bbox_3d=True,     ' \
                        'with_label_3d=True,     with_attr_label=False,     ' \
                        'with_mask_3d=True,     with_seg_3d=True,     ' \
                        'with_bbox=False,     with_label=False,     ' \
                        'with_mask=False,     with_seg=False,     ' \
                        'with_bbox_depth=False,     poly2mask=True)'
    assert repr_str == expected_repr_str
    assert scannet_gt_boxes.tensor.shape == (27, 7)
    assert scannet_gt_labels.shape == (27, )
    assert scannet_pts_instance_mask.shape == (100, )
    assert scannet_pts_semantic_mask.shape == (100, )
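Note that the loaded boxes come out with 7 columns even though box_dim=6: with with_yaw=False, DepthInstance3DBoxes pads a zero yaw column. A tiny check (assuming the same imports already used in this test) illustrates the behaviour the shape assertion relies on.

boxes = DepthInstance3DBoxes(np.zeros((2, 6)), box_dim=6, with_yaw=False)
assert boxes.tensor.shape == (2, 7)  # a zero yaw column is appended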
Example #9
    def get_ann_info(self, index):
        """Get annotation info according to the given index.

        Args:
            index (int): Index of the annotation data to get.

        Returns:
            dict: Annotation information, which consists of the following keys:

                - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): \
                    3D ground truth bboxes
                - gt_labels_3d (np.ndarray): Labels of ground truths.
                - pts_instance_mask_path (str): Path of instance masks.
                - pts_semantic_mask_path (str): Path of semantic masks.
                - axis_align_matrix (np.ndarray): Transformation matrix for \
                    global scene alignment.
        """
        # Use index to get the annos so that the evalhook can also use this API
        info = self.data_infos[index]
        if info['annos']['gt_num'] != 0:
            gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype(
                np.float32)  # k, 6
            gt_labels_3d = info['annos']['class'].astype(np.int64)
        else:
            gt_bboxes_3d = np.zeros((0, 6), dtype=np.float32)
            gt_labels_3d = np.zeros((0, ), dtype=np.int64)

        # to target box structure
        gt_bboxes_3d = DepthInstance3DBoxes(
            gt_bboxes_3d,
            box_dim=gt_bboxes_3d.shape[-1],
            with_yaw=False,
            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)

        pts_instance_mask_path = osp.join(self.data_root,
                                          info['pts_instance_mask_path'])
        pts_semantic_mask_path = osp.join(self.data_root,
                                          info['pts_semantic_mask_path'])

        axis_align_matrix = self._get_axis_align_matrix(info)

        anns_results = dict(
            gt_bboxes_3d=gt_bboxes_3d,
            gt_labels_3d=gt_labels_3d,
            pts_instance_mask_path=pts_instance_mask_path,
            pts_semantic_mask_path=pts_semantic_mask_path,
            axis_align_matrix=axis_align_matrix)
        return anns_results
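# The `_get_axis_align_matrix` helper referenced above is not shown in this
# excerpt. A minimal sketch consistent with how it is used here (an assumption,
# not the library code) is:
def _get_axis_align_matrix(info):
    if 'axis_align_matrix' in info['annos']:
        return info['annos']['axis_align_matrix'].astype(np.float32)
    return np.eye(4, dtype=np.float32)  # fall back to identity alignment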

def test_scannet_pipeline():
    class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door',
                   'window', 'bookshelf', 'picture', 'counter', 'desk',
                   'curtain', 'refrigerator', 'showercurtrain', 'toilet',
                   'sink', 'bathtub', 'garbagebin')

    np.random.seed(0)
    pipelines = [
        dict(type='LoadPointsFromFile',
             coord_type='DEPTH',
             shift_height=True,
             load_dim=6,
             use_dim=[0, 1, 2]),
        dict(type='LoadAnnotations3D',
             with_bbox_3d=True,
             with_label_3d=True,
             with_mask_3d=True,
             with_seg_3d=True),
        dict(type='IndoorPointSample', num_points=5),
        dict(type='RandomFlip3D',
             sync_2d=False,
             flip_ratio_bev_horizontal=1.0,
             flip_ratio_bev_vertical=1.0),
        dict(type='GlobalRotScaleTrans',
             rot_range=[-0.087266, 0.087266],
             scale_ratio_range=[1.0, 1.0],
             shift_height=True),
        dict(type='DefaultFormatBundle3D', class_names=class_names),
        dict(type='Collect3D',
             keys=[
                 'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
                 'pts_instance_mask'
             ]),
    ]
    pipeline = Compose(pipelines)
    info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')[0]
    results = dict()
    data_path = './tests/data/scannet'
    results['pts_filename'] = osp.join(data_path, info['pts_path'])
    if info['annos']['gt_num'] != 0:
        scannet_gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype(
            np.float32)
        scannet_gt_labels_3d = info['annos']['class'].astype(np.int64)
    else:
        scannet_gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)
        scannet_gt_labels_3d = np.zeros((1, ), dtype=np.int64)
    results['ann_info'] = dict()
    results['ann_info']['pts_instance_mask_path'] = osp.join(
        data_path, info['pts_instance_mask_path'])
    results['ann_info']['pts_semantic_mask_path'] = osp.join(
        data_path, info['pts_semantic_mask_path'])
    results['ann_info']['gt_bboxes_3d'] = DepthInstance3DBoxes(
        scannet_gt_bboxes_3d, box_dim=6, with_yaw=False)
    results['ann_info']['gt_labels_3d'] = scannet_gt_labels_3d

    results['img_fields'] = []
    results['bbox3d_fields'] = []
    results['pts_mask_fields'] = []
    results['pts_seg_fields'] = []

    results = pipeline(results)

    points = results['points']._data
    gt_bboxes_3d = results['gt_bboxes_3d']._data
    gt_labels_3d = results['gt_labels_3d']._data
    pts_semantic_mask = results['pts_semantic_mask']._data
    pts_instance_mask = results['pts_instance_mask']._data
    expected_points = torch.tensor([[-2.7231, -2.2068, 2.3543, 2.3895],
                                    [-0.4065, -3.4857, 2.1330, 2.1682],
                                    [-1.4578, 1.3510, -0.0441, -0.0089],
                                    [2.2428, -1.1323, -0.0288, 0.0064],
                                    [0.7052, -2.9752, 1.5560, 1.5912]])
    expected_gt_bboxes_3d = torch.tensor(
        [[-1.1835, -3.6317, 1.8565, 1.7577, 0.3761, 0.5724, 0.0000],
         [-3.1832, 3.2269, 1.5268, 0.6727, 0.2251, 0.6715, 0.0000],
         [-0.9598, -2.2864, 0.6165, 0.7506, 2.5709, 1.2145, 0.0000],
         [-2.6988, -2.7354, 0.9722, 0.7680, 1.8877, 0.2870, 0.0000],
         [3.2989, 0.2885, 1.0712, 0.7600, 3.8814, 2.1603, 0.0000]])
    expected_gt_labels_3d = np.array([
        6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0,
        0, 0, 0, 5, 5, 5
    ])
    expected_pts_semantic_mask = np.array([3, 1, 2, 2, 15])
    expected_pts_instance_mask = np.array([44, 22, 10, 10, 57])
    assert torch.allclose(points, expected_points, 1e-2)
    assert torch.allclose(gt_bboxes_3d.tensor[:5, :], expected_gt_bboxes_3d,
                          1e-2)
    assert np.all(gt_labels_3d.numpy() == expected_gt_labels_3d)
    assert np.all(pts_semantic_mask.numpy() == expected_pts_semantic_mask)
    assert np.all(pts_instance_mask.numpy() == expected_pts_instance_mask)
def test_sunrgbd_pipeline():
    class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk',
                   'dresser', 'night_stand', 'bookshelf', 'bathtub')
    np.random.seed(0)
    pipelines = [
        dict(type='LoadPointsFromFile',
             coord_type='DEPTH',
             shift_height=True,
             load_dim=6,
             use_dim=[0, 1, 2]),
        dict(type='LoadAnnotations3D'),
        dict(
            type='RandomFlip3D',
            sync_2d=False,
            flip_ratio_bev_horizontal=1.0,
        ),
        dict(type='GlobalRotScaleTrans',
             rot_range=[-0.523599, 0.523599],
             scale_ratio_range=[0.85, 1.15],
             shift_height=True),
        dict(type='IndoorPointSample', num_points=5),
        dict(type='DefaultFormatBundle3D', class_names=class_names),
        dict(type='Collect3D', keys=['points', 'gt_bboxes_3d',
                                     'gt_labels_3d']),
    ]
    pipeline = Compose(pipelines)
    results = dict()
    info = mmcv.load('./tests/data/sunrgbd/sunrgbd_infos.pkl')[0]
    data_path = './tests/data/sunrgbd'
    results['pts_filename'] = osp.join(data_path, info['pts_path'])

    if info['annos']['gt_num'] != 0:
        gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype(
            np.float32)
        gt_labels_3d = info['annos']['class'].astype(np.int64)
    else:
        gt_bboxes_3d = np.zeros((1, 7), dtype=np.float32)
        gt_labels_3d = np.zeros((1, ), dtype=np.int64)

    # prepare input of pipeline
    results['ann_info'] = dict()
    results['ann_info']['gt_bboxes_3d'] = DepthInstance3DBoxes(gt_bboxes_3d)
    results['ann_info']['gt_labels_3d'] = gt_labels_3d
    results['img_fields'] = []
    results['bbox3d_fields'] = []
    results['pts_mask_fields'] = []
    results['pts_seg_fields'] = []

    results = pipeline(results)
    points = results['points']._data
    gt_bboxes_3d = results['gt_bboxes_3d']._data
    gt_labels_3d = results['gt_labels_3d']._data
    expected_points = torch.tensor([[0.8678, 1.3470, 0.1105, 0.0905],
                                    [0.8707, 1.3635, 0.0437, 0.0238],
                                    [0.8636, 1.3511, 0.0504, 0.0304],
                                    [0.8690, 1.3461, 0.1265, 0.1065],
                                    [0.8668, 1.3434, 0.1216, 0.1017]])
    expected_gt_bboxes_3d = torch.tensor(
        [[-1.2136, 4.0206, -0.2412, 2.2493, 1.8444, 1.9245, 1.3989],
         [-2.7420, 4.5777, -0.7686, 0.5718, 0.8629, 0.9510, 1.4446],
         [0.9729, 1.9087, -0.1443, 0.6965, 1.5273, 2.0563, 2.9924]])
    expected_gt_labels_3d = np.array([0, 7, 6])
    assert torch.allclose(gt_bboxes_3d.tensor, expected_gt_bboxes_3d, 1e-3)
    assert np.allclose(gt_labels_3d.flatten(), expected_gt_labels_3d)
    assert torch.allclose(points, expected_points, 1e-2)
Example #12
def test_partial_bin_based_box_coder():
    box_coder_cfg = dict(type='PartialBinBasedBBoxCoder',
                         num_sizes=10,
                         num_dir_bins=12,
                         with_rot=True,
                         mean_sizes=[[2.114256, 1.620300, 0.927272],
                                     [0.791118, 1.279516, 0.718182],
                                     [0.923508, 1.867419, 0.845495],
                                     [0.591958, 0.552978, 0.827272],
                                     [0.699104, 0.454178, 0.75625],
                                     [0.69519, 1.346299, 0.736364],
                                     [0.528526, 1.002642, 1.172878],
                                     [0.500618, 0.632163, 0.683424],
                                     [0.404671, 1.071108, 1.688889],
                                     [0.76584, 1.398258, 0.472728]])
    box_coder = build_bbox_coder(box_coder_cfg)

    # test encode
    gt_bboxes = DepthInstance3DBoxes(
        [[0.8308, 4.1168, -1.2035, 2.2493, 1.8444, 1.9245, 1.6486],
         [2.3002, 4.8149, -1.2442, 0.5718, 0.8629, 0.9510, 1.6030],
         [-1.1477, 1.8090, -1.1725, 0.6965, 1.5273, 2.0563, 0.0552]])

    gt_labels = torch.tensor([0, 1, 2])
    center_target, size_class_target, size_res_target, dir_class_target, \
        dir_res_target = box_coder.encode(gt_bboxes, gt_labels)
    expected_center_target = torch.tensor([[0.8308, 4.1168, -0.2413],
                                           [2.3002, 4.8149, -0.7687],
                                           [-1.1477, 1.8090, -0.1444]])
    expected_size_class_target = torch.tensor([0, 1, 2])
    expected_size_res_target = torch.tensor([[0.1350, 0.2241, 0.9972],
                                             [-0.2193, -0.4166, 0.2328],
                                             [-0.2270, -0.3401, 1.2108]])
    expected_dir_class_target = torch.tensor([3, 3, 0])
    expected_dir_res_target = torch.tensor([0.0778, 0.0322, 0.0552])
    assert torch.allclose(center_target, expected_center_target, atol=1e-4)
    assert torch.all(size_class_target == expected_size_class_target)
    assert torch.allclose(size_res_target, expected_size_res_target, atol=1e-4)
    assert torch.all(dir_class_target == expected_dir_class_target)
    assert torch.allclose(dir_res_target, expected_dir_res_target, atol=1e-4)
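    # (commentary) These targets follow directly from the inputs: center_target
    # is the gravity center (bottom z plus half height, e.g. -1.2035 + 1.9245 / 2
    # = -0.2413); size_res_target is the box size minus the class mean size
    # (e.g. 2.2493 - 2.114256 = 0.1350); with 12 direction bins of width
    # 2 * pi / 12 ~= 0.5236, yaw 1.6486 falls into bin 3 with residual
    # 1.6486 - 3 * 0.5236 ~= 0.0778.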

    # test decode
    center = torch.tensor([[[0.8014, 3.4134,
                             -0.6133], [2.6375, 8.4191, 2.0438],
                            [4.2017, 5.2504,
                             -0.7851], [-1.0088, 5.4107, 1.6293],
                            [1.4837, 4.0268, 0.6222]]])

    size_class = torch.tensor([[[
        -1.0061, -2.2788, 1.1322, -4.4380, -11.0526, -2.8113, -2.0642, -7.5886,
        -4.8627, -5.0437
    ],
                                [
                                    -2.2058, -0.3527, -1.9976, 0.8815, -2.7980,
                                    -1.9053, -0.5097, -2.0232, -1.4242, -4.1192
                                ],
                                [
                                    -1.4783, -0.1009, -1.1537, 0.3052, -4.3147,
                                    -2.6529, 0.2729, -0.3755, -2.6479, -3.7548
                                ],
                                [
                                    -6.1809, -3.5024, -8.3273, 1.1252, -4.3315,
                                    -7.8288, -4.6091, -5.8153, 0.7480, -10.1396
                                ],
                                [
                                    -9.0424, -3.7883, -6.0788, -1.8855,
                                    -10.2493, -9.7164, -1.0658, -4.1713,
                                    1.1173, -10.6204
                                ]]])

    size_res = torch.tensor([[[[-9.8976e-02, -5.2152e-01, -7.6421e-02],
                               [1.4593e-01, 5.6099e-01, 8.9421e-02],
                               [5.1481e-02, 3.9280e-01, 1.2705e-01],
                               [3.6869e-01, 7.0558e-01, 1.4647e-01],
                               [4.7683e-01, 3.3644e-01, 2.3481e-01],
                               [8.7346e-02, 8.4987e-01, 3.3265e-01],
                               [2.1393e-01, 8.5585e-01, 9.8948e-02],
                               [7.8530e-02, 5.9694e-02, -8.7211e-02],
                               [1.8551e-01, 1.1308e+00, -5.1864e-01],
                               [3.6485e-01, 7.3757e-01, 1.5264e-01]],
                              [[-9.5593e-01, -5.0455e-01, 1.9554e-01],
                               [-1.0870e-01, 1.8025e-01, 1.0228e-01],
                               [-8.2882e-02, -4.3771e-01, 9.2135e-02],
                               [-4.0840e-02, -5.9841e-02, 1.1982e-01],
                               [7.3448e-02, 5.2045e-02, 1.7301e-01],
                               [-4.0440e-02, 4.9532e-02, 1.1266e-01],
                               [3.5857e-02, 1.3564e-02, 1.0212e-01],
                               [-1.0407e-01, -5.9321e-02, 9.2622e-02],
                               [7.4691e-03, 9.3080e-02, -4.4077e-01],
                               [-6.0121e-02, -1.3381e-01, -6.8083e-02]],
                              [[-9.3970e-01, -9.7823e-01, -5.1075e-02],
                               [-1.2843e-01, -1.8381e-01, 7.1327e-02],
                               [-1.2247e-01, -8.1115e-01, 3.6495e-02],
                               [4.9154e-02, -4.5440e-02, 8.9520e-02],
                               [1.5653e-01, 3.5990e-02, 1.6414e-01],
                               [-5.9621e-02, 4.9357e-03, 1.4264e-01],
                               [8.5235e-04, -1.0030e-01, -3.0712e-02],
                               [-3.7255e-02, 2.8996e-02, 5.5545e-02],
                               [3.9298e-02, -4.7420e-02, -4.9147e-01],
                               [-1.1548e-01, -1.5895e-01, -3.9155e-02]],
                              [[-1.8725e+00, -7.4102e-01, 1.0524e+00],
                               [-3.3210e-01, 4.7828e-02, -3.2666e-02],
                               [-2.7949e-01, 5.5541e-02, -1.0059e-01],
                               [-8.5533e-02, 1.4870e-01, -1.6709e-01],
                               [3.8283e-01, 2.6609e-01, 2.1361e-01],
                               [-4.2156e-01, 3.2455e-01, 6.7309e-01],
                               [-2.4336e-02, -8.3366e-02, 3.9913e-01],
                               [8.2142e-03, 4.8323e-02, -1.5247e-01],
                               [-4.8142e-02, -3.0074e-01, -1.6829e-01],
                               [1.3274e-01, -2.3825e-01, -1.8127e-01]],
                              [[-1.2576e+00, -6.1550e-01, 7.9430e-01],
                               [-4.7222e-01, 1.5634e+00, -5.9460e-02],
                               [-3.5367e-01, 1.3616e+00, -1.6421e-01],
                               [-1.6611e-02, 2.4231e-01, -9.6188e-02],
                               [5.4486e-01, 4.6833e-01, 5.1151e-01],
                               [-6.1755e-01, 1.0292e+00, 1.2458e+00],
                               [-6.8152e-02, 2.4786e-01, 9.5088e-01],
                               [-4.8745e-02, 1.5134e-01, -9.9962e-02],
                               [2.4485e-03, -7.5991e-02, 1.3545e-01],
                               [4.1608e-01, -1.2093e-01, -3.1643e-01]]]])

    dir_class = torch.tensor([[[
        -1.0230, -5.1965, -5.2195, 2.4030, -2.7661, -7.3399, -1.1640, -4.0630,
        -5.2940, 0.8245, -3.1869, -6.1743
    ],
                               [
                                   -1.9503, -1.6940, -0.8716, -1.1494, -0.8196,
                                   0.2862, -0.2921, -0.7894, -0.2481, -0.9916,
                                   -1.4304, -1.2466
                               ],
                               [
                                   -1.7435, -1.2043, -0.1265, 0.5083, -0.0717,
                                   -0.9560, -1.6171, -2.6463, -2.3863, -2.1358,
                                   -1.8812, -2.3117
                               ],
                               [
                                   -1.9282, 0.3792, -1.8426, -1.4587, -0.8582,
                                   -3.4639, -3.2133, -3.7867, -7.6781, -6.4459,
                                   -6.2455, -5.4797
                               ],
                               [
                                   -3.1869, 0.4456, -0.5824, 0.9994, -1.0554,
                                   -8.4232, -7.7019, -7.1382, -10.2724,
                                   -7.8229, -8.1860, -8.6194
                               ]]])

    dir_res = torch.tensor(
        [[[
            1.1022e-01, -2.3750e-01, 2.0381e-01, 1.2177e-01, -2.8501e-01,
            1.5351e-01, 1.2218e-01, -2.0677e-01, 1.4468e-01, 1.1593e-01,
            -2.6864e-01, 1.1290e-01
        ],
          [
              -1.5788e-02, 4.1538e-02, -2.2857e-04, -1.4011e-02, 4.2560e-02,
              -3.1186e-03, -5.0343e-02, 6.8110e-03, -2.6728e-02, -3.2781e-02,
              3.6889e-02, -1.5609e-03
          ],
          [
              1.9004e-02, 5.7105e-03, 6.0329e-02, 1.3074e-02, -2.5546e-02,
              -1.1456e-02, -3.2484e-02, -3.3487e-02, 1.6609e-03, 1.7095e-02,
              1.2647e-05, 2.4814e-02
          ],
          [
              1.4482e-01, -6.3083e-02, 5.8307e-02, 9.1396e-02, -8.4571e-02,
              4.5890e-02, 5.6243e-02, -1.2448e-01, -9.5244e-02, 4.5746e-02,
              -1.7390e-02, 9.0267e-02
          ],
          [
              1.8065e-01, -2.0078e-02, 8.5401e-02, 1.0784e-01, -1.2495e-01,
              2.2796e-02, 1.1310e-01, -8.4364e-02, -1.1904e-01, 6.1180e-02,
              -1.8109e-02, 1.1229e-01
          ]]])
    bbox_out = dict(center=center,
                    size_class=size_class,
                    size_res=size_res,
                    dir_class=dir_class,
                    dir_res=dir_res)

    bbox3d = box_coder.decode(bbox_out)
    expected_bbox3d = torch.tensor(
        [[[0.8014, 3.4134, -0.6133, 0.9750, 2.2602, 0.9725, 1.6926],
          [2.6375, 8.4191, 2.0438, 0.5511, 0.4931, 0.9471, 2.6149],
          [4.2017, 5.2504, -0.7851, 0.6411, 0.5075, 0.9168, 1.5839],
          [-1.0088, 5.4107, 1.6293, 0.5064, 0.7017, 0.6602, 0.4605],
          [1.4837, 4.0268, 0.6222, 0.4071, 0.9951, 1.8243, 1.6786]]])
    assert torch.allclose(bbox3d, expected_bbox3d, atol=1e-4)

    # test split_pred
    box_preds = torch.rand(2, 79, 256)
    base_xyz = torch.rand(2, 256, 3)
    results = box_coder.split_pred(box_preds, base_xyz)
    obj_scores = results['obj_scores']
    center = results['center']
    dir_class = results['dir_class']
    dir_res_norm = results['dir_res_norm']
    dir_res = results['dir_res']
    size_class = results['size_class']
    size_res_norm = results['size_res_norm']
    size_res = results['size_res']
    sem_scores = results['sem_scores']
    assert obj_scores.shape == torch.Size([2, 256, 2])
    assert center.shape == torch.Size([2, 256, 3])
    assert dir_class.shape == torch.Size([2, 256, 12])
    assert dir_res_norm.shape == torch.Size([2, 256, 12])
    assert dir_res.shape == torch.Size([2, 256, 12])
    assert size_class.shape == torch.Size([2, 256, 10])
    assert size_res_norm.shape == torch.Size([2, 256, 10, 3])
    assert size_res.shape == torch.Size([2, 256, 10, 3])
    assert sem_scores.shape == torch.Size([2, 256, 10])
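    # (commentary) The 79 prediction channels decompose as 2 objectness +
    # 3 center + 12 dir_class + 12 dir_res + 10 size_class + 10 * 3 size_res +
    # 10 semantic scores, which is exactly what split_pred unpacks above.
    assert 2 + 3 + 12 + 12 + 10 + 10 * 3 + 10 == 79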
Example #13
def test_show_result_meshlab():
    pcd = 'tests/data/nuscenes/samples/LIDAR_TOP/n015-2018-08-02-17-16-37+' \
              '0800__LIDAR_TOP__1533201470948018.pcd.bin'
    box_3d = LiDARInstance3DBoxes(
        torch.tensor(
            [[8.7314, -1.8559, -1.5997, 0.4800, 1.2000, 1.8900, 0.0100]]))
    labels_3d = torch.tensor([0])
    scores_3d = torch.tensor([0.5])
    points = np.random.rand(100, 4)
    img_meta = dict(pts_filename=pcd,
                    boxes_3d=box_3d,
                    box_mode_3d=Box3DMode.LIDAR)
    data = dict(points=[[torch.tensor(points)]], img_metas=[[img_meta]])
    result = [
        dict(pts_bbox=dict(
            boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d))
    ]
    tmp_dir = tempfile.TemporaryDirectory()
    temp_out_dir = tmp_dir.name
    out_dir, file_name = show_result_meshlab(data, result, temp_out_dir)
    expected_outfile_pred = file_name + '_pred.obj'
    expected_outfile_pts = file_name + '_points.obj'
    expected_outfile_pred_path = os.path.join(out_dir, file_name,
                                              expected_outfile_pred)
    expected_outfile_pts_path = os.path.join(out_dir, file_name,
                                             expected_outfile_pts)
    assert os.path.exists(expected_outfile_pred_path)
    assert os.path.exists(expected_outfile_pts_path)
    tmp_dir.cleanup()

    # test multi-modality show
    # indoor scene
    pcd = 'tests/data/sunrgbd/points/000001.bin'
    filename = 'tests/data/sunrgbd/sunrgbd_trainval/image/000001.jpg'
    box_3d = DepthInstance3DBoxes(
        torch.tensor(
            [[-1.1580, 3.3041, -0.9961, 0.3829, 0.4647, 0.5574, 1.1213]]))
    img = np.random.randn(1, 3, 608, 832)
    k_mat = np.array([[529.5000, 0.0000, 365.0000],
                      [0.0000, 529.5000, 265.0000], [0.0000, 0.0000, 1.0000]])
    rt_mat = np.array([[0.9980, 0.0058, -0.0634], [0.0058, 0.9835, 0.1808],
                       [0.0634, -0.1808, 0.9815]])
    rt_mat = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]]) @ rt_mat.transpose(
        1, 0)
    depth2img = k_mat @ rt_mat
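    # (commentary) depth2img is assumed here to be intrinsics times rotation:
    # rt_mat.transpose(1, 0) rotates depth-frame points into the camera frame,
    # the fixed permutation matrix converts the depth convention (z up) to the
    # camera convention (y down), and k_mat projects to pixel coordinates.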
    img_meta = dict(filename=filename,
                    depth2img=depth2img,
                    pcd_horizontal_flip=False,
                    pcd_vertical_flip=False,
                    box_mode_3d=Box3DMode.DEPTH,
                    box_type_3d=DepthInstance3DBoxes,
                    pcd_trans=np.array([0., 0., 0.]),
                    pcd_scale_factor=1.0,
                    pts_filename=pcd,
                    transformation_3d_flow=['R', 'S', 'T'])
    data = dict(points=[[torch.tensor(points)]],
                img_metas=[[img_meta]],
                img=[img])
    result = [dict(boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d)]
    tmp_dir = tempfile.TemporaryDirectory()
    temp_out_dir = tmp_dir.name
    out_dir, file_name = show_result_meshlab(data,
                                             result,
                                             temp_out_dir,
                                             0.3,
                                             task='multi_modality-det')
    expected_outfile_pred = file_name + '_pred.obj'
    expected_outfile_pts = file_name + '_points.obj'
    expected_outfile_png = file_name + '_img.png'
    expected_outfile_proj = file_name + '_pred.png'
    expected_outfile_pred_path = os.path.join(out_dir, file_name,
                                              expected_outfile_pred)
    expected_outfile_pts_path = os.path.join(out_dir, file_name,
                                             expected_outfile_pts)
    expected_outfile_png_path = os.path.join(out_dir, file_name,
                                             expected_outfile_png)
    expected_outfile_proj_path = os.path.join(out_dir, file_name,
                                              expected_outfile_proj)
    assert os.path.exists(expected_outfile_pred_path)
    assert os.path.exists(expected_outfile_pts_path)
    assert os.path.exists(expected_outfile_png_path)
    assert os.path.exists(expected_outfile_proj_path)
    tmp_dir.cleanup()
    # outdoor scene
    pcd = 'tests/data/kitti/training/velodyne_reduced/000000.bin'
    filename = 'tests/data/kitti/training/image_2/000000.png'
    box_3d = LiDARInstance3DBoxes(
        torch.tensor(
            [[6.4495, -3.9097, -1.7409, 1.5063, 3.1819, 1.4716, 1.8782]]))
    img = np.random.randn(1, 3, 384, 1280)
    lidar2img = np.array(
        [[6.09695435e+02, -7.21421631e+02, -1.25125790e+00, -1.23041824e+02],
         [1.80384201e+02, 7.64479828e+00, -7.19651550e+02, -1.01016693e+02],
         [9.99945343e-01, 1.24365499e-04, 1.04513029e-02, -2.69386917e-01],
         [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])
    img_meta = dict(filename=filename,
                    pcd_horizontal_flip=False,
                    pcd_vertical_flip=False,
                    box_mode_3d=Box3DMode.LIDAR,
                    box_type_3d=LiDARInstance3DBoxes,
                    pcd_trans=np.array([0., 0., 0.]),
                    pcd_scale_factor=1.0,
                    pts_filename=pcd,
                    lidar2img=lidar2img)
    data = dict(points=[[torch.tensor(points)]],
                img_metas=[[img_meta]],
                img=[img])
    result = [
        dict(pts_bbox=dict(
            boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d))
    ]
    tmp_dir = tempfile.TemporaryDirectory()
    temp_out_dir = tmp_dir.name
    out_dir, file_name = show_result_meshlab(data,
                                             result,
                                             temp_out_dir,
                                             0.1,
                                             task='multi_modality-det')
    expected_outfile_pred = file_name + '_pred.obj'
    expected_outfile_pts = file_name + '_points.obj'
    expected_outfile_png = file_name + '_img.png'
    expected_outfile_proj = file_name + '_pred.png'
    expected_outfile_pred_path = os.path.join(out_dir, file_name,
                                              expected_outfile_pred)
    expected_outfile_pts_path = os.path.join(out_dir, file_name,
                                             expected_outfile_pts)
    expected_outfile_png_path = os.path.join(out_dir, file_name,
                                             expected_outfile_png)
    expected_outfile_proj_path = os.path.join(out_dir, file_name,
                                              expected_outfile_proj)
    assert os.path.exists(expected_outfile_pred_path)
    assert os.path.exists(expected_outfile_pts_path)
    assert os.path.exists(expected_outfile_png_path)
    assert os.path.exists(expected_outfile_proj_path)
    tmp_dir.cleanup()
    # test mono-3d show
    filename = 'tests/data/nuscenes/samples/CAM_BACK_LEFT/n015-2018-' \
               '07-18-11-07-57+0800__CAM_BACK_LEFT__1531883530447423.jpg'
    box_3d = CameraInstance3DBoxes(
        torch.tensor(
            [[6.4495, -3.9097, -1.7409, 1.5063, 3.1819, 1.4716, 1.8782]]))
    img = np.random.randn(1, 3, 384, 1280)
    cam2img = np.array([[100.0, 0.0, 50.0], [0.0, 100.0, 50.0],
                        [0.0, 0.0, 1.0]])
    img_meta = dict(filename=filename,
                    pcd_horizontal_flip=False,
                    pcd_vertical_flip=False,
                    box_mode_3d=Box3DMode.CAM,
                    box_type_3d=CameraInstance3DBoxes,
                    pcd_trans=np.array([0., 0., 0.]),
                    pcd_scale_factor=1.0,
                    cam2img=cam2img)
    data = dict(points=[[torch.tensor(points)]],
                img_metas=[[img_meta]],
                img=[img])
    result = [
        dict(img_bbox=dict(
            boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d))
    ]
    tmp_dir = tempfile.TemporaryDirectory()
    temp_out_dir = tmp_dir.name
    out_dir, file_name = show_result_meshlab(data,
                                             result,
                                             temp_out_dir,
                                             0.1,
                                             task='mono-det')
    expected_outfile_png = file_name + '_img.png'
    expected_outfile_proj = file_name + '_pred.png'
    expected_outfile_png_path = os.path.join(out_dir, file_name,
                                             expected_outfile_png)
    expected_outfile_proj_path = os.path.join(out_dir, file_name,
                                              expected_outfile_proj)
    assert os.path.exists(expected_outfile_png_path)
    assert os.path.exists(expected_outfile_proj_path)
    tmp_dir.cleanup()

    # test seg show
    pcd = 'tests/data/scannet/points/scene0000_00.bin'
    points = np.random.rand(100, 6)
    img_meta = dict(pts_filename=pcd)
    data = dict(points=[[torch.tensor(points)]], img_metas=[[img_meta]])
    pred_seg = torch.randint(0, 20, (100, ))
    result = [dict(semantic_mask=pred_seg)]
    tmp_dir = tempfile.TemporaryDirectory()
    temp_out_dir = tmp_dir.name
    out_dir, file_name = show_result_meshlab(data,
                                             result,
                                             temp_out_dir,
                                             task='seg')
    expected_outfile_pred = file_name + '_pred.obj'
    expected_outfile_pts = file_name + '_points.obj'
    expected_outfile_pred_path = os.path.join(out_dir, file_name,
                                              expected_outfile_pred)
    expected_outfile_pts_path = os.path.join(out_dir, file_name,
                                             expected_outfile_pts)
    assert os.path.exists(expected_outfile_pred_path)
    assert os.path.exists(expected_outfile_pts_path)
    tmp_dir.cleanup()
Example #14
    def loss(self,
             bbox_preds,
             points,
             gt_bboxes_3d,
             gt_labels_3d,
             pts_semantic_mask=None,
             pts_instance_mask=None,
             img_metas=None,
             rpn_targets=None,
             gt_bboxes_ignore=None):
        """Compute loss.

        Args:
            bbox_preds (dict): Predictions from forward of h3d bbox head.
            points (list[torch.Tensor]): Input points.
            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
                bboxes of each sample.
            gt_labels_3d (list[torch.Tensor]): Labels of each sample.
            pts_semantic_mask (None | list[torch.Tensor]): Point-wise
                semantic mask.
            pts_instance_mask (None | list[torch.Tensor]): Point-wise
                instance mask.
            img_metas (list[dict]): Meta info of the point clouds and images.
            rpn_targets (tuple): Targets generated by the RPN head.
            gt_bboxes_ignore (None | list[torch.Tensor]): Specify
                which bounding boxes to ignore.

        Returns:
            dict: Losses of H3DNet.
        """
        (vote_targets, vote_target_masks, size_class_targets, size_res_targets,
         dir_class_targets, dir_res_targets, center_targets, mask_targets,
         valid_gt_masks, objectness_targets, objectness_weights,
         box_loss_weights, valid_gt_weights) = rpn_targets

        losses = {}

        # calculate refined proposal loss
        refined_proposal_loss = self.get_proposal_stage_loss(
            bbox_preds,
            size_class_targets,
            size_res_targets,
            dir_class_targets,
            dir_res_targets,
            center_targets,
            mask_targets,
            objectness_targets,
            objectness_weights,
            box_loss_weights,
            valid_gt_weights,
            suffix='_optimized')
        for key in refined_proposal_loss.keys():
            losses[key + '_optimized'] = refined_proposal_loss[key]

        bbox3d_optimized = self.bbox_coder.decode(bbox_preds,
                                                  suffix='_optimized')

        targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d,
                                   pts_semantic_mask, pts_instance_mask,
                                   bbox_preds)

        (cues_objectness_label, cues_sem_label, proposal_objectness_label,
         cues_mask, cues_match_mask, proposal_objectness_mask,
         cues_matching_label, obj_surface_line_center) = targets

        # match scores for each geometric primitive
        objectness_scores = bbox_preds['matching_score']
        # match scores for the semantics of primitives
        objectness_scores_sem = bbox_preds['semantic_matching_score']

        primitive_objectness_loss = self.cues_objectness_loss(
            objectness_scores.transpose(2, 1),
            cues_objectness_label,
            weight=cues_mask,
            avg_factor=cues_mask.sum() + 1e-6)

        primitive_sem_loss = self.cues_semantic_loss(
            objectness_scores_sem.transpose(2, 1),
            cues_sem_label,
            weight=cues_mask,
            avg_factor=cues_mask.sum() + 1e-6)

        objectness_scores = bbox_preds['obj_scores_optimized']
        objectness_loss_refine = self.proposal_objectness_loss(
            objectness_scores.transpose(2, 1), proposal_objectness_label)
        primitive_matching_loss = (objectness_loss_refine *
                                   cues_match_mask).sum() / (
                                       cues_match_mask.sum() + 1e-6) * 0.5
        primitive_sem_matching_loss = (
            objectness_loss_refine * proposal_objectness_mask).sum() / (
                proposal_objectness_mask.sum() + 1e-6) * 0.5

        # Get the object surface center here
        batch_size, object_proposal = bbox3d_optimized.shape[:2]
        refined_bbox = DepthInstance3DBoxes(bbox3d_optimized.reshape(
            -1, 7).clone(),
                                            box_dim=bbox3d_optimized.shape[-1],
                                            with_yaw=self.with_angle,
                                            origin=(0.5, 0.5, 0.5))

        pred_obj_surface_center, pred_obj_line_center = \
            refined_bbox.get_surface_line_center()
        pred_obj_surface_center = pred_obj_surface_center.reshape(
            batch_size, -1, 6, 3).transpose(1, 2).reshape(batch_size, -1, 3)
        pred_obj_line_center = pred_obj_line_center.reshape(
            batch_size, -1, 12, 3).transpose(1, 2).reshape(batch_size, -1, 3)
        pred_surface_line_center = torch.cat(
            (pred_obj_surface_center, pred_obj_line_center), 1)
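        # (commentary) Each refined box contributes 6 surface centers and 12
        # line centers, so pred_surface_line_center holds 18 * object_proposal
        # points per sample, matching obj_surface_line_center from get_targets.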

        square_dist = self.primitive_center_loss(pred_surface_line_center,
                                                 obj_surface_line_center)

        match_dist = torch.sqrt(square_dist.sum(dim=-1) + 1e-6)
        primitive_centroid_reg_loss = torch.sum(
            match_dist * cues_matching_label) / (cues_matching_label.sum() +
                                                 1e-6)

        refined_loss = dict(
            primitive_objectness_loss=primitive_objectness_loss,
            primitive_sem_loss=primitive_sem_loss,
            primitive_matching_loss=primitive_matching_loss,
            primitive_sem_matching_loss=primitive_sem_matching_loss,
            primitive_centroid_reg_loss=primitive_centroid_reg_loss)

        losses.update(refined_loss)

        return losses
Example #15
    def forward(self, feats_dict, sample_mod):
        """Forward pass.

        Args:
            feats_dict (dict): Feature dict from backbone.
            sample_mod (str): Sample mode for vote aggregation layer.
                Valid modes are "vote", "seed" and "random".

        Returns:
            dict: Predictions of vote head.
        """
        ret_dict = {}
        aggregated_points = feats_dict['aggregated_points']
        original_feature = feats_dict['aggregated_features']
        batch_size = original_feature.shape[0]
        object_proposal = original_feature.shape[2]

        # Extract surface center, features and semantic predictions
        z_center = feats_dict['pred_z_center']
        xy_center = feats_dict['pred_xy_center']
        z_semantic = feats_dict['sem_cls_scores_z']
        xy_semantic = feats_dict['sem_cls_scores_xy']
        z_feature = feats_dict['aggregated_features_z']
        xy_feature = feats_dict['aggregated_features_xy']
        # Extract line points and features
        line_center = feats_dict['pred_line_center']
        line_feature = feats_dict['aggregated_features_line']

        surface_center_pred = torch.cat((z_center, xy_center), dim=1)
        ret_dict['surface_center_pred'] = surface_center_pred
        ret_dict['surface_sem_pred'] = torch.cat((z_semantic, xy_semantic),
                                                 dim=1)

        # Extract the surface and line centers of rpn proposals
        rpn_proposals = feats_dict['proposal_list']
        rpn_proposals_bbox = DepthInstance3DBoxes(
            rpn_proposals.reshape(-1, 7).clone(),
            box_dim=rpn_proposals.shape[-1],
            with_yaw=self.with_angle,
            origin=(0.5, 0.5, 0.5))

        obj_surface_center, obj_line_center = \
            rpn_proposals_bbox.get_surface_line_center()
        obj_surface_center = obj_surface_center.reshape(
            batch_size, -1, 6, 3).transpose(1, 2).reshape(batch_size, -1, 3)
        obj_line_center = obj_line_center.reshape(batch_size, -1, 12,
                                                  3).transpose(1, 2).reshape(
                                                      batch_size, -1, 3)
        ret_dict['surface_center_object'] = obj_surface_center
        ret_dict['line_center_object'] = obj_line_center

        # aggregate primitive z and xy features to rpn proposals
        surface_center_feature_pred = torch.cat((z_feature, xy_feature), dim=2)
        surface_center_feature_pred = torch.cat(
            (surface_center_feature_pred.new_zeros(
                (batch_size, 6, surface_center_feature_pred.shape[2])),
             surface_center_feature_pred),
            dim=1)

        surface_xyz, surface_features, _ = self.surface_center_matcher(
            surface_center_pred,
            surface_center_feature_pred,
            target_xyz=obj_surface_center)

        # aggregate primitive line features to rpn proposals
        line_feature = torch.cat((line_feature.new_zeros(
            (batch_size, 12, line_feature.shape[2])), line_feature),
                                 dim=1)
        line_xyz, line_features, _ = self.line_center_matcher(
            line_center, line_feature, target_xyz=obj_line_center)

        # combine the surface and line features
        combine_features = torch.cat((surface_features, line_features), dim=2)

        matching_features = self.matching_conv(combine_features)
        matching_score = self.matching_pred(matching_features)
        ret_dict['matching_score'] = matching_score.transpose(2, 1)

        semantic_matching_features = self.semantic_matching_conv(
            combine_features)
        semantic_matching_score = self.semantic_matching_pred(
            semantic_matching_features)
        ret_dict['semantic_matching_score'] = \
            semantic_matching_score.transpose(2, 1)

        surface_features = self.surface_feats_aggregation(surface_features)
        line_features = self.line_feats_aggregation(line_features)

        # Combine all surface and line features
        surface_features = surface_features.view(batch_size, -1,
                                                 object_proposal)
        line_features = line_features.view(batch_size, -1, object_proposal)

        combine_feature = torch.cat((surface_features, line_features), dim=1)

        # Final bbox predictions
        bbox_predictions = self.bbox_pred[0](combine_feature)
        bbox_predictions += original_feature
        for conv_module in self.bbox_pred[1:]:
            bbox_predictions = conv_module(bbox_predictions)

        refine_decode_res = self.bbox_coder.split_pred(
            bbox_predictions[:, :self.num_classes + 2],
            bbox_predictions[:, self.num_classes + 2:], aggregated_points)
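        # (commentary) The first num_classes + 2 channels are treated as
        # classification-style outputs (objectness plus semantic scores) and the
        # remaining channels as box regression outputs; split_pred receives them
        # separately along with the aggregated proposal centers.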
        for key in refine_decode_res.keys():
            ret_dict[key + '_optimized'] = refine_decode_res[key]
        return ret_dict

def test_boxes_conversion():
    # test CAM to LIDAR and DEPTH
    cam_boxes = CameraInstance3DBoxes(
        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
         [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
    convert_lidar_boxes = Coord3DMode.convert(cam_boxes, Coord3DMode.CAM,
                                              Coord3DMode.LIDAR)

    expected_tensor = torch.tensor(
        [[-1.7501, -1.7802, -2.5162, 1.6500, 1.7500, 3.3900, 1.4800],
         [-1.6357, -8.9594, -2.4567, 1.5700, 1.5400, 4.0100, 1.6200],
         [-1.3033, -28.2967, 0.5558, 1.4800, 1.4700, 2.2300, -1.5700],
         [-1.7361, -26.6690, -21.8230, 1.4000, 1.5600, 3.4800, -1.6900],
         [-1.6218, -31.3198, -8.1621, 1.4800, 1.7400, 3.7700, 2.7900]])
    assert torch.allclose(expected_tensor, convert_lidar_boxes.tensor, 1e-3)

    convert_depth_boxes = Coord3DMode.convert(cam_boxes, Coord3DMode.CAM,
                                              Coord3DMode.DEPTH)
    expected_tensor = torch.tensor(
        [[1.7802, 1.7501, 2.5162, 1.7500, 1.6500, 3.3900, 1.4800],
         [8.9594, 1.6357, 2.4567, 1.5400, 1.5700, 4.0100, 1.6200],
         [28.2967, 1.3033, -0.5558, 1.4700, 1.4800, 2.2300, -1.5700],
         [26.6690, 1.7361, 21.8230, 1.5600, 1.4000, 3.4800, -1.6900],
         [31.3198, 1.6218, 8.1621, 1.7400, 1.4800, 3.7700, 2.7900]])
    assert torch.allclose(expected_tensor, convert_depth_boxes.tensor, 1e-3)

    # test LIDAR to CAM and DEPTH
    lidar_boxes = LiDARInstance3DBoxes(
        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
         [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
    convert_cam_boxes = Coord3DMode.convert(lidar_boxes, Coord3DMode.LIDAR,
                                            Coord3DMode.CAM)
    expected_tensor = torch.tensor(
        [[-2.5162, 1.7501, 1.7802, 3.3900, 1.6500, 1.7500, 1.4800],
         [-2.4567, 1.6357, 8.9594, 4.0100, 1.5700, 1.5400, 1.6200],
         [0.5558, 1.3033, 28.2967, 2.2300, 1.4800, 1.4700, -1.5700],
         [-21.8230, 1.7361, 26.6690, 3.4800, 1.4000, 1.5600, -1.6900],
         [-8.1621, 1.6218, 31.3198, 3.7700, 1.4800, 1.7400, 2.7900]])
    assert torch.allclose(expected_tensor, convert_cam_boxes.tensor, 1e-3)

    convert_depth_boxes = Coord3DMode.convert(lidar_boxes, Coord3DMode.LIDAR,
                                              Coord3DMode.DEPTH)
    expected_tensor = torch.tensor(
        [[-2.5162, 1.7802, -1.7501, 3.3900, 1.7500, 1.6500, 1.4800],
         [-2.4567, 8.9594, -1.6357, 4.0100, 1.5400, 1.5700, 1.6200],
         [0.5558, 28.2967, -1.3033, 2.2300, 1.4700, 1.4800, -1.5700],
         [-21.8230, 26.6690, -1.7361, 3.4800, 1.5600, 1.4000, -1.6900],
         [-8.1621, 31.3198, -1.6218, 3.7700, 1.7400, 1.4800, 2.7900]])
    assert torch.allclose(expected_tensor, convert_depth_boxes.tensor, 1e-3)

    # test DEPTH to CAM and LIDAR
    depth_boxes = DepthInstance3DBoxes(
        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
         [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
    convert_cam_boxes = Coord3DMode.convert(depth_boxes, Coord3DMode.DEPTH,
                                            Coord3DMode.CAM)
    expected_tensor = torch.tensor(
        [[1.7802, -1.7501, -2.5162, 1.7500, 1.6500, 3.3900, 1.4800],
         [8.9594, -1.6357, -2.4567, 1.5400, 1.5700, 4.0100, 1.6200],
         [28.2967, -1.3033, 0.5558, 1.4700, 1.4800, 2.2300, -1.5700],
         [26.6690, -1.7361, -21.8230, 1.5600, 1.4000, 3.4800, -1.6900],
         [31.3198, -1.6218, -8.1621, 1.7400, 1.4800, 3.7700, 2.7900]])
    assert torch.allclose(expected_tensor, convert_cam_boxes.tensor, 1e-3)

    convert_lidar_boxes = Coord3DMode.convert(depth_boxes, Coord3DMode.DEPTH,
                                              Coord3DMode.LIDAR)
    expected_tensor = torch.tensor(
        [[2.5162, -1.7802, -1.7501, 3.3900, 1.7500, 1.6500, 1.4800],
         [2.4567, -8.9594, -1.6357, 4.0100, 1.5400, 1.5700, 1.6200],
         [-0.5558, -28.2967, -1.3033, 2.2300, 1.4700, 1.4800, -1.5700],
         [21.8230, -26.6690, -1.7361, 3.4800, 1.5600, 1.4000, -1.6900],
         [8.1621, -31.3198, -1.6218, 3.7700, 1.7400, 1.4800, 2.7900]])
    assert torch.allclose(expected_tensor, convert_lidar_boxes.tensor, 1e-3)
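
    # Extra sanity check (not in the original snippet): assuming the
    # Coord3DMode conversions are mutually inverse, a DEPTH -> LIDAR -> DEPTH
    # round trip should reproduce the original boxes.
    roundtrip_boxes = Coord3DMode.convert(
        Coord3DMode.convert(depth_boxes, Coord3DMode.DEPTH, Coord3DMode.LIDAR),
        Coord3DMode.LIDAR, Coord3DMode.DEPTH)
    assert torch.allclose(depth_boxes.tensor, roundtrip_boxes.tensor, 1e-3)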
Example #17
def test_h3d_head():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
    _setup_seed(0)

    h3d_head_cfg = _get_roi_head_cfg('h3dnet/h3dnet_3x8_scannet-3d-18class.py')

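    # use a small number of aggregated points and proposals so the head
    # runs quickly on the random toy inputs below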
    num_point = 128
    num_proposal = 64
    h3d_head_cfg.primitive_list[0].vote_aggregation_cfg.num_point = num_point
    h3d_head_cfg.primitive_list[1].vote_aggregation_cfg.num_point = num_point
    h3d_head_cfg.primitive_list[2].vote_aggregation_cfg.num_point = num_point
    h3d_head_cfg.bbox_head.num_proposal = num_proposal
    self = build_head(h3d_head_cfg).cuda()

    # prepare roi outputs
    fp_xyz = [torch.rand([1, num_point, 3], dtype=torch.float32).cuda()]
    hd_features = torch.rand([1, 256, num_point], dtype=torch.float32).cuda()
    fp_indices = [torch.randint(0, 128, [1, num_point]).cuda()]
    aggregated_points = torch.rand([1, num_proposal, 3],
                                   dtype=torch.float32).cuda()
    aggregated_features = torch.rand([1, 128, num_proposal],
                                     dtype=torch.float32).cuda()
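    # fake proposals: xyz centers in [-2, 2), sizes in [0, 4), yaw fixed to 0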
    proposal_list = torch.cat([
        torch.rand([1, num_proposal, 3], dtype=torch.float32).cuda() * 4 - 2,
        torch.rand([1, num_proposal, 3], dtype=torch.float32).cuda() * 4,
        torch.zeros([1, num_proposal, 1]).cuda()
    ],
                              dim=-1)

    input_dict = dict(fp_xyz_net0=fp_xyz,
                      hd_feature=hd_features,
                      aggregated_points=aggregated_points,
                      aggregated_features=aggregated_features,
                      seed_points=fp_xyz[0],
                      seed_indices=fp_indices[0],
                      proposal_list=proposal_list)

    # prepare gt label
    from mmdet3d.core.bbox import DepthInstance3DBoxes
    gt_bboxes_3d = [
        DepthInstance3DBoxes(torch.rand([4, 7], dtype=torch.float32).cuda()),
        DepthInstance3DBoxes(torch.rand([4, 7], dtype=torch.float32).cuda())
    ]
    gt_labels_3d = torch.randint(0, 18, [1, 4]).cuda()
    gt_labels_3d = [gt_labels_3d[0]]
    pts_semantic_mask = torch.randint(0, 19, [1, num_point]).cuda()
    pts_semantic_mask = [pts_semantic_mask[0]]
    pts_instance_mask = torch.randint(0, 4, [1, num_point]).cuda()
    pts_instance_mask = [pts_instance_mask[0]]
    points = torch.rand([1, num_point, 3], dtype=torch.float32).cuda()

    # prepare rpn targets
    vote_targets = torch.rand([1, num_point, 9], dtype=torch.float32).cuda()
    vote_target_masks = torch.rand([1, num_point], dtype=torch.float32).cuda()
    size_class_targets = torch.rand([1, num_proposal],
                                    dtype=torch.float32).cuda().long()
    size_res_targets = torch.rand([1, num_proposal, 3],
                                  dtype=torch.float32).cuda()
    dir_class_targets = torch.rand([1, num_proposal],
                                   dtype=torch.float32).cuda().long()
    dir_res_targets = torch.rand([1, num_proposal], dtype=torch.float32).cuda()
    center_targets = torch.rand([1, 4, 3], dtype=torch.float32).cuda()
    mask_targets = torch.rand([1, num_proposal],
                              dtype=torch.float32).cuda().long()
    valid_gt_masks = torch.rand([1, 4], dtype=torch.float32).cuda()
    objectness_targets = torch.rand([1, num_proposal],
                                    dtype=torch.float32).cuda().long()
    objectness_weights = torch.rand([1, num_proposal],
                                    dtype=torch.float32).cuda()
    box_loss_weights = torch.rand([1, num_proposal],
                                  dtype=torch.float32).cuda()
    valid_gt_weights = torch.rand([1, 4], dtype=torch.float32).cuda()

    targets = (vote_targets, vote_target_masks, size_class_targets,
               size_res_targets, dir_class_targets, dir_res_targets,
               center_targets, mask_targets, valid_gt_masks,
               objectness_targets, objectness_weights, box_loss_weights,
               valid_gt_weights)

    input_dict['targets'] = targets

    # train forward
    ret_dict = self.forward_train(input_dict,
                                  points=points,
                                  gt_bboxes_3d=gt_bboxes_3d,
                                  gt_labels_3d=gt_labels_3d,
                                  pts_semantic_mask=pts_semantic_mask,
                                  pts_instance_mask=pts_instance_mask,
                                  img_metas=None)

    assert ret_dict['flag_loss_z'] >= 0
    assert ret_dict['vote_loss_z'] >= 0
    assert ret_dict['center_loss_z'] >= 0
    assert ret_dict['size_loss_z'] >= 0
    assert ret_dict['sem_loss_z'] >= 0
    assert ret_dict['objectness_loss_optimized'] >= 0
    assert ret_dict['primitive_sem_matching_loss'] >= 0
Example #18
def test_primitive_head():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
    _setup_seed(0)

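    # minimal PrimitiveHead config for the 'z' primitive
    # (2 size dimensions, 18 semantic classes)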
    primitive_head_cfg = dict(
        type='PrimitiveHead',
        num_dims=2,
        num_classes=18,
        primitive_mode='z',
        vote_module_cfg=dict(in_channels=256,
                             vote_per_seed=1,
                             gt_per_seed=1,
                             conv_channels=(256, 256),
                             conv_cfg=dict(type='Conv1d'),
                             norm_cfg=dict(type='BN1d'),
                             norm_feats=True,
                             vote_loss=dict(type='ChamferDistance',
                                            mode='l1',
                                            reduction='none',
                                            loss_dst_weight=10.0)),
        vote_aggregation_cfg=dict(type='PointSAModule',
                                  num_point=64,
                                  radius=0.3,
                                  num_sample=16,
                                  mlp_channels=[256, 128, 128, 128],
                                  use_xyz=True,
                                  normalize_xyz=True),
        feat_channels=(128, 128),
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        objectness_loss=dict(type='CrossEntropyLoss',
                             class_weight=[0.4, 0.6],
                             reduction='mean',
                             loss_weight=1.0),
        center_loss=dict(type='ChamferDistance',
                         mode='l1',
                         reduction='sum',
                         loss_src_weight=1.0,
                         loss_dst_weight=1.0),
        semantic_reg_loss=dict(type='ChamferDistance',
                               mode='l1',
                               reduction='sum',
                               loss_src_weight=1.0,
                               loss_dst_weight=1.0),
        semantic_cls_loss=dict(type='CrossEntropyLoss',
                               reduction='sum',
                               loss_weight=1.0),
        train_cfg=dict(dist_thresh=0.2,
                       var_thresh=1e-2,
                       lower_thresh=1e-6,
                       num_point=100,
                       num_point_line=10,
                       line_thresh=0.2))

    self = build_head(primitive_head_cfg).cuda()
    fp_xyz = [torch.rand([2, 64, 3], dtype=torch.float32).cuda()]
    hd_features = torch.rand([2, 256, 64], dtype=torch.float32).cuda()
    fp_indices = [torch.randint(0, 64, [2, 64]).cuda()]
    input_dict = dict(fp_xyz_net0=fp_xyz,
                      hd_feature=hd_features,
                      fp_indices_net0=fp_indices)

    # test forward
    ret_dict = self(input_dict, 'vote')
    assert ret_dict['center_z'].shape == torch.Size([2, 64, 3])
    assert ret_dict['size_residuals_z'].shape == torch.Size([2, 64, 2])
    assert ret_dict['sem_cls_scores_z'].shape == torch.Size([2, 64, 18])
    assert ret_dict['aggregated_points_z'].shape == torch.Size([2, 64, 3])

    # test loss
    points = torch.rand([2, 1024, 3], dtype=torch.float32).cuda()
    ret_dict['seed_points'] = fp_xyz[0]
    ret_dict['seed_indices'] = fp_indices[0]

    from mmdet3d.core.bbox import DepthInstance3DBoxes
    gt_bboxes_3d = [
        DepthInstance3DBoxes(torch.rand([4, 7], dtype=torch.float32).cuda()),
        DepthInstance3DBoxes(torch.rand([4, 7], dtype=torch.float32).cuda())
    ]
    gt_labels_3d = torch.randint(0, 18, [2, 4]).cuda()
    gt_labels_3d = [gt_labels_3d[0], gt_labels_3d[1]]
    pts_semantic_mask = torch.randint(0, 19, [2, 1024]).cuda()
    pts_semantic_mask = [pts_semantic_mask[0], pts_semantic_mask[1]]
    pts_instance_mask = torch.randint(0, 4, [2, 1024]).cuda()
    pts_instance_mask = [pts_instance_mask[0], pts_instance_mask[1]]

    loss_input_dict = dict(bbox_preds=ret_dict,
                           points=points,
                           gt_bboxes_3d=gt_bboxes_3d,
                           gt_labels_3d=gt_labels_3d,
                           pts_semantic_mask=pts_semantic_mask,
                           pts_instance_mask=pts_instance_mask)
    losses_dict = self.loss(**loss_input_dict)

    assert losses_dict['flag_loss_z'] >= 0
    assert losses_dict['vote_loss_z'] >= 0
    assert losses_dict['center_loss_z'] >= 0
    assert losses_dict['size_loss_z'] >= 0
    assert losses_dict['sem_loss_z'] >= 0

    # 'primitive_mode' should be one of ['z', 'xy', 'line']
    with pytest.raises(AssertionError):
        primitive_head_cfg['primitive_mode'] = 'xyz'
        build_head(primitive_head_cfg)
Example #19
def test_scannet_pipeline():
    class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door',
                   'window', 'bookshelf', 'picture', 'counter', 'desk',
                   'curtain', 'refrigerator', 'showercurtrain', 'toilet',
                   'sink', 'bathtub', 'garbagebin')

    np.random.seed(0)
    pipelines = [
        dict(type='LoadPointsFromFile',
             coord_type='DEPTH',
             shift_height=True,
             load_dim=6,
             use_dim=[0, 1, 2]),
        dict(type='LoadAnnotations3D',
             with_bbox_3d=True,
             with_label_3d=True,
             with_mask_3d=True,
             with_seg_3d=True),
        dict(type='GlobalAlignment', rotation_axis=2),
        dict(type='PointSegClassMapping',
             valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
                            33, 34, 36, 39)),
        dict(type='IndoorPointSample', num_points=5),
        dict(type='RandomFlip3D',
             sync_2d=False,
             flip_ratio_bev_horizontal=1.0,
             flip_ratio_bev_vertical=1.0),
        dict(type='GlobalRotScaleTrans',
             rot_range=[-0.087266, 0.087266],
             scale_ratio_range=[1.0, 1.0],
             shift_height=True),
        dict(type='DefaultFormatBundle3D', class_names=class_names),
        dict(type='Collect3D',
             keys=[
                 'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
                 'pts_instance_mask'
             ]),
    ]
    pipeline = Compose(pipelines)
    info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')[0]
    results = dict()
    data_path = './tests/data/scannet'
    results['pts_filename'] = osp.join(data_path, info['pts_path'])
    if info['annos']['gt_num'] != 0:
        scannet_gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype(
            np.float32)
        scannet_gt_labels_3d = info['annos']['class'].astype(np.long)
    else:
        scannet_gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)
        scannet_gt_labels_3d = np.zeros((1, ), dtype=np.long)
    results['ann_info'] = dict()
    results['ann_info']['pts_instance_mask_path'] = osp.join(
        data_path, info['pts_instance_mask_path'])
    results['ann_info']['pts_semantic_mask_path'] = osp.join(
        data_path, info['pts_semantic_mask_path'])
    results['ann_info']['gt_bboxes_3d'] = DepthInstance3DBoxes(
        scannet_gt_bboxes_3d, box_dim=6, with_yaw=False)
    results['ann_info']['gt_labels_3d'] = scannet_gt_labels_3d
    results['ann_info']['axis_align_matrix'] = \
        info['annos']['axis_align_matrix']

    results['img_fields'] = []
    results['bbox3d_fields'] = []
    results['pts_mask_fields'] = []
    results['pts_seg_fields'] = []

    results = pipeline(results)

    points = results['points']._data
    gt_bboxes_3d = results['gt_bboxes_3d']._data
    gt_labels_3d = results['gt_labels_3d']._data
    pts_semantic_mask = results['pts_semantic_mask']._data
    pts_instance_mask = results['pts_instance_mask']._data
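    # the expected values below are deterministic: the pipeline is seeded
    # with np.random.seed(0) and both flip ratios are 1.0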
    expected_points = torch.tensor(
        [[1.8339e+00, 2.1093e+00, 2.2900e+00, 2.3895e+00],
         [3.6079e+00, 1.4592e-01, 2.0687e+00, 2.1682e+00],
         [4.1886e+00, 5.0614e+00, -1.0841e-01, -8.8736e-03],
         [6.8790e+00, 1.5086e+00, -9.3154e-02, 6.3816e-03],
         [4.8253e+00, 2.6668e-01, 1.4917e+00, 1.5912e+00]])
    expected_gt_bboxes_3d = torch.tensor(
        [[-1.1835, -3.6317, 1.8565, 1.7577, 0.3761, 0.5724, 0.0000],
         [-3.1832, 3.2269, 1.5268, 0.6727, 0.2251, 0.6715, 0.0000],
         [-0.9598, -2.2864, 0.6165, 0.7506, 2.5709, 1.2145, 0.0000],
         [-2.6988, -2.7354, 0.9722, 0.7680, 1.8877, 0.2870, 0.0000],
         [3.2989, 0.2885, 1.0712, 0.7600, 3.8814, 2.1603, 0.0000]])
    expected_gt_labels_3d = np.array([
        6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0,
        0, 0, 0, 5, 5, 5
    ])
    expected_pts_semantic_mask = np.array([0, 18, 18, 18, 18])
    expected_pts_instance_mask = np.array([44, 22, 10, 10, 57])
    assert torch.allclose(points, expected_points, 1e-2)
    assert torch.allclose(gt_bboxes_3d.tensor[:5, :], expected_gt_bboxes_3d,
                          1e-2)
    assert np.all(gt_labels_3d.numpy() == expected_gt_labels_3d)
    assert np.all(pts_semantic_mask.numpy() == expected_pts_semantic_mask)
    assert np.all(pts_instance_mask.numpy() == expected_pts_instance_mask)
Example #20
def test_groupfree3d_head():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
    _setup_seed(0)
    vote_head_cfg = _get_vote_head_cfg(
        'groupfree3d/groupfree3d_8x4_scannet-3d-18class-L6-O256.py')
    self = build_head(vote_head_cfg).cuda()

    fp_xyz = [torch.rand([2, 256, 3], dtype=torch.float32).cuda()]
    fp_features = [torch.rand([2, 288, 256], dtype=torch.float32).cuda()]
    fp_indices = [torch.randint(0, 128, [2, 256]).cuda()]

    input_dict = dict(fp_xyz=fp_xyz,
                      fp_features=fp_features,
                      fp_indices=fp_indices)

    # test forward
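    # 'kps' selects the initial object candidates from the per-seed
    # objectness scores (seeds_obj_cls_logits) instead of FPS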
    ret_dict = self(input_dict, 'kps')
    assert ret_dict['seeds_obj_cls_logits'].shape == torch.Size([2, 1, 256])
    assert ret_dict['s5.center'].shape == torch.Size([2, 256, 3])
    assert ret_dict['s5.dir_class'].shape == torch.Size([2, 256, 1])
    assert ret_dict['s5.dir_res'].shape == torch.Size([2, 256, 1])
    assert ret_dict['s5.size_class'].shape == torch.Size([2, 256, 18])
    assert ret_dict['s5.size_res'].shape == torch.Size([2, 256, 18, 3])
    assert ret_dict['s5.obj_scores'].shape == torch.Size([2, 256, 1])
    assert ret_dict['s5.sem_scores'].shape == torch.Size([2, 256, 18])

    # test losses
    points = [torch.rand([50000, 4], device='cuda') for i in range(2)]
    gt_bbox1 = torch.rand([10, 7], dtype=torch.float32).cuda()
    gt_bbox2 = torch.rand([10, 7], dtype=torch.float32).cuda()

    gt_bbox1 = DepthInstance3DBoxes(gt_bbox1)
    gt_bbox2 = DepthInstance3DBoxes(gt_bbox2)
    gt_bboxes = [gt_bbox1, gt_bbox2]

    pts_instance_mask_1 = torch.randint(0, 10, [50000], device='cuda')
    pts_instance_mask_2 = torch.randint(0, 10, [50000], device='cuda')
    pts_instance_mask = [pts_instance_mask_1, pts_instance_mask_2]

    pts_semantic_mask_1 = torch.randint(0, 19, [50000], device='cuda')
    pts_semantic_mask_2 = torch.randint(0, 19, [50000], device='cuda')
    pts_semantic_mask = [pts_semantic_mask_1, pts_semantic_mask_2]

    labels_1 = torch.randint(0, 18, [10], device='cuda')
    labels_2 = torch.randint(0, 18, [10], device='cuda')
    gt_labels = [labels_1, labels_2]

    losses = self.loss(ret_dict, points, gt_bboxes, gt_labels,
                       pts_semantic_mask, pts_instance_mask)

    assert losses['s5.objectness_loss'] >= 0
    assert losses['s5.semantic_loss'] >= 0
    assert losses['s5.center_loss'] >= 0
    assert losses['s5.dir_class_loss'] >= 0
    assert losses['s5.dir_res_loss'] >= 0
    assert losses['s5.size_class_loss'] >= 0
    assert losses['s5.size_res_loss'] >= 0

    # test multiclass_nms_single
    obj_scores = torch.rand([256], device='cuda')
    sem_scores = torch.rand([256, 18], device='cuda')
    points = torch.rand([50000, 3], device='cuda')
    bbox = torch.rand([256, 7], device='cuda')
    input_meta = dict(box_type_3d=DepthInstance3DBoxes)
    bbox_selected, score_selected, labels = \
        self.multiclass_nms_single(obj_scores,
                                   sem_scores,
                                   bbox,
                                   points,
                                   input_meta)
    assert bbox_selected.shape[0] >= 0
    assert bbox_selected.shape[1] == 7
    assert score_selected.shape[0] >= 0
    assert labels.shape[0] >= 0

    # test get_boxes
    points = torch.rand([1, 50000, 3], device='cuda')
    seed_points = torch.rand([1, 1024, 3], device='cuda')
    seed_indices = torch.randint(0, 50000, [1, 1024], device='cuda')
    obj_scores = torch.rand([1, 256, 1], device='cuda')
    center = torch.rand([1, 256, 3], device='cuda')
    dir_class = torch.rand([1, 256, 1], device='cuda')
    dir_res_norm = torch.rand([1, 256, 1], device='cuda')
    dir_res = torch.rand([1, 256, 1], device='cuda')
    size_class = torch.rand([1, 256, 18], device='cuda')
    size_res = torch.rand([1, 256, 18, 3], device='cuda')
    sem_scores = torch.rand([1, 256, 18], device='cuda')
    bbox_preds = dict()
    bbox_preds['seed_points'] = seed_points
    bbox_preds['seed_indices'] = seed_indices
    bbox_preds['s5.obj_scores'] = obj_scores
    bbox_preds['s5.center'] = center
    bbox_preds['s5.dir_class'] = dir_class
    bbox_preds['s5.dir_res_norm'] = dir_res_norm
    bbox_preds['s5.dir_res'] = dir_res
    bbox_preds['s5.size_class'] = size_class
    bbox_preds['s5.size_res'] = size_res
    bbox_preds['s5.sem_scores'] = sem_scores

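    # decode boxes only from the last transformer decoder stage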
    self.test_cfg['prediction_stages'] = 'last'
    results = self.get_bboxes(points, bbox_preds, [input_meta])
    assert results[0][0].tensor.shape[0] >= 0
    assert results[0][0].tensor.shape[1] == 7
    assert results[0][1].shape[0] >= 0
    assert results[0][2].shape[0] >= 0