Example #1
    def __getitem__(self, index):

        # for debugging purposes, uncomment one of the lines below to fix the frame
        # index = 0
        # index = 300
        # for index, info in enumerate(self.kitti_infos):
        #     if info['point_cloud']['lidar_idx'] == '100001284':
        #         break
        # print(self.kitti_infos[index]['point_cloud']['lidar_idx'])

        info = copy.deepcopy(self.kitti_infos[index])

        sample_idx = info['point_cloud']['lidar_idx']

        if cfg.TAG_PTS_WITH_RGB:
            points = self.get_colored_lidar(sample_idx)
        else:
            points = self.get_lidar(sample_idx)
        calib = self.get_calib(sample_idx)

        img_shape = info['image']['image_shape']
        if cfg.DATA_CONFIG.FOV_POINTS_ONLY:
            pts_rect = calib.lidar_to_rect(points[:, 0:3])
            fov_flag = self.get_fov_flag(pts_rect, img_shape, calib)
            points = points[fov_flag]

        input_dict = {
            'points': points,
            'sample_idx': sample_idx,
            'calib': calib,
        }

        if 'annos' in info:
            annos = info['annos']
            annos = common_utils.drop_info_with_name(annos, name='DontCare')
            loc, dims, rots = annos['location'], annos['dimensions'], annos[
                'rotation_y']
            gt_names = annos['name']
            bbox = annos['bbox']
            gt_boxes = np.concatenate([loc, dims, rots[..., np.newaxis]],
                                      axis=1).astype(np.float32)
            if 'gt_boxes_lidar' in annos:
                gt_boxes_lidar = annos['gt_boxes_lidar']
            else:
                gt_boxes_lidar = box_utils.boxes3d_camera_to_lidar(
                    gt_boxes, calib)

            input_dict.update({
                'gt_boxes': gt_boxes,
                'gt_names': gt_names,
                'gt_box2d': bbox,
                'gt_boxes_lidar': gt_boxes_lidar
            })

        # debugging the model: here we cheat by tagging whether each point lies inside a
        # ground-truth bbox; the extra 'annos' check guards against frames without labels,
        # since gt_boxes_lidar and gt_names are only defined when annotations exist
        if cfg.TAG_PTS_IF_IN_GT_BBOXES and 'annos' in info:
            points[:, 3] = 0
            corners_lidar = box_utils.boxes3d_to_corners3d_lidar(
                gt_boxes_lidar)
            for k in range(len(gt_boxes_lidar)):
                if gt_names[k] == 'Car':
                    flag = box_utils.in_hull(points[:, 0:3], corners_lidar[k])
                    points[flag, 3] = 1
            input_dict['points'] = points

        example = self.prepare_data(input_dict=input_dict,
                                    has_label='annos' in info)
        example['sample_idx'] = sample_idx
        example['image_shape'] = img_shape

        # add the BEV maps if an HD map is available (e.g., Argoverse)
        if 'bev' in cfg.MODE:
            bev = self.get_bev(sample_idx)
            example['bev'] = bev

        if cfg.INJECT_SEMANTICS or cfg.USE_PSEUDOLIDAR:
            # TODO: check the channel ordering of the ImageNet mean and std: RGB or BGR?
            img = self.get_image(sample_idx)

            # preprocess the image so it works with HRNet, which was trained on Cityscapes

            # on KITTI (unlike Argoverse) the images do not all share the same size,
            # so we scale them to a common size here; during voxelization they are
            # rescaled back so that projecting lidar points onto the image plane works

            # if the target height is not set, scale the image according to the width only
            if cfg.INJECT_SEMANTICS_HEIGHT == 0:
                img = image_resize(img, cfg.INJECT_SEMANTICS_WIDTH)
            else:
                img = cv2.resize(
                    img,
                    (cfg.INJECT_SEMANTICS_WIDTH, cfg.INJECT_SEMANTICS_HEIGHT),
                    interpolation=cv2.INTER_LINEAR)
            mean = np.array([0.485, 0.456, 0.406])
            std = np.array([0.229, 0.224, 0.225])
            img = input_transform(img, mean, std)  # normalize wrt to imagenet
            img = img.transpose(
                (2, 0, 1))  # change dim ordering to c, h, w for torch
            example['img'] = img
            example['pseudolidar_image_shape'] = img.shape[-2:]

            # if using pseudolidar from depth estimation, we need to crop out the sky and the road
            if cfg.USE_PSEUDOLIDAR:
                # first scale down, then crop out sky and roads
                oh, ow = img.shape[-2:]
                bottom_p, top_p = .15, cfg.DEPTH_MAP_TOP_MARGIN_PCT
                top = oh * top_p
                bottom = oh - (oh * bottom_p)
                top = int(top)
                bottom = int(bottom)
                img_cropped = img[:, top:bottom, :]
                example['img'] = img_cropped

        return example
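
For context, the FOV filter used above (self.get_fov_flag) is typically implemented by projecting the rectified-camera points onto the image plane and keeping only the points that land inside the image and lie in front of the camera. The sketch below is only an illustration: it assumes the calibration object exposes a rect_to_img(pts_rect) method returning pixel coordinates and depths, and that img_shape is (height, width); the actual helper in this codebase may differ.

import numpy as np

def get_fov_flag(pts_rect, img_shape, calib):
    # project rectified-camera points to pixel coordinates (u, v) and depths
    pts_img, pts_depth = calib.rect_to_img(pts_rect)  # assumed calib API
    inside_u = np.logical_and(pts_img[:, 0] >= 0, pts_img[:, 0] < img_shape[1])
    inside_v = np.logical_and(pts_img[:, 1] >= 0, pts_img[:, 1] < img_shape[0])
    in_front = pts_depth >= 0  # discard points behind the camera
    # a point is kept only if it projects inside the image and is in front of the camera
    return inside_u & inside_v & in_front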
Example #2
        def process_single_scene(sample_idx):
            print('%s sample_idx: %s' % (self.split, sample_idx))
            info = {}
            pc_info = {'num_features': 4, 'lidar_idx': sample_idx}
            info['point_cloud'] = pc_info

            image_info = {
                'image_idx': sample_idx,
                'image_shape': self.get_image_shape(sample_idx)
            }
            info['image'] = image_info
            calib = self.get_calib(sample_idx)

            P2 = np.concatenate(
                [calib.P2, np.array([[0., 0., 0., 1.]])], axis=0)
            R0_4x4 = np.zeros([4, 4], dtype=calib.R0.dtype)
            R0_4x4[3, 3] = 1.
            R0_4x4[:3, :3] = calib.R0
            V2C_4x4 = np.concatenate(
                [calib.V2C, np.array([[0., 0., 0., 1.]])], axis=0)
            calib_info = {
                'P2': P2,
                'R0_rect': R0_4x4,
                'Tr_velo_to_cam': V2C_4x4
            }

            info['calib'] = calib_info

            if has_label:
                obj_list = self.get_label(sample_idx)
                annotations = {}
                annotations['name'] = np.array(
                    [obj.cls_type for obj in obj_list])
                annotations['truncated'] = np.array(
                    [obj.truncation for obj in obj_list])
                annotations['occluded'] = np.array(
                    [obj.occlusion for obj in obj_list])
                annotations['alpha'] = np.array(
                    [obj.alpha for obj in obj_list])
                annotations['bbox'] = np.concatenate(
                    [obj.box2d.reshape(1, 4) for obj in obj_list], axis=0)
                annotations['dimensions'] = np.array([[obj.l, obj.h, obj.w]
                                                      for obj in obj_list
                                                      ])  # lhw(camera) format
                annotations['location'] = np.concatenate(
                    [obj.loc.reshape(1, 3) for obj in obj_list], axis=0)
                annotations['rotation_y'] = np.array(
                    [obj.ry for obj in obj_list])
                annotations['score'] = np.array(
                    [obj.score for obj in obj_list])
                annotations['difficulty'] = np.array(
                    [obj.level for obj in obj_list], np.int32)

                num_objects = len([
                    obj.cls_type for obj in obj_list
                    if obj.cls_type != 'DontCare'
                ])
                num_gt = len(annotations['name'])
                index = list(
                    range(num_objects)) + [-1] * (num_gt - num_objects)
                annotations['index'] = np.array(index, dtype=np.int32)

                loc = annotations['location'][:num_objects]
                dims = annotations['dimensions'][:num_objects]
                rots = annotations['rotation_y'][:num_objects]
                loc_lidar = calib.rect_to_lidar(loc)
                l, h, w = dims[:, 0:1], dims[:, 1:2], dims[:, 2:3]
                gt_boxes_lidar = np.concatenate(
                    [loc_lidar, w, l, h, rots[..., np.newaxis]], axis=1)
                annotations['gt_boxes_lidar'] = gt_boxes_lidar

                info['annos'] = annotations

                if count_inside_pts:
                    points = self.get_lidar(sample_idx)
                    calib = self.get_calib(sample_idx)
                    pts_rect = calib.lidar_to_rect(points[:, 0:3])

                    fov_flag = self.get_fov_flag(pts_rect,
                                                 info['image']['image_shape'],
                                                 calib)
                    pts_fov = points[fov_flag]
                    corners_lidar = box_utils.boxes3d_to_corners3d_lidar(
                        gt_boxes_lidar)
                    num_points_in_gt = -np.ones(num_gt, dtype=np.int32)

                    for k in range(num_objects):
                        flag = box_utils.in_hull(pts_fov[:, 0:3],
                                                 corners_lidar[k])
                        num_points_in_gt[k] = flag.sum()
                    annotations['num_points_in_gt'] = num_points_in_gt

            return info
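
For context, box_utils.in_hull, which is used above to count the lidar points inside each ground-truth box, is commonly implemented with a Delaunay triangulation of the eight box corners: a point lies inside the box exactly when it falls inside one of the simplices of that triangulation. A minimal sketch of that common implementation (the project's actual version may differ):

from scipy.spatial import Delaunay

def in_hull(points, corners):
    # points: (N, 3) candidate lidar points; corners: (8, 3) box corners in lidar coords
    hull = Delaunay(corners)
    # find_simplex returns -1 for points outside the convex hull of the corners
    return hull.find_simplex(points) >= 0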
Example #3
        def process_single_scene(sample_idx):
            print('%s sample_idx: %s' % (self.split, sample_idx))

            # the info for a single scene is represented as a nested dict
            info = {}

            # point cloud info dict
            pc_info = {'num_features': 4, 'lidar_idx': sample_idx}
            info['point_cloud'] = pc_info

            # image info dict
            image_info = {
                'image_idx': sample_idx,
                'image_shape': self.get_image_shape(sample_idx)
            }
            info['image'] = image_info

            # calibration matrices info dict
            # convert calib matrices to 4x4 so that we can use them in homogeneous coordinates
            calib = self.get_calib(sample_idx)
            P2 = np.concatenate(
                [calib.P2, np.array([[0., 0., 0., 1.]])], axis=0)
            R0_4x4 = np.zeros([4, 4], dtype=calib.R0.dtype)
            R0_4x4[3, 3] = 1.
            R0_4x4[:3, :3] = calib.R0
            V2C_4x4 = np.concatenate(
                [calib.V2C, np.array([[0., 0., 0., 1.]])], axis=0)
            calib_info = {
                'P2': P2,
                'R0_rect': R0_4x4,
                'Tr_velo_to_cam': V2C_4x4
            }
            info['calib'] = calib_info

            if has_label:
                obj_list = self.get_label(sample_idx)
                annotations = {}
                annotations['name'] = np.array(
                    [obj.cls_type for obj in obj_list])
                annotations['truncated'] = np.array(
                    [obj.truncation for obj in obj_list])
                annotations['occluded'] = np.array(
                    [obj.occlusion for obj in obj_list])
                annotations['alpha'] = np.array(
                    [obj.alpha for obj in obj_list])
                annotations['bbox'] = np.concatenate(
                    [obj.box2d.reshape(1, 4) for obj in obj_list],
                    axis=0)  # this is only the 2d image bbox
                annotations['dimensions'] = np.array([[obj.l, obj.h, obj.w]
                                                      for obj in obj_list
                                                      ])  # lhw(camera) format
                annotations['location'] = np.concatenate(
                    [obj.loc.reshape(1, 3) for obj in obj_list], axis=0)
                annotations['rotation_y'] = np.array(
                    [obj.ry for obj in obj_list])
                annotations['score'] = np.array(
                    [obj.score for obj in obj_list])
                annotations['difficulty'] = np.array(
                    [obj.level for obj in obj_list], np.int32)

                # count the objects that are not DontCare; 'index' maps each annotation
                # to its object index within the scene, with -1 marking DontCare entries
                num_objects = len([
                    obj.cls_type for obj in obj_list
                    if obj.cls_type != 'DontCare'
                ])
                num_gt = len(annotations['name'])
                index = list(
                    range(num_objects)) + [-1] * (num_gt - num_objects)
                annotations['index'] = np.array(index, dtype=np.int32)

                # convert the 3d box annotations from the rectified camera frame to the velodyne lidar frame
                loc = annotations['location'][:num_objects]
                dims = annotations['dimensions'][:num_objects]
                rots = annotations['rotation_y'][:num_objects]
                loc_lidar = calib.rect_to_lidar(loc)
                l, h, w = dims[:, 0:1], dims[:, 1:2], dims[:, 2:3]
                gt_boxes_lidar = np.concatenate(
                    [loc_lidar, w, l, h, rots[..., np.newaxis]], axis=1)
                annotations['gt_boxes_lidar'] = gt_boxes_lidar

                info['annos'] = annotations

                if count_inside_pts:
                    points = self.get_lidar(sample_idx)
                    calib = self.get_calib(sample_idx)
                    pts_rect = calib.lidar_to_rect(points[:, 0:3])

                    fov_flag = self.get_fov_flag(pts_rect,
                                                 info['image']['image_shape'],
                                                 calib)
                    pts_fov = points[fov_flag]
                    corners_lidar = box_utils.boxes3d_to_corners3d_lidar(
                        gt_boxes_lidar)
                    num_points_in_gt = -np.ones(num_gt, dtype=np.int32)

                    for k in range(num_objects):
                        flag = box_utils.in_hull(pts_fov[:, 0:3],
                                                 corners_lidar[k])
                        num_points_in_gt[k] = flag.sum()
                    annotations['num_points_in_gt'] = num_points_in_gt

            return info
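
process_single_scene is an inner worker that builds the info dict for one frame; it is typically mapped over all sample ids of the split, often with a thread pool so the per-frame I/O overlaps. A usage sketch, assuming sample_id_list holds the split's frame ids and process_single_scene is defined as above (the pool size of 4 is arbitrary):

import concurrent.futures as futures

# run the per-frame worker over every sample id of the split;
# executor.map preserves input order, so kitti_infos[i] corresponds to sample_id_list[i]
with futures.ThreadPoolExecutor(max_workers=4) as executor:
    kitti_infos = list(executor.map(process_single_scene, sample_id_list))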