Example 1
    def coalesce_pc_data(self, items):
        """Combine set of point cloud datums into a single point cloud.

        Parameters
        ----------
        items: list
            List of OrderedDict, containing parsed point cloud or image data.

        Returns
        -------
        coalesced_pc: OrderedDict
            OrderedDict containing coalesced point cloud and associated metadata.
        """
        pc_items = [item for item in items if POINT_CLOUD_KEY in item]
        assert self.coalesce_point_cloud
        assert len(pc_items) == len(LIDAR_DATUM_NAMES)

        # Merge points (and annotations, if present) from all lidars into the vehicle (V) frame
        coalesced_pc = OrderedDict()
        X_V_merged, bbox_3d_V_merged, class_ids_merged, instance_ids_merged = [], [], [], []
        for item in pc_items:
            X_S = item[POINT_CLOUD_KEY]
            p_VS = item['extrinsics']
            # Move the points from the sensor (S) frame into the vehicle (V) frame.
            X_V_merged.append(p_VS * X_S)
            if 'bounding_box_3d' in item:
                bbox_3d_V_merged.extend([p_VS * bbox_3d for bbox_3d in item['bounding_box_3d']])
                class_ids_merged.append(item['class_ids'])
                instance_ids_merged.append(item['instance_ids'])

        coalesced_pc['datum_name'] = COALESCED_LIDAR_DATUM_NAME
        coalesced_pc['timestamp'] = pc_items[0]['timestamp']
        coalesced_pc[POINT_CLOUD_KEY] = np.vstack(X_V_merged)
        coalesced_pc['extra_channels'] = np.vstack([item['extra_channels'] for item in pc_items])
        # Note: Extrinsics for this "virtual" datum are identity, since the
        # coalesced point cloud is defined in the (V)ehicle frame of reference.
        coalesced_pc['extrinsics'] = Pose()
        # Note: The pose of the coalesced point cloud is pose_LV, recovered
        # from any constituent datum via p_LV = p_LS * p_VS.inverse().
        p_LS = pc_items[0]['pose']
        p_VS = pc_items[0]['extrinsics']
        p_LV = p_LS * p_VS.inverse()
        coalesced_pc['pose'] = p_LV

        if len(bbox_3d_V_merged):
            # Add bounding boxes and metadata in the vehicle frame
            coalesced_pc['bounding_box_3d'] = bbox_3d_V_merged
            coalesced_pc['class_ids'] = np.hstack(class_ids_merged)
            coalesced_pc['instance_ids'] = np.hstack(instance_ids_merged)
            # Keep only the boxes, class ids and instance ids that correspond
            # to unique instance IDs
            coalesced_pc['instance_ids'], unique_idx = np.unique(coalesced_pc['instance_ids'], return_index=True)
            coalesced_pc['bounding_box_3d'] = [
                bbox_3d for idx, bbox_3d in enumerate(coalesced_pc['bounding_box_3d']) if idx in unique_idx
            ]
            coalesced_pc['class_ids'] = np.array([
                class_id for idx, class_id in enumerate(coalesced_pc['class_ids']) if idx in unique_idx
            ])

            # These keys only exist when boxes were merged, so check them here
            assert len(coalesced_pc['bounding_box_3d']) == len(coalesced_pc['class_ids'])
            assert len(coalesced_pc['bounding_box_3d']) == len(coalesced_pc['instance_ids'])

        return coalesced_pc
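To make the frame change in coalesce_pc_data concrete, here is a minimal standalone numpy sketch of the same merge, using 4x4 homogeneous matrices in place of the Pose objects above; the sensor poses and point counts are made up for illustration.

# Numpy sketch of the coalescing transform: each sensor's points X_S are
# mapped into the vehicle (V) frame with its extrinsics p_VS, then stacked.
import numpy as np

def transform_points(p_VS, X_S):
    """Apply a 4x4 homogeneous transform to an (N x 3) point array."""
    R, t = p_VS[:3, :3], p_VS[:3, 3]
    return X_S @ R.T + t

# Two toy lidars: one at the vehicle origin, one translated 1m along x.
p_VS_a = np.eye(4)
p_VS_b = np.eye(4)
p_VS_b[0, 3] = 1.0
X_a = np.random.rand(100, 3)
X_b = np.random.rand(50, 3)
X_V = np.vstack([transform_points(p_VS_a, X_a), transform_points(p_VS_b, X_b)])
assert X_V.shape == (150, 3)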
Example 2
def parse_annotations_3d_proto(annotation_file,
                               json_category_id_to_contiguous_id):
    """Parse annotations from BoundingBox2DAnnotations structure.

    Parameters
    ----------
    annotation_file: str
        Path to JSON file containing annotations for 3D bounding boxes

    json_category_id_to_contiguous_id: dict
        Lookup from COCO style JSON id's to contiguous id's

    Returns
    -------
    tuple holding:
        boxes: list of BoundingBox3D
            List of 3D bounding boxes for this sample, each parameterized by
            (pose.quat.qw, pose.quat.qx, pose.quat.qy, pose.quat.qz,
            pose.tvec.x, pose.tvec.y, pose.tvec.z, width, length, height)
            in absolute scale

        class_ids: list of int
            Contiguous class ids (aligned with ``boxes``)

        instance_ids: dict
            Map from instance_id to tuple of (box, class_id)
    """

    # *CAVEAT*: `attributes` field is defined in proto, but not being read here.
    # TODO: read attributes (see above); change outputs of all function calls.

    # `open_pbobject` reads the file itself, so no separate file handle is needed.
    annotations = open_pbobject(annotation_file, BoundingBox3DAnnotations)
    boxes, class_ids, instance_ids = [], [], {}
    for i, ann in enumerate(annotations.annotations):
        boxes.append(
            BoundingBox3D(
                Pose.from_pose_proto(ann.box.pose),
                np.float32([ann.box.width, ann.box.length,
                            ann.box.height]), ann.num_points,
                ann.box.occlusion, ann.box.truncation))
        class_ids.append(json_category_id_to_contiguous_id[ann.class_id])
        instance_ids[ann.instance_id] = (boxes[i], class_ids[i])
    return boxes, class_ids, instance_ids
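For context, here is a sketch of how a json_category_id_to_contiguous_id lookup of the kind this function expects is typically built; the sparse category ids below are hypothetical, not from any DGP ontology.

# COCO-style category ids can be sparse, so they are remapped to a dense
# 0..K-1 range before training.
json_category_ids = [1, 3, 7, 11]  # hypothetical sparse COCO-style ids
json_category_id_to_contiguous_id = {
    cat_id: idx for idx, cat_id in enumerate(sorted(json_category_ids))
}
assert json_category_id_to_contiguous_id == {1: 0, 3: 1, 7: 2, 11: 3}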
Example 3
    def __init__(self, K, D=None, p_cw=None):
        """Initialize camera with identity pose.

        Parameters
        ----------
        K: np.ndarray (3x3)
            Camera calibration matrix.

        D: np.ndarray (5,) or (H x W)
            Distortion parameters or distortion map.

        p_cw: dgp.utils.geometry.Pose
            Pose from world to camera frame.
        """
        self.K = K
        self.D = Distortion() if D is None else D
        self.p_cw = Pose() if p_cw is None else p_cw
        assert isinstance(self.K, np.ndarray)
        assert isinstance(self.p_cw, Pose)
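As a quick illustration of what the calibration matrix K encodes, the standalone sketch below projects camera-frame points to pixel coordinates; the intrinsics values are made up, and this is not the Camera class's own projection API.

# Pinhole projection: homogeneous pixel coordinates are K @ X, followed by
# the perspective divide.
import numpy as np

K = np.array([[720., 0., 640.],
              [0., 720., 360.],
              [0., 0., 1.]])
X_c = np.array([[1.0, -0.5, 4.0]])   # (N x 3) points in the camera frame
uv_h = X_c @ K.T                     # homogeneous pixel coordinates
uv = uv_h[:, :2] / uv_h[:, 2:3]      # perspective divide
print(uv)                            # [[820., 270.]]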
Example 4
    def __getitem__(self, idx):
        """Get a dataset sample"""
        # Get DGP sample (if single sensor, make it a list)
        self.sample_dgp = self.dataset[idx]
        self.sample_dgp = [make_list(sample) for sample in self.sample_dgp]

        # Loop over all cameras
        sample = []
        for i in range(self.num_cameras):
            data = {
                'idx': idx,
                'dataset_idx': self.dataset_idx,
                'sensor_name': self.get_current('datum_name', i),
                #
                'filename': self.get_filename(idx, i),
                'splitname': '%s_%010d' % (self.split, idx),
                #
                'rgb': self.get_current('rgb', i),
                'intrinsics': self.get_current('intrinsics', i),
            }

            # If depth is returned
            if self.with_depth:
                data.update({
                    'depth': self.generate_depth_map(idx, i, data['filename'])
                })

            # If pose is returned
            if self.with_pose:
                data.update({
                    'extrinsics': self.get_current('extrinsics', i).matrix,
                    'pose': self.get_current('pose', i).matrix,
                })

            # If context is returned
            if self.has_context:
                data.update({
                    'rgb_context': self.get_context('rgb', i),
                })
                # If context pose is returned
                if self.with_pose:
                    # Get original values to calculate relative motion
                    orig_extrinsics = Pose.from_matrix(data['extrinsics'])
                    orig_pose = Pose.from_matrix(data['pose'])
                    data.update({
                        'extrinsics_context':
                            [(orig_extrinsics.inverse() * extrinsics).matrix
                             for extrinsics in self.get_context('extrinsics', i)],
                        'pose_context':
                            [(orig_pose.inverse() * pose).matrix
                             for pose in self.get_context('pose', i)],
                    })

            sample.append(data)

        # Apply same data transformations for all sensors
        if self.data_transform:
            sample = [self.data_transform(smp) for smp in sample]

        # Return sample (stacked if necessary)
        return stack_sample(sample)
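The pose_context entries above store relative motion as orig_pose.inverse() * pose. A plain-numpy sketch of that computation, with 4x4 matrices standing in for Pose objects and made-up translations:

# The context pose expressed in the current frame is inv(T_orig) @ T_ctx.
import numpy as np

def relative(T_orig, T_ctx):
    return np.linalg.inv(T_orig) @ T_ctx

T_orig = np.eye(4); T_orig[0, 3] = 2.0   # ego at x=2
T_ctx = np.eye(4);  T_ctx[0, 3] = 3.5    # ego at x=3.5 one frame later
print(relative(T_orig, T_ctx)[:3, 3])    # [1.5, 0., 0.]: 1.5m of forward motion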
Example 5
    def get_point_cloud_from_datum(self, scene_idx, sample_idx_in_scene,
                                   datum_idx_in_sample):
        """Get the sample image data from point cloud datum.

        Parameters
        ----------
        scene_idx: int
            Index of the scene.

        sample_idx_in_scene: int
            Index of the sample within the scene at scene_idx.

        datum_idx_in_sample: int
            Index of the datum within the sample's datum keys.

        Returns
        -------
        data: OrderedDict

            "timestamp": int
                Timestamp of the image in microseconds.

            "datum_name": str
                Sensor name from which the data was collected

            "extrinsics": Pose
                Sensor extrinsics with respect to the vehicle frame.

            "point_cloud": np.ndarray (N x 3)
                Point cloud in the local/world (L) frame returning X, Y and Z
                coordinates. The local frame is consistent across multiple
                timesteps in a scene.

            "extra_channels": np.ndarray (N x M)
                Remaining channels from point_cloud (i.e. lidar intensity I or pixel colors RGB)

            "pose": Pose
                Pose of sensor with respect to the world/global/local frame
                (reference frame that is initialized at start-time). (i.e. this
                provides the ego-pose in `pose_WS` where S refers to the point
                cloud sensor (S)).

            "bounding_box_3d": list of BoundingBox3D
                3D Bounding boxes for this sample specified in this point cloud
                sensor's reference frame. (i.e. this provides the bounding box
                (B) in the sensor's (S) reference frame `box_SB`).

            "class_ids": np.ndarray dtype=np.int64
                Tensor containing class ids (aligned with ``bounding_box_3d``)

            "instance_ids": np.ndarray dtype=np.int64
                Tensor containing instance ids (aligned with ``bounding_box_3d``)

        """
        datum = self.get_datum(scene_idx, sample_idx_in_scene,
                               datum_idx_in_sample)
        assert datum.datum.WhichOneof('datum_oneof') == 'point_cloud'

        # Determine the ego-pose of the lidar sensor (S) with respect to the world
        # (W) @ t=Ts
        pose_WS_Ts = Pose.from_pose_proto(datum.datum.point_cloud.pose) \
                     if hasattr(datum.datum.point_cloud, 'pose') else Pose()
        # Get sensor extrinsics for the datum name
        pose_VS = self.get_sensor_extrinsics(
            self.get_sample(scene_idx, sample_idx_in_scene).calibration_key,
            datum.id.name)

        # Points are described in the lidar sensor's (S) frame, captured at the
        # corresponding lidar timestamp (Ts).
        X_S, annotations = self.load_datum_and_annotations(
            scene_idx, sample_idx_in_scene, datum_idx_in_sample)
        data = OrderedDict({
            "timestamp": datum.id.timestamp.ToMicroseconds(),
            "datum_name": datum.id.name,
            "extrinsics": pose_VS,
            "pose": pose_WS_Ts,
            "point_cloud": X_S[:, :3],
            "extra_channels": X_S[:, 3:],
        })

        # Extract 3D bounding box labels, if requested.
        # Also checks if BOUNDING_BOX_3D annotation exists because some datasets have sparse annotations.
        if "bounding_box_3d" in self.requested_annotations and "bounding_box_3d" in annotations:
            annotation_data = load_bounding_box_3d_annotations(
                annotations, self.get_scene_directory(scene_idx),
                self.json_category_id_to_contiguous_id)
            data.update(annotation_data)
        return data
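The returned fields compose as follows: point_cloud is in the sensor (S) frame, extrinsics (p_VS) lifts it into the vehicle frame, and pose (p_WS) lifts it into the world frame. A numpy sketch with a hypothetical ego pose:

# 4x4 matrices stand in for Pose objects; the pose value is made up.
import numpy as np

p_WS = np.eye(4); p_WS[1, 3] = 10.0   # hypothetical ego pose: 10m along y
X_S = np.array([[5.0, 0.0, 0.0]])     # one lidar return in the sensor frame
X_W = X_S @ p_WS[:3, :3].T + p_WS[:3, 3]
print(X_W)                            # [[5., 10., 0.]]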
Example 6
    def get_image_from_datum(self, scene_idx, sample_idx_in_scene,
                             datum_idx_in_sample):
        """Get the sample image data from image datum.

        Parameters
        ----------
        scene_idx: int
            Index of the scene.

        sample_idx_in_scene: int
            Index of the sample within the scene at scene_idx.

        datum_idx_in_sample: int
            Index of the datum within the sample's datum keys.

        Returns
        -------
        data: OrderedDict

            "timestamp": int
                Timestamp of the image in microseconds.

            "datum_name": str
                Sensor name from which the data was collected

            "rgb": PIL.Image (mode=RGB)
                Image in RGB format.

            "intrinsics": np.ndarray
                Camera intrinsics.

            "extrinsics": Pose
                Camera extrinsics with respect to the vehicle frame.

            "pose": Pose
                Pose of sensor with respect to the world/global/local frame
                (reference frame that is initialized at start-time). (i.e. this
                provides the ego-pose in `pose_WC`).

            "bounding_box_2d": np.ndarray dtype=np.float32
                Tensor containing bounding boxes for this sample
                (x, y, w, h) in absolute pixel coordinates

            "bounding_box_3d": list of BoundingBox3D
                3D Bounding boxes for this sample specified in this camera's
                reference frame. (i.e. this provides the bounding box (B) in
                the camera's (C) reference frame `box_CB`).

            "class_ids": np.ndarray dtype=np.int64
                Tensor containing class ids (aligned with ``bounding_box_2d`` and ``bounding_box_3d``)

            "instance_ids": np.ndarray dtype=np.int64
                Tensor containing instance ids (aligned with ``bounding_box_2d`` and ``bounding_box_3d``)

        """
        datum = self.get_datum(scene_idx, sample_idx_in_scene,
                               datum_idx_in_sample)
        assert datum.datum.WhichOneof('datum_oneof') == 'image'

        # Get camera calibration and extrinsics for the datum name
        sample = self.get_sample(scene_idx, sample_idx_in_scene)
        camera = self.get_camera_calibration(sample.calibration_key,
                                             datum.id.name)
        pose_VC = self.get_sensor_extrinsics(sample.calibration_key,
                                             datum.id.name)

        # Get ego-pose for the image (at the corresponding image timestamp t=Tc)
        pose_WC_Tc = Pose.from_pose_proto(datum.datum.image.pose) \
                     if hasattr(datum.datum.image, 'pose') else Pose()

        # Populate data for image data
        image, annotations = self.load_datum_and_annotations(
            scene_idx, sample_idx_in_scene, datum_idx_in_sample)
        data = OrderedDict({
            "timestamp": datum.id.timestamp.ToMicroseconds(),
            "datum_name": datum.id.name,
            "rgb": image,
            "intrinsics": camera.K,
            "extrinsics": pose_VC,
            "pose": pose_WC_Tc
        })

        # Extract 2D/3D bounding box labels if requested.
        # Also check that the BOUNDING_BOX_2D / BOUNDING_BOX_3D annotations
        # exist, because some datasets have sparse annotations.
        # TODO: Load the datum based on the type; no need to hardcode these
        # conditions. In particular, figure out how to handle joint conditions
        # like the 2D+3D case below.
        annotation_data = {}
        if self.requested_annotations:
            ann_root_dir = self.get_scene_directory(scene_idx)
            has_2d = "bounding_box_2d" in self.requested_annotations and "bounding_box_2d" in annotations
            has_3d = "bounding_box_3d" in self.requested_annotations and "bounding_box_3d" in annotations
            if has_2d and has_3d:
                annotation_data = load_aligned_bounding_box_annotations(
                    annotations, ann_root_dir,
                    self.json_category_id_to_contiguous_id)
            elif has_2d:
                annotation_data = load_bounding_box_2d_annotations(
                    annotations, ann_root_dir,
                    self.json_category_id_to_contiguous_id)
            elif has_3d:
                annotation_data = load_bounding_box_3d_annotations(
                    annotations, ann_root_dir,
                    self.json_category_id_to_contiguous_id)
        data.update(annotation_data)
        return data
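One way to address the TODO above is a small dispatch table keyed by the set of annotation types that are both requested and present. The sketch below returns loader names as strings so it stays self-contained; select_annotation_loader is a hypothetical helper, not part of the DGP API.

# Map the requested-and-present annotation keys to a loader name, instead
# of hardcoding the joint conditions.
def select_annotation_loader(requested, available):
    present = frozenset(k for k in ('bounding_box_2d', 'bounding_box_3d')
                        if k in requested and k in available)
    loaders = {
        frozenset({'bounding_box_2d', 'bounding_box_3d'}):
            'load_aligned_bounding_box_annotations',
        frozenset({'bounding_box_2d'}): 'load_bounding_box_2d_annotations',
        frozenset({'bounding_box_3d'}): 'load_bounding_box_3d_annotations',
    }
    return loaders.get(present)  # None when nothing applies

assert select_annotation_loader(['bounding_box_2d'], {'bounding_box_2d': 1}) \
    == 'load_bounding_box_2d_annotations'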
Example 7
    def __getitem__(self, idx):
        """Get a dataset sample"""
        # Get DGP sample (if single sensor, make it a list)
        self.sample_dgp = self.dataset[idx]
        self.sample_dgp = [make_list(sample) for sample in self.sample_dgp]
        if self.with_geometric_context:
            self.sample_dgp_left = self.dataset_left[idx]
            self.sample_dgp_left = [
                make_list(sample) for sample in self.sample_dgp_left
            ]
            self.sample_dgp_right = self.dataset_right[idx]
            self.sample_dgp_right = [
                make_list(sample) for sample in self.sample_dgp_right
            ]

        # Loop over all cameras
        sample = []
        for i in range(self.num_cameras):
            i_left = self.get_camera_idx_left(i)
            i_right = self.get_camera_idx_right(i)

            data = {
                'idx': idx,
                'dataset_idx': self.dataset_idx,
                'sensor_name': self.get_current('datum_name', i),
                #
                'filename': self.get_filename(idx, i),
                'splitname': '%s_%010d' % (self.split, idx),
                #
                'rgb': self.get_current('rgb', i),
                'intrinsics': self.get_current('intrinsics', i),
                'extrinsics': self.get_current('extrinsics', i).matrix,
                'path_to_ego_mask': os.path.join(
                    os.path.dirname(self.path),
                    self._get_path_to_ego_mask(self.get_filename(idx, i))),
            }

            # If depth is returned
            if self.with_depth:
                data.update({
                    'depth': self.generate_depth_map(idx, i, data['filename'])
                })

            # If pose is returned
            if self.with_pose:
                data.update({
                    'pose': self.get_current('pose', i).matrix,
                })

            if self.has_context:
                orig_extrinsics = Pose.from_matrix(data['extrinsics'])
                data.update({
                    'rgb_context': self.get_context('rgb', i),
                    'intrinsics_context': self.get_context('intrinsics', i),
                    'extrinsics_context':
                        [(extrinsics.inverse() * orig_extrinsics).matrix
                         for extrinsics in self.get_context('extrinsics', i)],
                })
                # The ego mask is per-camera, so the same path is repeated for
                # each context frame.
                data.update({
                    'path_to_ego_mask_context': [
                        data['path_to_ego_mask']
                        for _ in range(len(data['rgb_context']))
                    ],
                })
                data.update({
                    'context_type':
                        ['backward'] * self.bwd + ['forward'] * self.fwd,
                })

                # If context pose is returned
                if self.with_pose:
                    # Get original values to calculate relative motion
                    orig_pose = Pose.from_matrix(data['pose'])
                    data.update({
                        'pose_context':
                            [(orig_pose.inverse() * pose).matrix
                             for pose in self.get_context('pose', i)],
                    })

            if self.with_geometric_context:
                orig_extrinsics = Pose.from_matrix(data['extrinsics'])
                orig_extrinsics_left = Pose.from_matrix(
                    self.get_current_left('extrinsics', i_left).matrix)
                orig_extrinsics_right = Pose.from_matrix(
                    self.get_current_right('extrinsics', i_right).matrix)

                data['rgb_context'].append(self.get_current_left('rgb', i_left))
                data['rgb_context'].append(self.get_current_right('rgb', i_right))

                data['intrinsics_context'].append(
                    self.get_current_left('intrinsics', i_left))
                data['intrinsics_context'].append(
                    self.get_current_right('intrinsics', i_right))

                data['extrinsics_context'].append(
                    (orig_extrinsics_left.inverse() * orig_extrinsics).matrix)
                data['extrinsics_context'].append(
                    (orig_extrinsics_right.inverse() * orig_extrinsics).matrix)

                data['path_to_ego_mask_context'].append(
                    os.path.join(
                        os.path.dirname(self.path),
                        self._get_path_to_ego_mask(
                            self.get_filename_left(idx, i_left))))
                data['path_to_ego_mask_context'].append(
                    os.path.join(
                        os.path.dirname(self.path),
                        self._get_path_to_ego_mask(
                            self.get_filename_right(idx, i_right))))

                data['context_type'].append('left')
                data['context_type'].append('right')

                data.update({
                    'sensor_name_left': self.get_current_left('datum_name', i_left),
                    'sensor_name_right': self.get_current_right('datum_name', i_right),
                    #
                    'filename_left': self.get_filename_left(idx, i_left),
                    'filename_right': self.get_filename_right(idx, i_right),
                })

            sample.append(data)

        # Apply same data transformations for all sensors
        if self.data_transform:
            sample = [self.data_transform(smp) for smp in sample]

        # Return sample (stacked if necessary)
        return stack_sample(sample)
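The left/right extrinsics_context entries above are computed as orig_extrinsics_left.inverse() * orig_extrinsics, i.e. the current camera expressed in the left camera's frame. A numpy sketch with a hypothetical 0.5m stereo baseline:

# 4x4 matrices stand in for Pose objects; the baseline value is made up.
import numpy as np

T_orig = np.eye(4)
T_left = np.eye(4); T_left[0, 3] = -0.5   # left camera 0.5m to the left
rel = np.linalg.inv(T_left) @ T_orig
print(rel[:3, 3])                          # [0.5, 0., 0.]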