def coalesce_pc_data(self, items):
    """Combine a set of point cloud datums into a single point cloud.

    Parameters
    ----------
    items: list
        List of OrderedDict, containing parsed point cloud or image data.

    Returns
    -------
    coalesced_pc: OrderedDict
        OrderedDict containing the coalesced point cloud and associated metadata.
    """
    pc_items = [item for item in items if POINT_CLOUD_KEY in item]
    # Coalescing requires every LIDAR datum to be present.
    assert self.coalesce_point_cloud
    assert len(pc_items) == len(LIDAR_DATUM_NAMES)

    # Merge points from all sensors into the (V)ehicle frame via each sensor's extrinsics p_VS.
    coalesced_pc = OrderedDict()
    X_V_merged, bbox_3d_V_merged, class_ids_merged, instance_ids_merged = [], [], [], []
    for item in pc_items:
        X_S = item[POINT_CLOUD_KEY]
        p_VS = item['extrinsics']
        X_V_merged.append(p_VS * X_S)
        if 'bounding_box_3d' in item:
            bbox_3d_V_merged.extend([p_VS * bbox_3d for bbox_3d in item['bounding_box_3d']])
            class_ids_merged.append(item['class_ids'])
            instance_ids_merged.append(item['instance_ids'])

    coalesced_pc['datum_name'] = COALESCED_LIDAR_DATUM_NAME
    coalesced_pc['timestamp'] = pc_items[0]['timestamp']
    coalesced_pc[POINT_CLOUD_KEY] = np.vstack(X_V_merged)
    coalesced_pc['extra_channels'] = np.vstack([item['extra_channels'] for item in pc_items])
    # Note: Extrinsics for this "virtual" datum is identity since the point cloud
    # is defined in the (V)ehicle frame of reference.
    coalesced_pc['extrinsics'] = Pose()
    # Note: Pose for the coalesced point cloud is pose_LV, derived from the first datum.
    p_LS = pc_items[0]['pose']
    p_VS = pc_items[0]['extrinsics']
    p_LV = p_LS * p_VS.inverse()
    coalesced_pc['pose'] = p_LV

    if len(bbox_3d_V_merged):
        # Add bounding boxes and metadata in the vehicle frame
        coalesced_pc['bounding_box_3d'] = bbox_3d_V_merged
        coalesced_pc['class_ids'] = np.hstack(class_ids_merged)
        coalesced_pc['instance_ids'] = np.hstack(instance_ids_merged)
        # Keep only the unique instance IDs (np.unique returns first-occurrence indices)
        coalesced_pc['instance_ids'], unique_idx = np.unique(
            coalesced_pc['instance_ids'], return_index=True)
        coalesced_pc['bounding_box_3d'] = [
            bbox_3d for idx, bbox_3d in enumerate(coalesced_pc['bounding_box_3d'])
            if idx in unique_idx
        ]
        coalesced_pc['class_ids'] = np.array([
            class_id for idx, class_id in enumerate(coalesced_pc['class_ids'])
            if idx in unique_idx
        ])
        assert len(coalesced_pc['bounding_box_3d']) == len(coalesced_pc['class_ids'])
        assert len(coalesced_pc['bounding_box_3d']) == len(coalesced_pc['instance_ids'])
    return coalesced_pc
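# A minimal, runnable sketch (numpy only, toy values) of the instance-id
# deduplication used above: np.unique with return_index=True yields the sorted
# unique ids plus the index of each id's first occurrence, and those indices
# select the matching boxes and class ids.
import numpy as np

instance_ids = np.array([7, 3, 7, 5, 3])
class_ids = np.array([0, 1, 0, 2, 1])
boxes = ['box_a', 'box_b', 'box_c', 'box_d', 'box_e']  # stand-ins for BoundingBox3D

unique_ids, unique_idx = np.unique(instance_ids, return_index=True)
boxes = [b for i, b in enumerate(boxes) if i in unique_idx]
class_ids = np.array([c for i, c in enumerate(class_ids) if i in unique_idx])
assert len(boxes) == len(unique_ids) == len(class_ids)  # 3 unique instances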
def parse_annotations_3d_proto(annotation_file, json_category_id_to_contiguous_id):
    """Parse annotations from a BoundingBox3DAnnotations structure.

    Parameters
    ----------
    annotation_file: str
        Path to JSON file containing annotations for 3D bounding boxes

    json_category_id_to_contiguous_id: dict
        Lookup from COCO style JSON id's to contiguous id's

    Returns
    -------
    tuple holding:
        boxes: list of BoundingBox3D
            3D bounding boxes for this sample
            (pose.quat.qw, pose.quat.qx, pose.quat.qy, pose.quat.qz,
            pose.tvec.x, pose.tvec.y, pose.tvec.z, width, length, height)
            in absolute scale

        class_ids: np.int64 array
            Numpy array containing class ids (aligned with ``boxes``)

        instance_ids: dict
            Map from instance_id to tuple of (box, class_id)
    """
    # *CAVEAT*: `attributes` field is defined in proto, but not being read here.
    # TODO: read attributes (see above); change outputs of all function calls.
    annotations = open_pbobject(annotation_file, BoundingBox3DAnnotations)
    boxes, class_ids, instance_ids = [], [], {}
    for i, ann in enumerate(annotations.annotations):
        boxes.append(
            BoundingBox3D(
                Pose.from_pose_proto(ann.box.pose),
                np.float32([ann.box.width, ann.box.length, ann.box.height]),
                ann.num_points, ann.box.occlusion, ann.box.truncation))
        class_ids.append(json_category_id_to_contiguous_id[ann.class_id])
        instance_ids[ann.instance_id] = (boxes[i], class_ids[i])
    return boxes, class_ids, instance_ids
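# Usage sketch (hedged): the file path and the category remap below are
# hypothetical placeholders; in practice the remap is built from the dataset's
# ontology so that COCO-style ids become contiguous, zero-based ids.
json_category_id_to_contiguous_id = {1: 0, 2: 1, 3: 2}
boxes, class_ids, instance_ids = parse_annotations_3d_proto(
    'scene_dir/bounding_box_3d/000001.json', json_category_id_to_contiguous_id)
# Each instance id maps back to its (box, class_id) pair:
assert all(cls in json_category_id_to_contiguous_id.values()
           for _, cls in instance_ids.values())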
def __init__(self, K, D=None, p_cw=None):
    """Initialize camera intrinsics and pose (identity pose by default).

    Parameters
    ----------
    K: np.ndarray (3x3)
        Camera calibration matrix.

    D: np.ndarray (5,) or (H x W)
        Distortion parameters or distortion map.

    p_cw: dgp.utils.geometry.Pose
        Pose from world to camera frame.
    """
    self.K = K
    self.D = Distortion() if D is None else D
    self.p_cw = Pose() if p_cw is None else p_cw
    assert isinstance(self.K, np.ndarray)
    assert isinstance(self.p_cw, Pose)
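# Minimal construction sketch (assumes the enclosing class is named Camera and
# that Pose/Distortion are importable from this module): a pinhole K with focal
# lengths (fx, fy) and principal point (cx, cy). The numbers are illustrative.
import numpy as np

K = np.float32([[720.0, 0.0, 640.0],
                [0.0, 720.0, 360.0],
                [0.0, 0.0, 1.0]])
camera = Camera(K)  # D and p_cw fall back to identity Distortion()/Pose()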
def __getitem__(self, idx):
    """Get a dataset sample"""
    # Get DGP sample (if single sensor, make it a list)
    self.sample_dgp = self.dataset[idx]
    self.sample_dgp = [make_list(sample) for sample in self.sample_dgp]

    # Loop over all cameras
    sample = []
    for i in range(self.num_cameras):
        data = {
            'idx': idx,
            'dataset_idx': self.dataset_idx,
            'sensor_name': self.get_current('datum_name', i),
            'filename': self.get_filename(idx, i),
            'splitname': '%s_%010d' % (self.split, idx),
            'rgb': self.get_current('rgb', i),
            'intrinsics': self.get_current('intrinsics', i),
        }

        # If depth is returned
        if self.with_depth:
            data.update({
                'depth': self.generate_depth_map(idx, i, data['filename'])
            })

        # If pose is returned
        if self.with_pose:
            data.update({
                'extrinsics': self.get_current('extrinsics', i).matrix,
                'pose': self.get_current('pose', i).matrix,
            })

        # If context is returned
        if self.has_context:
            data.update({
                'rgb_context': self.get_context('rgb', i),
            })
            # If context pose is returned
            if self.with_pose:
                # Get original values to calculate relative motion
                orig_extrinsics = Pose.from_matrix(data['extrinsics'])
                orig_pose = Pose.from_matrix(data['pose'])
                data.update({
                    'extrinsics_context':
                        [(orig_extrinsics.inverse() * extrinsics).matrix
                         for extrinsics in self.get_context('extrinsics', i)],
                    'pose_context':
                        [(orig_pose.inverse() * pose).matrix
                         for pose in self.get_context('pose', i)],
                })

        sample.append(data)

    # Apply same data transformations for all sensors
    if self.data_transform:
        sample = [self.data_transform(smp) for smp in sample]

    # Return sample (stacked if necessary)
    return stack_sample(sample)
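# Access sketch (hypothetical `dataset` instance of this class): each item is a
# per-camera dict stacked by stack_sample; optional keys ('depth', 'pose',
# 'extrinsics', 'rgb_context', ...) appear only when the corresponding
# with_*/has_context flags were enabled at construction time.
sample = dataset[0]
print(sample['sensor_name'], sample['splitname'])
if 'depth' in sample:
    print('depth map shape:', sample['depth'].shape)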
def get_point_cloud_from_datum(self, scene_idx, sample_idx_in_scene, datum_idx_in_sample):
    """Get the sample point cloud data from a point cloud datum.

    Parameters
    ----------
    scene_idx: int
        Index of the scene.

    sample_idx_in_scene: int
        Index of the sample within the scene at scene_idx.

    datum_idx_in_sample: int
        Index of the datum within the sample's datum keys.

    Returns
    -------
    data: OrderedDict

        "timestamp": int
            Timestamp of the point cloud in microseconds.

        "datum_name": str
            Sensor name from which the data was collected

        "extrinsics": Pose
            Sensor extrinsics with respect to the vehicle frame.

        "point_cloud": np.ndarray (N x 3)
            Point cloud in the sensor (S) frame, i.e. X, Y and Z coordinates
            captured at the corresponding lidar timestamp.

        "extra_channels": np.ndarray (N x M)
            Remaining channels from point_cloud (i.e. lidar intensity I or pixel colors RGB)

        "pose": Pose
            Pose of sensor with respect to the world/global/local frame
            (reference frame that is initialized at start-time). (i.e. this
            provides the ego-pose in `pose_WS` where S refers to the point
            cloud sensor (S)).

        "bounding_box_3d": list of BoundingBox3D
            3D Bounding boxes for this sample specified in this point cloud
            sensor's reference frame. (i.e. this provides the bounding box
            (B) in the sensor's (S) reference frame `box_SB`).

        "class_ids": np.ndarray dtype=np.int64
            Tensor containing class ids (aligned with ``bounding_box_3d``)

        "instance_ids": np.ndarray dtype=np.int64
            Tensor containing instance ids (aligned with ``bounding_box_3d``)
    """
    datum = self.get_datum(scene_idx, sample_idx_in_scene, datum_idx_in_sample)
    assert datum.datum.WhichOneof('datum_oneof') == 'point_cloud'

    # Determine the ego-pose of the lidar sensor (S) with respect to the world (W) @ t=Ts
    pose_WS_Ts = Pose.from_pose_proto(datum.datum.point_cloud.pose) \
        if hasattr(datum.datum.point_cloud, 'pose') else Pose()
    # Get sensor extrinsics for the datum name
    pose_VS = self.get_sensor_extrinsics(
        self.get_sample(scene_idx, sample_idx_in_scene).calibration_key, datum.id.name)

    # Points are described in the lidar sensor's (S) frame, captured at the
    # corresponding lidar timestamp (Ts).
    X_S, annotations = self.load_datum_and_annotations(scene_idx, sample_idx_in_scene,
                                                       datum_idx_in_sample)
    data = OrderedDict({
        "timestamp": datum.id.timestamp.ToMicroseconds(),
        "datum_name": datum.id.name,
        "extrinsics": pose_VS,
        "pose": pose_WS_Ts,
        "point_cloud": X_S[:, :3],
        "extra_channels": X_S[:, 3:],
    })

    # Extract 3D bounding box labels, if requested.
    # Also checks if BOUNDING_BOX_3D annotation exists because some datasets have sparse annotations.
    if "bounding_box_3d" in self.requested_annotations and "bounding_box_3d" in annotations:
        annotation_data = load_bounding_box_3d_annotations(
            annotations, self.get_scene_directory(scene_idx),
            self.json_category_id_to_contiguous_id)
        data.update(annotation_data)
    return data
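# Frame-composition sketch over the returned dict (hypothetical `dataset` and
# index values; assumes Pose.__mul__ transforms an (N, 3) array, as
# coalesce_pc_data above relies on with `p_VS * X_S`):
datum = dataset.get_point_cloud_from_datum(scene_idx, sample_idx, datum_idx)
X_S = datum['point_cloud']        # sensor (S) frame
X_V = datum['extrinsics'] * X_S   # vehicle (V) frame, via p_VS
X_W = datum['pose'] * X_S         # world (W) frame, via p_WS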
def get_image_from_datum(self, scene_idx, sample_idx_in_scene, datum_idx_in_sample):
    """Get the sample image data from an image datum.

    Parameters
    ----------
    scene_idx: int
        Index of the scene.

    sample_idx_in_scene: int
        Index of the sample within the scene at scene_idx.

    datum_idx_in_sample: int
        Index of the datum within the sample's datum keys.

    Returns
    -------
    data: OrderedDict

        "timestamp": int
            Timestamp of the image in microseconds.

        "datum_name": str
            Sensor name from which the data was collected

        "rgb": PIL.Image (mode=RGB)
            Image in RGB format.

        "intrinsics": np.ndarray
            Camera intrinsics.

        "extrinsics": Pose
            Camera extrinsics with respect to the vehicle frame.

        "pose": Pose
            Pose of sensor with respect to the world/global/local frame
            (reference frame that is initialized at start-time). (i.e. this
            provides the ego-pose in `pose_WC`).

        "bounding_box_2d": np.ndarray dtype=np.float32
            Tensor containing bounding boxes for this sample
            (x, y, w, h) in absolute pixel coordinates

        "bounding_box_3d": list of BoundingBox3D
            3D Bounding boxes for this sample specified in this camera's
            reference frame. (i.e. this provides the bounding box (B) in
            the camera's (C) reference frame `box_CB`).

        "class_ids": np.ndarray dtype=np.int64
            Tensor containing class ids (aligned with ``bounding_box_2d`` and ``bounding_box_3d``)

        "instance_ids": np.ndarray dtype=np.int64
            Tensor containing instance ids (aligned with ``bounding_box_2d`` and ``bounding_box_3d``)
    """
    datum = self.get_datum(scene_idx, sample_idx_in_scene, datum_idx_in_sample)
    assert datum.datum.WhichOneof('datum_oneof') == 'image'

    # Get camera calibration and extrinsics for the datum name
    sample = self.get_sample(scene_idx, sample_idx_in_scene)
    camera = self.get_camera_calibration(sample.calibration_key, datum.id.name)
    pose_VC = self.get_sensor_extrinsics(sample.calibration_key, datum.id.name)

    # Get ego-pose for the image (at the corresponding image timestamp t=Tc)
    pose_WC_Tc = Pose.from_pose_proto(datum.datum.image.pose) \
        if hasattr(datum.datum.image, 'pose') else Pose()

    # Populate data for image datum
    image, annotations = self.load_datum_and_annotations(scene_idx, sample_idx_in_scene,
                                                         datum_idx_in_sample)
    data = OrderedDict({
        "timestamp": datum.id.timestamp.ToMicroseconds(),
        "datum_name": datum.id.name,
        "rgb": image,
        "intrinsics": camera.K,
        "extrinsics": pose_VC,
        "pose": pose_WC_Tc
    })

    # Extract 2D/3D bounding box labels if requested.
    # Also checks whether the BOUNDING_BOX_2D/BOUNDING_BOX_3D annotations exist,
    # because some datasets have sparse annotations.
    if self.requested_annotations:
        ann_root_dir = self.get_scene_directory(scene_idx)
        # TODO: Load the datum based on the type, no need to hardcode these conditions.
        # In particular, figure out how to handle joint conditions like this:
        want_2d = "bounding_box_2d" in self.requested_annotations and "bounding_box_2d" in annotations
        want_3d = "bounding_box_3d" in self.requested_annotations and "bounding_box_3d" in annotations
        if want_2d and want_3d:
            annotation_data = load_aligned_bounding_box_annotations(
                annotations, ann_root_dir, self.json_category_id_to_contiguous_id)
        elif want_2d:
            annotation_data = load_bounding_box_2d_annotations(
                annotations, ann_root_dir, self.json_category_id_to_contiguous_id)
        elif want_3d:
            annotation_data = load_bounding_box_3d_annotations(
                annotations, ann_root_dir, self.json_category_id_to_contiguous_id)
        else:
            annotation_data = {}
        data.update(annotation_data)
    return data
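# Calibration/pose reading sketch (hypothetical `dataset` and index values);
# the final composition mirrors p_LV = p_LS * p_VS.inverse() in coalesce_pc_data:
datum = dataset.get_image_from_datum(scene_idx, sample_idx, datum_idx)
K = datum['intrinsics']                 # 3x3 camera matrix
pose_WC = datum['pose']                 # camera ego-pose at capture time, p_WC
pose_VC = datum['extrinsics']           # camera in the vehicle frame, p_VC
pose_WV = pose_WC * pose_VC.inverse()   # vehicle ego-pose p_WV, if needed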
def __getitem__(self, idx):
    """Get a dataset sample"""
    # Get DGP sample (if single sensor, make it a list)
    self.sample_dgp = self.dataset[idx]
    self.sample_dgp = [make_list(sample) for sample in self.sample_dgp]
    if self.with_geometric_context:
        self.sample_dgp_left = self.dataset_left[idx]
        self.sample_dgp_left = [make_list(sample) for sample in self.sample_dgp_left]
        self.sample_dgp_right = self.dataset_right[idx]
        self.sample_dgp_right = [make_list(sample) for sample in self.sample_dgp_right]

    # Loop over all cameras
    sample = []
    for i in range(self.num_cameras):
        i_left = self.get_camera_idx_left(i)
        i_right = self.get_camera_idx_right(i)

        data = {
            'idx': idx,
            'dataset_idx': self.dataset_idx,
            'sensor_name': self.get_current('datum_name', i),
            'filename': self.get_filename(idx, i),
            'splitname': '%s_%010d' % (self.split, idx),
            'rgb': self.get_current('rgb', i),
            'intrinsics': self.get_current('intrinsics', i),
            'extrinsics': self.get_current('extrinsics', i).matrix,
            'path_to_ego_mask': os.path.join(
                os.path.dirname(self.path),
                self._get_path_to_ego_mask(self.get_filename(idx, i))),
        }

        # If depth is returned
        if self.with_depth:
            data.update({
                'depth': self.generate_depth_map(idx, i, data['filename'])
            })

        # If pose is returned
        if self.with_pose:
            data.update({
                'pose': self.get_current('pose', i).matrix,
            })

        # If context is returned
        if self.has_context:
            orig_extrinsics = Pose.from_matrix(data['extrinsics'])
            data.update({
                'rgb_context': self.get_context('rgb', i),
                'intrinsics_context': self.get_context('intrinsics', i),
                'extrinsics_context':
                    [(extrinsics.inverse() * orig_extrinsics).matrix
                     for extrinsics in self.get_context('extrinsics', i)],
            })
            data.update({
                'path_to_ego_mask_context': [
                    os.path.join(os.path.dirname(self.path),
                                 self._get_path_to_ego_mask(self.get_filename(idx, i)))
                    for _ in range(len(data['rgb_context']))
                ],
            })
            data.update({
                'context_type': [],
            })
            for _ in range(self.bwd):
                data['context_type'].append('backward')
            for _ in range(self.fwd):
                data['context_type'].append('forward')

            # If context pose is returned
            if self.with_pose:
                # Get original values to calculate relative motion
                orig_pose = Pose.from_matrix(data['pose'])
                data.update({
                    'pose_context':
                        [(orig_pose.inverse() * pose).matrix
                         for pose in self.get_context('pose', i)],
                })

        # Append the left/right stereo neighbours as extra context entries
        if self.with_geometric_context:
            orig_extrinsics = Pose.from_matrix(data['extrinsics'])
            orig_extrinsics_left = Pose.from_matrix(
                self.get_current_left('extrinsics', i_left).matrix)
            orig_extrinsics_right = Pose.from_matrix(
                self.get_current_right('extrinsics', i_right).matrix)

            data['rgb_context'].append(self.get_current_left('rgb', i_left))
            data['rgb_context'].append(self.get_current_right('rgb', i_right))
            data['intrinsics_context'].append(self.get_current_left('intrinsics', i_left))
            data['intrinsics_context'].append(self.get_current_right('intrinsics', i_right))
            data['extrinsics_context'].append(
                (orig_extrinsics_left.inverse() * orig_extrinsics).matrix)
            data['extrinsics_context'].append(
                (orig_extrinsics_right.inverse() * orig_extrinsics).matrix)
            data['path_to_ego_mask_context'].append(
                os.path.join(os.path.dirname(self.path),
                             self._get_path_to_ego_mask(self.get_filename_left(idx, i_left))))
            data['path_to_ego_mask_context'].append(
                os.path.join(os.path.dirname(self.path),
                             self._get_path_to_ego_mask(self.get_filename_right(idx, i_right))))
            data['context_type'].append('left')
            data['context_type'].append('right')
            data.update({
                'sensor_name_left': self.get_current_left('datum_name', i_left),
                'sensor_name_right': self.get_current_right('datum_name', i_right),
                'filename_left': self.get_filename_left(idx, i_left),
                'filename_right': self.get_filename_right(idx, i_right),
            })

        sample.append(data)

    # Apply same data transformations for all sensors
    if self.data_transform:
        sample = [self.data_transform(smp) for smp in sample]

    # Return sample (stacked if necessary)
    return stack_sample(sample)
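# Context-reading sketch (hypothetical `dataset` built with geometric context):
# the temporal entries come first ('backward' x bwd, then 'forward' x fwd), and
# the stereo neighbours are appended last, in the order set above.
sample = dataset[0]
for ctx_type, T in zip(sample['context_type'], sample['extrinsics_context']):
    print(ctx_type, T.shape)  # ..., then 'left', 'right'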