def inference_by_idx(): global BACKEND instance = request.json response = {"status": "normal"} if BACKEND.root_path is None: return error_response("root path is not set") if BACKEND.kitti_infos is None: return error_response("kitti info is not loaded") if BACKEND.inference_ctx is None: return error_response("inference_ctx is not loaded") image_idx = instance["image_idx"] idx = BACKEND.image_idxes.index(image_idx) kitti_info = BACKEND.kitti_infos[idx] v_path = str(Path(BACKEND.root_path) / kitti_info['velodyne_path']) num_features = 4 points = np.fromfile( str(v_path), dtype=np.float32, count=-1).reshape([-1, num_features]) rect = kitti_info['calib/R0_rect'] P2 = kitti_info['calib/P2'] Trv2c = kitti_info['calib/Tr_velo_to_cam'] if 'img_shape' in kitti_info: image_shape = kitti_info['img_shape'] points = box_np_ops.remove_outside_points( points, rect, Trv2c, P2, image_shape) print(points.shape[0]) t = time.time() inputs = BACKEND.inference_ctx.get_inference_input_dict( kitti_info, points) print("input preparation time:", time.time() - t) t = time.time() with BACKEND.inference_ctx.ctx(): dt_annos = BACKEND.inference_ctx.inference(inputs)[0] print("detection time:", time.time() - t) dims = dt_annos['dimensions'] num_obj = dims.shape[0] loc = dt_annos['location'] rots = dt_annos['rotation_y'] labels = dt_annos['name'] dt_boxes_camera = np.concatenate( [loc, dims, rots[..., np.newaxis]], axis=1) dt_boxes = box_np_ops.box_camera_to_lidar( dt_boxes_camera, rect, Trv2c) box_np_ops.change_box3d_center_(dt_boxes, src=[0.5, 0.5, 0], dst=[0.5, 0.5, 0.5]) locs = dt_boxes[:, :3] dims = dt_boxes[:, 3:6] rots = np.concatenate([np.zeros([num_obj, 2], dtype=np.float32), -dt_boxes[:, 6:7]], axis=1) response["dt_locs"] = locs.tolist() response["dt_dims"] = dims.tolist() response["dt_rots"] = rots.tolist() response["dt_labels"] = labels.tolist() response["dt_scores"] = dt_annos["score"].tolist() response = jsonify(results=[response]) response.headers['Access-Control-Allow-Headers'] = '*' return response
def kitti_anno_to_corners(info, annos=None): rect = info['calib/R0_rect'] P2 = info['calib/P2'] Tr_velo_to_cam = info['calib/Tr_velo_to_cam'] if annos is None: annos = info['annos'] dims = annos['dimensions'] loc = annos['location'] rots = annos['rotation_y'] scores = None if 'score' in annos: scores = annos['score'] boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]], axis=1) boxes_lidar = box_np_ops.box_camera_to_lidar(boxes_camera, rect, Tr_velo_to_cam) box_np_ops.change_box3d_center_(boxes_lidar, src=[0.5, 0.5, 0], dst=[0.5, 0.5, 0.5]) #pang added 02/02/2019 to make the position of the bounding boxes correct boxes_corners = box_np_ops.center_to_corner_box3d( boxes_lidar[:, :3], boxes_lidar[:, 3:6], boxes_lidar[:, 6], origin=[0.5, 0.5, 0], axis=2) return boxes_corners, scores, boxes_lidar
def get_pointcloud(): global BACKEND instance = request.json response = {"status": "normal"} if BACKEND.root_path is None: return error_response("root path is not set") if BACKEND.kitti_infos is None: return error_response("kitti info is not loaded") image_idx = instance["image_idx"] print("image_idx",image_idx) idx = BACKEND.image_idxes.index(image_idx) kitti_info = BACKEND.kitti_infos[idx] rect = kitti_info['calib/R0_rect'] P2 = kitti_info['calib/P2'] Trv2c = kitti_info['calib/Tr_velo_to_cam'] img_shape = kitti_info["img_shape"] # hw wh = np.array(img_shape[::-1]) whwh = np.tile(wh, 2) if 'annos' in kitti_info: annos = kitti_info['annos'] labels = annos['name'] num_obj = len([n for n in annos['name'] if n != 'DontCare']) dims = annos['dimensions'][:num_obj] loc = annos['location'][:num_obj] rots = annos['rotation_y'][:num_obj] bbox = annos['bbox'][:num_obj] / whwh gt_boxes_camera = np.concatenate( [loc, dims, rots[..., np.newaxis]], axis=1) gt_boxes = box_np_ops.box_camera_to_lidar( gt_boxes_camera, rect, Trv2c) box_np_ops.change_box3d_center_(gt_boxes, src=[0.5, 0.5, 0], dst=[0.5, 0.5, 0.5]) locs = gt_boxes[:, :3] dims = gt_boxes[:, 3:6] rots = np.concatenate([np.zeros([num_obj, 2], dtype=np.float32), -gt_boxes[:, 6:7]], axis=1) frontend_annos = {} response["locs"] = locs.tolist() response["dims"] = dims.tolist() response["rots"] = rots.tolist() response["bbox"] = bbox.tolist() response["labels"] = labels[:num_obj].tolist() v_path = str(Path(BACKEND.root_path) / kitti_info['velodyne_path']) print("v_path:",v_path) with open(v_path, 'rb') as f: pc_str = base64.encodebytes(f.read()) response["pointcloud"] = pc_str.decode("utf-8") if "with_det" in instance and instance["with_det"]: if BACKEND.dt_annos is None: return error_response("det anno is not loaded") dt_annos = BACKEND.dt_annos[idx] dims = dt_annos['dimensions'] num_obj = dims.shape[0] loc = dt_annos['location'] rots = dt_annos['rotation_y'] bbox = dt_annos['bbox'] / whwh labels = dt_annos['name'] dt_boxes_camera = np.concatenate( [loc, dims, rots[..., np.newaxis]], axis=1) dt_boxes = box_np_ops.box_camera_to_lidar( dt_boxes_camera, rect, Trv2c) box_np_ops.change_box3d_center_(dt_boxes, src=[0.5, 0.5, 0], dst=[0.5, 0.5, 0.5]) locs = dt_boxes[:, :3] dims = dt_boxes[:, 3:6] rots = np.concatenate([np.zeros([num_obj, 2], dtype=np.float32), -dt_boxes[:, 6:7]], axis=1) response["dt_locs"] = locs.tolist() response["dt_dims"] = dims.tolist() response["dt_rots"] = rots.tolist() response["dt_labels"] = labels.tolist() response["dt_bbox"] = bbox.tolist() response["dt_scores"] = dt_annos["score"].tolist() # if "score" in annos: # response["score"] = score.tolist() response = jsonify(results=[response]) response.headers['Access-Control-Allow-Headers'] = '*' print("send response!") return response
def create_groundtruth_database(data_path, info_path=None, used_classes=None, database_save_path=None, db_info_save_path=None, relative_path=True, lidar_only=False, bev_only=False, coors_range=None): root_path = pathlib.Path(data_path) if info_path is None: info_path = root_path / 'kitti_infos_train.pkl' if database_save_path is None: database_save_path = root_path / 'gt_database' else: database_save_path = pathlib.Path(database_save_path) if db_info_save_path is None: db_info_save_path = root_path / "kitti_dbinfos_train.pkl" database_save_path.mkdir(parents=True, exist_ok=True) with open(info_path, 'rb') as f: kitti_infos = pickle.load(f) all_db_infos = {} if used_classes is None: used_classes = list(kitti.get_classes()) used_classes.pop(used_classes.index('DontCare')) for name in used_classes: all_db_infos[name] = [] group_counter = 0 for info in prog_bar(kitti_infos): pc_info = info["point_cloud"] image_info = info["image"] calib = info["calib"] velodyne_path = pc_info['velodyne_path'] if relative_path: # velodyne_path = str(root_path / velodyne_path) + "_reduced" velodyne_path = str(root_path / velodyne_path) num_features = 4 if 'num_features' in pc_info: num_features = pc_info['num_features'] points = np.fromfile( velodyne_path, dtype=np.float32, count=-1).reshape([-1, num_features]) image_idx = image_info["image_idx"] rect = calib['R0_rect'] P2 = calib['P2'] Trv2c = calib['Tr_velo_to_cam'] if not lidar_only: points = box_np_ops.remove_outside_points(points, rect, Trv2c, P2, image_info["image_shape"]) annos = info["annos"] names = annos["name"] bboxes = annos["bbox"] difficulty = annos["difficulty"] gt_idxes = annos["index"] num_obj = np.sum(annos["index"] >= 0) rbbox_cam = kitti.anno_to_rbboxes(annos)[:num_obj] rbbox_lidar = box_np_ops.box_camera_to_lidar(rbbox_cam, rect, Trv2c) box_np_ops.change_box3d_center_(rbbox_lidar, [0.5, 0.5, 0], [0.5, 0.5, 0.5]) if bev_only: # set z and h to limits assert coors_range is not None rbbox_lidar[:, 2] = coors_range[2] rbbox_lidar[:, 5] = coors_range[5] - coors_range[2] group_dict = {} group_ids = np.full([bboxes.shape[0]], -1, dtype=np.int64) if "group_ids" in annos: group_ids = annos["group_ids"] else: group_ids = np.arange(bboxes.shape[0], dtype=np.int64) point_indices = box_np_ops.points_in_rbbox(points, rbbox_lidar) for i in range(num_obj): filename = f"{image_idx}_{names[i]}_{gt_idxes[i]}.bin" filepath = database_save_path / filename gt_points = points[point_indices[:, i]] gt_points[:, :3] -= rbbox_lidar[i, :3] with open(filepath, 'w') as f: gt_points.tofile(f) if names[i] in used_classes: if relative_path: db_path = str(database_save_path.stem + "/" + filename) else: db_path = str(filepath) db_info = { "name": names[i], "path": db_path, "image_idx": image_idx, "gt_idx": gt_idxes[i], "box3d_lidar": rbbox_lidar[i], "num_points_in_gt": gt_points.shape[0], "difficulty": difficulty[i], # "group_id": -1, # "bbox": bboxes[i], } local_group_id = group_ids[i] # if local_group_id >= 0: if local_group_id not in group_dict: group_dict[local_group_id] = group_counter group_counter += 1 db_info["group_id"] = group_dict[local_group_id] if "score" in annos: db_info["score"] = annos["score"][i] all_db_infos[names[i]].append(db_info) for k, v in all_db_infos.items(): print(f"load {len(v)} {k} database infos") with open(db_info_save_path, 'wb') as f: pickle.dump(all_db_infos, f)
def get_sensor_data(self, query): read_image = False idx = query if isinstance(query, dict): read_image = "cam" in query assert "lidar" in query idx = query["lidar"]["idx"] info = self._kitti_infos[idx] res = { "lidar": { "type": "lidar", "points": None, }, "metadata": { "image_idx": info["image"]["image_idx"], "image_shape": info["image"]["image_shape"], }, "calib": None, "cam": {} } pc_info = info["point_cloud"] velo_path = Path(pc_info['velodyne_path']) if not velo_path.is_absolute(): velo_path = Path(self._root_path) / pc_info['velodyne_path'] velo_reduced_path = velo_path.parent.parent / ( velo_path.parent.stem + '_reduced') / velo_path.name if velo_reduced_path.exists(): velo_path = velo_reduced_path points = np.fromfile(str(velo_path), dtype=np.float32, count=-1).reshape([-1, self.NumPointFeatures]) res["lidar"]["points"] = points image_info = info["image"] image_path = image_info['image_path'] if read_image: image_path = self._root_path / image_path with open(str(image_path), 'rb') as f: image_str = f.read() res["cam"] = { "type": "camera", "data": image_str, "datatype": image_path.suffix[1:], } calib = info["calib"] calib_dict = { 'rect': calib['R0_rect'], 'Trv2c': calib['Tr_velo_to_cam'], 'P2': calib['P2'], } res["calib"] = calib_dict if 'annos' in info: annos = info['annos'] # we need other objects to avoid collision when sample annos = kitti.remove_dontcare(annos) locs = annos["location"] dims = annos["dimensions"] rots = annos["rotation_y"] gt_names = annos["name"] # rots = np.concatenate([np.zeros([locs.shape[0], 2], dtype=np.float32), rots], axis=1) gt_boxes = np.concatenate([locs, dims, rots[..., np.newaxis]], axis=1).astype(np.float32) calib = info["calib"] gt_boxes = box_np_ops.box_camera_to_lidar(gt_boxes, calib["R0_rect"], calib["Tr_velo_to_cam"]) # only center format is allowed. so we need to convert # kitti [0.5, 0.5, 0] center to [0.5, 0.5, 0.5] box_np_ops.change_box3d_center_(gt_boxes, [0.5, 0.5, 0], [0.5, 0.5, 0.5]) res["lidar"]["annotations"] = { 'boxes': gt_boxes, 'names': gt_names, } res["cam"]["annotations"] = { 'boxes': annos["bbox"], 'names': gt_names, } return res
def get_sensor_data(self, query): read_image = False colored_lidar = False create_init_lidar = True idx = query if isinstance(query, dict): colored_lidar = "colored" in query["lidar"] read_image = ("cam" in query) or colored_lidar create_init_lidar = "init_lidar" in query if create_init_lidar: assert "num_beams" in query["init_lidar"] assert "lidar" in query idx = query["lidar"]["idx"] info = self._vkitti_infos[idx] res = { "lidar": { "type": "lidar", "points": None, "colored": False }, "metadata": { "image_idx": info["image"]["image_idx"], "image_shape": info["image"]["image_shape"], }, "calib": None, "cam": {}, "depth": { "type": "depth_map", "image": None }, "init_lidar": { "num_beams": None, "points": None } } image_info = info["image"] image_path = image_info['image_path'] if read_image: image_path = self._root_path / image_path with open(str(image_path), 'rb') as f: image_str = f.read() res["cam"] = { "type": "camera", "data": image_str, "datatype": image_path.suffix[1:], } # Will always produce depth, needed for lidar. depth_info = info["depth"] depth_path = Path(depth_info["depth_path"]) if not depth_path.is_absolute(): depth_path = Path(self._root_path) / depth_info["depth_path"] np_depth_image = np.array( Image.open(depth_path)) # (W, H) dtype=np.int32 # Convert centimeters (int32) to meters. np_depth_image = np_depth_image.astype( np.float32) / 100 # (W, H) dtype=np.float32 res["depth"] = {"type": "depth_map", "image": np_depth_image} if colored_lidar: # Concatenate depth map with colors. np_rgb_image = np.array(Image.open( io.BytesIO(image_str))) # (H, W, 3) H, W = np_depth_image.shape np_depth_image = np.concatenate( (np_depth_image.reshape(H, W, 1), np_rgb_image), axis=2) # (H, W, 4) res["lidar"]["colored"] = True else: np_depth_image = np_depth_image[..., np.newaxis] # (H, W, 1) np_depth_image = np.concatenate( (np_depth_image, np.ones_like(np_depth_image)), axis=2) # (H, W, 2) points = cam_transforms.depth_map_to_point_cloud( np_depth_image, self.lc_device.CAMERA_PARAMS['matrix']) # (N, 4) or (N, 6) # Convert from camera frame to velo frame. cam2velo = self.lc_device.TRANSFORMS[ 'cTw'] # inverse extrinsics matrix xyz_velo = np.hstack( (points[:, :3], np.ones([len(points), 1], dtype=points.dtype))) xyz_velo = xyz_velo @ cam2velo.T points = np.hstack((xyz_velo[:, :3], points[:, 3:])) # Simulated LiDAR points. if create_init_lidar and query["init_lidar"]["num_beams"] > 1: num_beams = query["init_lidar"]["num_beams"] if self.mb_lidar_mask is None or self.mb_lidar_mask[ "num_beams"] != num_beams: self.create_mb_lidar_mask(num_beams) init_lidar_points = points[ self.mb_lidar_mask["1d_mask"]] # (R, 4) or (R, 6) init_lidar_points = init_lidar_points[:, :3] # (R, 3) # Only select points with less than MAX_RANGE value. init_lidar_points = init_lidar_points[ init_lidar_points[:, 0] < self._MAX_RANGE, :] res["init_lidar"]["points"] = init_lidar_points # Only select points with less than MAX_RANGE value. points = points[points[:, 0] < self._MAX_RANGE, :] res["lidar"]["points"] = points # Compute single-beam lidar. if create_init_lidar and query["init_lidar"]["num_beams"] == 1: init_lidar_points = self.convert_points_to_sb_lidar(points) init_lidar_points = init_lidar_points[:, :3] # (N, 3) res["init_lidar"]["points"] = init_lidar_points R0_rect = np.eye( 4, dtype=np.float32) # there is no rectification in vkitti Tr_velo_to_cam = self.lc_device.TRANSFORMS["wTc"] # Extended intrinsics matrix: shape= (4, 4). P2 = np.eye(4, dtype=np.float32) P2[:3, :3] = self.lc_device.CAMERA_PARAMS["matrix"] calib_dict = { 'R0_rect': R0_rect, 'Tr_velo_to_cam': Tr_velo_to_cam, 'P2': P2 } res["calib"] = calib_dict if 'annos' in info: annos = info['annos'] # we need other objects to avoid collision when sample annos = vkitti.remove_dontcare(annos) locs = annos["location"] dims = annos["dimensions"] rots = annos["rotation_y"] gt_names = annos["name"] gt_boxes = np.concatenate([locs, dims, rots[..., np.newaxis]], axis=1).astype(np.float32) calib = info["calib"] gt_boxes = box_np_ops.box_camera_to_lidar(gt_boxes, r_rect=R0_rect, velo2cam=Tr_velo_to_cam) # only center format is allowed. so we need to convert # kitti [0.5, 0.5, 0] center to [0.5, 0.5, 0.5] box_np_ops.change_box3d_center_(gt_boxes, [0.5, 0.5, 0], [0.5, 0.5, 0.5]) res["lidar"]["annotations"] = { 'boxes': gt_boxes, 'names': gt_names, } res["cam"]["annotations"] = { 'boxes': annos["bbox"], 'names': gt_names, } return res
def get_pointcloud(): global BACKEND instance = request.json response = {"status": "normal"} if BACKEND.root_path is None: return error_response("root path is not set") if BACKEND.kitti_infos is None: return error_response("kitti info is not loaded") image_idx = instance["image_idx"] enable_int16 = instance["enable_int16"] idx = BACKEND.image_idxes.index(image_idx) kitti_info = BACKEND.kitti_infos[idx] pc_info = kitti_info["point_cloud"] image_info = kitti_info["image"] calib = kitti_info["calib"] rect = calib['R0_rect'] Trv2c = calib['Tr_velo_to_cam'] P2 = calib['P2'] img_shape = image_info["image_shape"] # hw wh = np.array(img_shape[::-1]) whwh = np.tile(wh, 2) if 'annos' in kitti_info: annos = kitti_info['annos'] labels = annos['name'] num_obj = len([n for n in annos['name'] if n != 'DontCare']) dims = annos['dimensions'][:num_obj] loc = annos['location'][:num_obj] rots = annos['rotation_y'][:num_obj] bbox = annos['bbox'][:num_obj] / whwh gt_boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]], axis=1) gt_boxes = box_np_ops.box_camera_to_lidar(gt_boxes_camera, rect, Trv2c) box_np_ops.change_box3d_center_(gt_boxes, src=[0.5, 0.5, 0], dst=[0.5, 0.5, 0.5]) locs = gt_boxes[:, :3] dims = gt_boxes[:, 3:6] rots = np.concatenate( [np.zeros([num_obj, 2], dtype=np.float32), -gt_boxes[:, 6:7]], axis=1) frontend_annos = {} response["locs"] = locs.tolist() response["dims"] = dims.tolist() response["rots"] = rots.tolist() response["bbox"] = bbox.tolist() response["labels"] = labels[:num_obj].tolist() response["num_features"] = pc_info["num_features"] v_path = str(Path(BACKEND.root_path) / pc_info['velodyne_path']) points = np.fromfile(v_path, dtype=np.float32, count=-1).reshape([-1, pc_info["num_features"]]) if instance['remove_outside']: if 'image_shape' in image_info: image_shape = image_info['image_shape'] points = box_np_ops.remove_outside_points(points, rect, Trv2c, P2, image_shape) else: points = points[points[..., 2] > 0] if enable_int16: int16_factor = instance["int16_factor"] points *= int16_factor points = points.astype(np.int16) pc_str = base64.b64encode(points.tobytes()) response["pointcloud"] = pc_str.decode("utf-8") if "with_det" in instance and instance["with_det"]: if BACKEND.dt_annos is None: return error_response("det anno is not loaded") dt_annos = BACKEND.dt_annos[idx] dims = dt_annos['dimensions'] num_obj = dims.shape[0] loc = dt_annos['location'] rots = dt_annos['rotation_y'] bbox = dt_annos['bbox'] / whwh labels = dt_annos['name'] dt_boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]], axis=1) dt_boxes = box_np_ops.box_camera_to_lidar(dt_boxes_camera, rect, Trv2c) box_np_ops.change_box3d_center_(dt_boxes, src=[0.5, 0.5, 0], dst=[0.5, 0.5, 0.5]) locs = dt_boxes[:, :3] dims = dt_boxes[:, 3:6] rots = np.concatenate( [np.zeros([num_obj, 2], dtype=np.float32), -dt_boxes[:, 6:7]], axis=1) response["dt_locs"] = locs.tolist() response["dt_dims"] = dims.tolist() response["dt_rots"] = rots.tolist() response["dt_labels"] = labels.tolist() response["dt_bbox"] = bbox.tolist() response["dt_scores"] = dt_annos["score"].tolist() # if "score" in annos: # response["score"] = score.tolist() response = jsonify(results=[response]) response.headers['Access-Control-Allow-Headers'] = '*' print("send response with size {}!".format(len(pc_str))) return response
def __getitem__(self, idx): """ you need to create a input dict in this function for network inference. format: { anchors voxels num_points coordinates ground_truth: { gt_boxes gt_names [optional]difficulty [optional]group_ids } [optional]anchors_mask, slow in SECOND v1.5, don't use this. [optional]metadata, in kitti, image index is saved in metadata } """ info = self._kitti_infos[idx] kitti.convert_to_kitti_info_version2(info) pc_info = info["point_cloud"] if "points" not in pc_info: velo_path = pathlib.Path(pc_info['velodyne_path']) if not velo_path.is_absolute(): velo_path = pathlib.Path( self._root_path) / pc_info['velodyne_path'] velo_reduced_path = velo_path.parent.parent / ( velo_path.parent.stem + '_reduced') / velo_path.name if velo_reduced_path.exists(): velo_path = velo_reduced_path points = np.fromfile(str(velo_path), dtype=np.float32, count=-1).reshape( [-1, self._num_point_features]) input_dict = { 'points': points, } if "image" in info: input_dict["image"] = info["image"] if "calib" in info: calib = info["calib"] calib_dict = { 'rect': calib['R0_rect'], 'Trv2c': calib['Tr_velo_to_cam'], 'P2': calib['P2'], } input_dict["calib"] = calib_dict if 'annos' in info: annos = info['annos'] annos_dict = {} # we need other objects to avoid collision when sample annos = kitti.remove_dontcare(annos) loc = annos["location"] dims = annos["dimensions"] rots = annos["rotation_y"] gt_names = annos["name"] gt_boxes = np.concatenate([loc, dims, rots[..., np.newaxis]], axis=1).astype(np.float32) if "calib" in info: calib = info["calib"] gt_boxes = box_np_ops.box_camera_to_lidar( gt_boxes, calib["R0_rect"], calib["Tr_velo_to_cam"]) # only center format is allowed. so we need to convert # kitti [0.5, 0.5, 0] center to [0.5, 0.5, 0.5] box_np_ops.change_box3d_center_(gt_boxes, [0.5, 0.5, 0], [0.5, 0.5, 0.5]) gt_dict = { 'gt_boxes': gt_boxes, 'gt_names': gt_names, } if 'difficulty' in annos: gt_dict['difficulty'] = annos["difficulty"] if 'group_ids' in annos: gt_dict['group_ids'] = annos["group_ids"] input_dict["ground_truth"] = gt_dict example = self._prep_func(input_dict=input_dict) example["metadata"] = {} if "image" in info: example["metadata"]["image"] = input_dict["image"] if "anchors_mask" in example: example["anchors_mask"] = example["anchors_mask"].astype(np.uint8) return example
def __getitem__(self, idx): """ Args: idx: (int) this is the index of the frame, from 0 to len(Video) - 1 """ frame_info = self.video_info[idx] # frame info root_path = Path(os.environ["DATADIR"]) / "synthia" res = { "depth": None, # np.ndarray, dtype=np.float32, shape=(H, W) "points": None, "cam": { "image_str": None, # str, image string "datatype": None, # str, suffix type }, "metadata": { "frameid": frame_info["frameid"], "image_shape": frame_info["image"]["image_shape"] }, "calib": frame_info["calib"], "annos": None } # -------------------------------------------------------------------------------------------------------------- # depth # -------------------------------------------------------------------------------------------------------------- depth_path = Path(frame_info["depth"]["depth_path"]) if not depth_path.is_absolute(): depth_path = root_path / depth_path # synthia depth formula: "Depth = 5000 * (R + G*256 + B*256*256) / (256*256*256 - 1)" np_depth_image = np.array( Image.open(depth_path)) # (H, W, 4) dtype=np.uint8 R, G, B = [np_depth_image[:, :, e].astype(np.int64) for e in range(3)] # (H, W), dtype=np.int64 np_depth_image = 5000 * (R + G * 256 + B * 256 * 256) / ( 256 * 256 * 256 - 1) # (H, W) dtype=np.float64 np_depth_image = np_depth_image.astype( np.float32) # (H, W) dtype=np.float32 res["depth"] = np_depth_image # -------------------------------------------------------------------------------------------------------------- # cam # -------------------------------------------------------------------------------------------------------------- if self.cam or self.colored_pc: image_path = Path(frame_info['image']['image_path']) if not image_path.is_absolute(): image_path = root_path / image_path with open(str(image_path), 'rb') as f: image_str = f.read() res["cam"]["image_str"] = image_str res["cam"]["datatype"] = image_path.suffix[1:] # -------------------------------------------------------------------------------------------------------------- # points # -------------------------------------------------------------------------------------------------------------- np_depth_image = np_depth_image[..., np.newaxis] # (H, W, 1) if self.colored_pc: # concatenate depth map with colors np_rgb_image = np.array(Image.open( io.BytesIO(image_str))) # (H, W, 4) np_rgb_image = np_rgb_image[:, :, :3] # (H, W, 3) np_depth_image = np.concatenate([np_depth_image, np_rgb_image], axis=2) # (H, W, 4) # points in cam frame P2 = frame_info['calib']['P2'] # intrinsics matrix if P2.shape == (4, 4): P2 = P2[:3, :3] else: assert P2.shape == (3, 3) points = cam_transforms.depth_map_to_point_cloud( np_depth_image, P2) # (N, 3) or (N, 6) # points in velo frame Tr_velo_to_cam = frame_info['calib'][ 'Tr_velo_to_cam'] # extrinsics matrix Tr_cam_to_velo = np.linalg.inv(Tr_velo_to_cam) xyz1_cam = np.hstack( (points[:, :3], np.ones([len(points), 1], dtype=points.dtype))) # (N, 4) xyz1_velo = xyz1_cam @ Tr_cam_to_velo.T # (N, 4) points = np.hstack((xyz1_velo[:, :3], points[:, 3:])) # (N, 3) or (N, 6) # points within MAX_DEPTH points = points[points[:, 0] < self._MAX_DEPTH, :] # (M, 3) or (M, 6) res["points"] = points # -------------------------------------------------------------------------------------------------------------- # annos # -------------------------------------------------------------------------------------------------------------- annos = frame_info['annos'] annos = self._remove_dontcare(annos) locs = annos["location"] dims = annos["dimensions"] rots = annos["rotation_y"] gt_names = annos["name"] gt_boxes = np.concatenate([locs, dims, rots[..., np.newaxis]], axis=1).astype(np.float32) gt_boxes = box_np_ops.box_camera_to_lidar( gt_boxes, r_rect=frame_info["calib"]["R0_rect"], velo2cam=frame_info["calib"]["Tr_velo_to_cam"]) # only center format is allowed. so we need to convert kitti [0.5, 0.5, 0] center to [0.5, 0.5, 0.5] box_np_ops.change_box3d_center_(gt_boxes, [0.5, 0.5, 0], [0.5, 0.5, 0.5]) res["annos"] = { 'names': gt_names, 'boxes': gt_boxes, 'boxes2d': annos["bbox"] } return res
def prep_pointcloud( input_dict, root_path, # voxel_generator, fv_generator, target_assigner, db_sampler=None, max_voxels=20000, class_names=['Car'], remove_outside_points=False, training=True, create_targets=True, shuffle_points=False, reduce_valid_area=False, remove_unknown=False, gt_rotation_noise=[-np.pi / 3, np.pi / 3], gt_loc_noise_std=[1.0, 1.0, 1.0], global_rotation_noise=[-np.pi / 4, np.pi / 4], global_scaling_noise=[0.95, 1.05], global_loc_noise_std=(0.2, 0.2, 0.2), global_random_rot_range=[0.78, 2.35], generate_bev=False, without_reflectivity=False, num_point_features=4, anchor_area_threshold=1, gt_points_drop=0.0, gt_drop_max_keep=10, remove_points_after_sample=False, anchor_cache=None, remove_environment=False, random_crop=False, reference_detections=None, add_rgb_to_points=False, lidar_input=False, unlabeled_db_sampler=None, out_size_factor=2, min_gt_point_dict=None, bev_only=False, use_group_id=False, out_dtype=np.float32, num_classes=1, RGB_embedding=False): """convert point cloud to voxels, create targets if ground truths exists. """ # prep_pointcloud_start = time.time() points = input_dict["points"] # if training: gt_boxes = input_dict["gt_boxes"] gt_names = input_dict["gt_names"] difficulty = input_dict["difficulty"] group_ids = None if use_group_id and "group_ids" in input_dict: group_ids = input_dict["group_ids"] rect = input_dict["rect"] Trv2c = input_dict["Trv2c"] P2 = input_dict["P2"] unlabeled_training = unlabeled_db_sampler is not None image_idx = input_dict["image_idx"] # t1 = time.time() - prep_pointcloud_start if shuffle_points: # shuffle is a little slow. np.random.shuffle(points) # t2 = time.time() - prep_pointcloud_start # print("t2-t1: ", t2-t1) # 0.035 if reference_detections is not None: C, R, T = box_np_ops.projection_matrix_to_CRT_kitti(P2) frustums = box_np_ops.get_frustum_v2(reference_detections, C) frustums -= T # frustums = np.linalg.inv(R) @ frustums.T frustums = np.einsum('ij, akj->aki', np.linalg.inv(R), frustums) frustums = box_np_ops.camera_to_lidar(frustums, rect, Trv2c) surfaces = box_np_ops.corner_to_surfaces_3d_jit(frustums) masks = points_in_convex_polygon_3d_jit(points, surfaces) points = points[masks.any(-1)] if remove_outside_points: # and not lidar_input: image_shape = input_dict["image_shape"] points = box_np_ops.remove_outside_points(points, rect, Trv2c, P2, image_shape) if remove_environment is True: # and training: selected = kitti.keep_arrays_by_name(gt_names, class_names) gt_boxes = gt_boxes[selected] gt_names = gt_names[selected] difficulty = difficulty[selected] if group_ids is not None: group_ids = group_ids[selected] points = prep.remove_points_outside_boxes(points, gt_boxes) # if training: # print(gt_names) selected = kitti.drop_arrays_by_name(gt_names, ["DontCare"]) gt_boxes = gt_boxes[selected] gt_names = gt_names[selected] difficulty = difficulty[selected] if group_ids is not None: group_ids = group_ids[selected] # t3 = time.time() - prep_pointcloud_start # print("t3-t2: ", t3 - t2) # 0.0002 gt_boxes = box_np_ops.box_camera_to_lidar(gt_boxes, rect, Trv2c) if remove_unknown: remove_mask = difficulty == -1 """ gt_boxes_remove = gt_boxes[remove_mask] gt_boxes_remove[:, 3:6] += 0.25 points = prep.remove_points_in_boxes(points, gt_boxes_remove) """ keep_mask = np.logical_not(remove_mask) gt_boxes = gt_boxes[keep_mask] gt_names = gt_names[keep_mask] difficulty = difficulty[keep_mask] if group_ids is not None: group_ids = group_ids[keep_mask] gt_boxes_mask = np.array([n in class_names for n in gt_names], dtype=np.bool_) # t4 = time.time() - prep_pointcloud_start # print("t4-t3: ", t4 - t3) # 0.001 if RGB_embedding: RGB_image = cv2.imread(input_dict['image_path']) points_camera = box_np_ops.box_lidar_to_camera(points[:, :3], rect, Trv2c) points_to_image_idx = box_np_ops.project_to_image(points_camera, P2) points_to_image_idx = points_to_image_idx.astype(int) mask = box_np_ops.remove_points_outside_image(RGB_image, points_to_image_idx) points = points[mask] points_to_image_idx = points_to_image_idx[mask] BGR = RGB_image[points_to_image_idx[:, 1], points_to_image_idx[:, 0]] points = np.concatenate((points, BGR), axis=1) # test_mask = points_camera[mask][:, 0] < 0 # test_image_idx = points_to_image_idx[test_mask] # RGB_image[test_image_idx[:, 1], test_image_idx[:, 0]] = [255, 0, 0] # test_mask = points_camera[mask][:, 0] >= 0 # test_image_idx = points_to_image_idx[test_mask] # RGB_image[test_image_idx[:, 1], test_image_idx[:, 0]] = [0, 0, 255] # print() # t5 = time.time() - prep_pointcloud_start # print("t5-t4: ", t5 - t4) # 0.019 # TODO if db_sampler is not None and training: # and not RGB_embedding: if RGB_embedding: num_point_features += 3 fg_points_mask = box_np_ops.points_in_rbbox(points, gt_boxes) fg_points_list = [] bg_points_mask = np.zeros((points.shape[0]), dtype=bool) for i in range(fg_points_mask.shape[1]): fg_points_list.append(points[fg_points_mask[:, i]]) bg_points_mask = np.logical_or(bg_points_mask, fg_points_mask[:, i]) bg_points_mask = np.logical_not(bg_points_mask) sampled_dict = db_sampler.sample_all(root_path, points[bg_points_mask], gt_boxes, gt_names, fg_points_list, num_point_features, random_crop, gt_group_ids=group_ids, rect=rect, Trv2c=Trv2c, P2=P2) # sampled_dict = db_sampler.sample_all( # root_path, # gt_boxes, # gt_names, # num_point_features, # random_crop, # gt_group_ids=group_ids, # rect=rect, # Trv2c=Trv2c, # P2=P2) # t_sample_all = time.time() - prep_pointcloud_start # print("t_sample_all - t5: ", t_sample_all - t5) # 3.83 if sampled_dict is not None: sampled_gt_names = sampled_dict["gt_names"] sampled_gt_boxes = sampled_dict["gt_boxes"] points = sampled_dict["points"] sampled_gt_masks = sampled_dict["gt_masks"] remained_boxes_idx = sampled_dict["remained_boxes_idx"] # gt_names = gt_names[gt_boxes_mask].tolist() gt_names = np.concatenate([gt_names, sampled_gt_names], axis=0) # gt_names += [s["name"] for s in sampled] gt_boxes = np.concatenate([gt_boxes, sampled_gt_boxes]) gt_boxes_mask = np.concatenate([gt_boxes_mask, sampled_gt_masks], axis=0) gt_names = gt_names[remained_boxes_idx] gt_boxes = gt_boxes[remained_boxes_idx] gt_boxes_mask = gt_boxes_mask[remained_boxes_idx] if group_ids is not None: sampled_group_ids = sampled_dict["group_ids"] group_ids = np.concatenate([group_ids, sampled_group_ids]) group_ids = group_ids[remained_boxes_idx] # if remove_points_after_sample: # # points = prep.remove_points_in_boxes( # # points, sampled_gt_boxes) # locs = sampled_gt_boxes[:, 0:3] # dims = sampled_gt_boxes[:, 3:6] # angles = sampled_gt_boxes[:, 6] # camera_box_origin = [0.5, 0.5, 0] # # box_corners = box_np_ops.center_to_corner_box3d( # locs, dims, angles, camera_box_origin, axis=2) # box_corners_in_image = box_np_ops.project_to_fv_image( # box_corners, example['grid_size'][i], example['meta'][i]) # box_centers_in_image = box_np_ops.project_to_fv_image( # locs, example['grid_size'][i], example['meta'][i]) # t_sample_all2 = time.time() - prep_pointcloud_start # print("t_sample_all2 - t_sample_all: ", t_sample_all2 - t_sample_all) # 0.0002 # unlabeled_mask = np.zeros((gt_boxes.shape[0], ), dtype=np.bool_) # if without_reflectivity and training: # used_point_axes = list(range(num_point_features)) # used_point_axes.pop(3) # points = points[:, used_point_axes] # pc_range = voxel_generator.point_cloud_range # bev_only = False # if bev_only: # set z and h to limits # gt_boxes[:, 2] = pc_range[2] # gt_boxes[:, 5] = pc_range[5] - pc_range[2] if training: gt_loc_noise_std = [0.0, 0.0, 0.0] prep.noise_per_object_v3_( gt_boxes, points, gt_boxes_mask, rotation_perturb=gt_rotation_noise, center_noise_std=gt_loc_noise_std, global_random_rot_range=global_random_rot_range, group_ids=group_ids, num_try=100) # t_noise = time.time() - prep_pointcloud_start # print("t_noise - t_sample_all2: ", t_noise - t_sample_all2) # 12.01 # should remove unrelated objects after noise per object gt_boxes = gt_boxes[gt_boxes_mask] gt_names = gt_names[gt_boxes_mask] if group_ids is not None: group_ids = group_ids[gt_boxes_mask] gt_classes = np.array([class_names.index(n) + 1 for n in gt_names], dtype=np.int32) # t6 = time.time() - prep_pointcloud_start # print("t6-t5: ", t6 - t5) # 16.0 if training: gt_boxes, points = prep.random_flip(gt_boxes, points) # gt_boxes, points = prep.global_rotation( # gt_boxes, points, rotation=global_rotation_noise) gt_boxes, points = prep.global_scaling_v2(gt_boxes, points, *global_scaling_noise) # Global translation # gt_boxes, points = prep.global_translate(gt_boxes, points, global_loc_noise_std) # bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]] bv_range = [0, -40, 70.4, 40] mask = prep.filter_gt_box_outside_range(gt_boxes, bv_range) gt_boxes = gt_boxes[mask] gt_classes = gt_classes[mask] if group_ids is not None: group_ids = group_ids[mask] # limit rad to [-pi, pi] gt_boxes[:, 6] = box_np_ops.limit_period(gt_boxes[:, 6], offset=0.5, period=2 * np.pi) # TODO # if shuffle_points: # # shuffle is a little slow. # np.random.shuffle(points) # # t7 = time.time() - prep_pointcloud_start # # print("t7-t6: ", t7 - t6) # 1.95 # voxels, coordinates, num_points = voxel_generator.generate( # points, max_voxels, RGB_embedding=RGB_embedding) # # t8 = time.time() - prep_pointcloud_start # # print("t8-t7: ", t8 - t7) # 2.0 # voxel_size = voxel_generator.voxel_size # grid_size = voxel_generator.grid_size # pc_range = copy.deepcopy(voxel_generator.point_cloud_range) # grid_size = voxel_generator.grid_size # phi_min = voxel_generator.phi_min # theta_min = voxel_generator.theta_min # image_h, image_w = grid_size[1], grid_size[0] # c = np.array([image_w / 2., image_h / 2.]) # s = np.array([image_w, image_h], dtype=np.int32) # meta = {'c': c, 's': s, 'calib': P2, 'phi_min': phi_min, 'theta_min': theta_min} # t7 = time.time() - prep_pointcloud_start # print("t7-t6: ", t7 - t6) # 1.95 fv_image, points_mask = fv_generator.generate(points, RGB_embedding=RGB_embedding, occupancy_embedding=False) # t8 = time.time() - prep_pointcloud_start # print("t8-t7: ", t8 - t7) # 2.0 fv_dim = fv_generator.fv_dim pc_range = copy.deepcopy(fv_generator.spherical_coord_range) grid_size = fv_generator.grid_size phi_min = fv_generator.phi_min theta_min = fv_generator.theta_min image_h, image_w = fv_dim[1], fv_dim[0] c = np.array([image_w / 2., image_h / 2.]) s = np.array([image_w, image_h], dtype=np.int32) meta = { 'c': c, 's': s, 'calib': P2, 'phi_min': phi_min, 'theta_min': theta_min } fv_image = np.transpose(fv_image, [2, 1, 0]) max_objs = 50 num_objs = min(gt_boxes.shape[0], max_objs) box_np_ops.change_box3d_center_(gt_boxes, src=[0.5, 0.5, 0], dst=[0.5, 0.5, 0.5]) spherical_gt_boxes = np.zeros((max_objs, gt_boxes.shape[1])) spherical_gt_boxes[:num_objs, :] = gt_boxes[:num_objs, :] spherical_gt_boxes[:num_objs, :] = convert_to_spherical_coor( gt_boxes[:num_objs, :]) spherical_gt_boxes[:num_objs, 0] -= phi_min spherical_gt_boxes[:num_objs, 1] -= theta_min spherical_gt_boxes[:num_objs, 0] /= grid_size[0] spherical_gt_boxes[:num_objs, 1] /= grid_size[1] spherical_gt_boxes, num_objs = filter_outside_range( spherical_gt_boxes, num_objs, fv_dim) # t9 = time.time() - prep_pointcloud_start # print("t9-t8: ", t9 - t8) # 0.0005 example = { 'fv_image': fv_image, 'grid_size': grid_size, 'pc_range': pc_range, 'meta': meta, 'spherical_gt_boxes': spherical_gt_boxes, 'resized_image_shape': grid_size } example.update({'rect': rect, 'Trv2c': Trv2c, 'P2': P2}) if RGB_embedding: RGB_image = cv2.resize(RGB_image, (image_w, image_h)) example.update({'RGB_image': RGB_image}) if training: hm = np.zeros((num_classes, image_h, image_w), dtype=np.float32) reg = np.zeros((max_objs, 2), dtype=np.float32) dep = np.zeros((max_objs, 1), dtype=np.float32) rotbin = np.zeros((max_objs, 2), dtype=np.int64) rotres = np.zeros((max_objs, 2), dtype=np.float32) dim = np.zeros((max_objs, 3), dtype=np.float32) ind = np.zeros((max_objs), dtype=np.int64) reg_mask = np.zeros((max_objs), dtype=np.uint8) rot_mask = np.zeros((max_objs), dtype=np.uint8) # # hm = np.zeros((num_classes, image_h, image_w), dtype=np.float32) # reg = np.zeros((image_w, image_h, 2), dtype=np.float32) # dep = np.zeros((image_w, image_h, 1), dtype=np.float32) # rotbin = np.zeros((image_w, image_h, 2), dtype=np.int64) # rotres = np.zeros((image_w, image_h, 2), dtype=np.float32) # dim = np.zeros((image_w, image_h, 3), dtype=np.float32) # # ind = np.zeros((max_objs), dtype=np.int64) # fg_mask = np.zeros((image_w, image_h), dtype=np.uint8) # # rot_mask = np.zeros((max_objs), dtype=np.uint8) draw_gaussian = draw_umich_gaussian # center heatmap radius = int(image_h / 30) # radius = int(image_h / 25) for k in range(num_objs): gt_3d_box = spherical_gt_boxes[k] cls_id = 0 # print('heatmap gaussian radius: ' + str(radius)) ct = np.array([gt_3d_box[0], gt_3d_box[1]], dtype=np.float32) ct_int = ct.astype(np.int32) draw_gaussian(hm[cls_id], ct, radius) # depth(distance), wlh dep[k] = gt_3d_box[2] dim[k] = gt_3d_box[3:6] # dep[ct_int[0], ct_int[1], 0] = gt_3d_box[2] # dim[ct_int[0], ct_int[1], :] = gt_3d_box[3:6] # reg, ind, mask reg[k] = ct - ct_int ind[k] = ct_int[1] * image_w + ct_int[0] reg_mask[k] = rot_mask[k] = 1 # fg_mask[ct_int[0], ct_int[1]] = 1 # ry ry = gt_3d_box[6] if ry < np.pi / 6. or ry > 5 * np.pi / 6.: rotbin[k, 0] = 1 rotres[k, 0] = ry - (-0.5 * np.pi) # rotbin[ct_int[0], ct_int[1], 0] = 1 # rotres[ct_int[0], ct_int[1], 0] = ry - (-0.5 * np.pi) if ry > -np.pi / 6. or ry < -5 * np.pi / 6.: rotbin[k, 1] = 1 rotres[k, 1] = ry - (0.5 * np.pi) # rotbin[ct_int[0], ct_int[1], 1] = 1 # rotres[ct_int[0], ct_int[1], 1] = ry - (0.5 * np.pi) example.update({ 'hm': hm, 'dep': dep, 'dim': dim, 'ind': ind, 'rotbin': rotbin, 'rotres': rotres, 'reg_mask': reg_mask, 'rot_mask': rot_mask, 'reg': reg }) # example.update({ # 'hm': hm, 'dep': dep, 'dim': dim, # 'rotbin': rotbin, 'rotres': rotres, # 'fg_mask': fg_mask, 'reg': reg # }) # t10 = time.time() - prep_pointcloud_start # print("total: ", t10) # 19.58 return example