def convert_detection_to_kitti_annos(self, detection):
    """Convert per-image lidar detections into KITTI-style annotation dicts.

    One annotation is produced for every entry of ``self._kitti_infos``, in
    that order. Predicted boxes are converted to the camera frame with the
    frame's calibration, projected into the image to obtain a 2D box, and
    skipped when that 2D box lies completely outside the image.

    Args:
        detection: Mapping from image index (as ``str``) to a dict holding
            ``"box3d_lidar"``, ``"label_preds"`` and ``"scores"`` (objects
            supporting ``.detach().cpu().numpy()``, e.g. torch tensors) plus
            ``"metadata"``. The box data is copied before it is adjusted, so
            the caller's detections are left untouched.

    Returns:
        A list with one annotation dict per info entry. When at least one
        box survives the image filter, every field collected in the dict
        from ``get_start_result_anno()`` is stacked with ``np.stack``;
        otherwise the result of ``empty_result_anno()`` is used. Each dict
        additionally carries the detection's ``"metadata"``.

    Raises:
        KeyError: If ``detection`` has no entry for an image index listed
            in ``self._kitti_infos``.
    """
    class_names = self._class_names
    annos = []
    # Walk the infos directly instead of searching the index list for every
    # image (the former ``list.index`` lookup was quadratic overall).
    for info in self._kitti_infos:
        det = detection[str(info["image"]["image_idx"])]
        calib = info["calib"]
        rect = calib["R0_rect"]
        Trv2c = calib["Tr_velo_to_cam"]
        P2 = calib["P2"]

        # ``.numpy()`` may share memory with a CPU tensor; copy so the
        # in-place yaw/z adjustments below do not leak into ``detection``.
        final_box_preds = det["box3d_lidar"].detach().cpu().numpy().copy()
        label_preds = det["label_preds"].detach().cpu().numpy()
        scores = det["scores"].detach().cpu().numpy()

        anno = get_start_result_anno()
        num_example = 0

        if final_box_preds.shape[0] != 0:
            final_box_preds[:, -1] = box_np_ops.limit_period(
                final_box_preds[:, -1], offset=0.5, period=np.pi * 2,
            )
            # Lower z by half of column 5 (h in the layout noted below);
            # presumably moves the reference from box centre to box bottom,
            # matching the 1.0 y-origin used for the corners.
            final_box_preds[:, 2] -= final_box_preds[:, 5] / 2

            # aim: x, y, z, w, l, h, r -> -y, -z, x, h, w, l, r
            # (x, y, z, w, l, h, r) in lidar -> (x', y', z', l, h, w, r) in camera
            box3d_camera = box_np_ops.box_lidar_to_camera(
                final_box_preds, rect, Trv2c)
            camera_box_origin = [0.5, 1.0, 0.5]
            box_corners = box_np_ops.center_to_corner_box3d(
                box3d_camera[:, :3],
                box3d_camera[:, 3:6],
                box3d_camera[:, 6],
                camera_box_origin,
                axis=1,
            )
            # box_corners_in_image: [N, 8, 2]
            box_corners_in_image = box_np_ops.project_to_image(
                box_corners, P2)
            minxy = np.min(box_corners_in_image, axis=1)
            maxxy = np.max(box_corners_in_image, axis=1)
            bbox = np.concatenate([minxy, maxxy], axis=1)

            # image_shape[0] bounds the y coordinate and image_shape[1] the
            # x coordinate, so the reversed shape is the (x, y) upper limit.
            image_shape = info["image"]["image_shape"]
            upper_xy = image_shape[::-1]

            for j in range(box3d_camera.shape[0]):
                # Skip boxes whose projection is entirely outside the image.
                if bbox[j, 0] > image_shape[1] or bbox[j, 1] > image_shape[0]:
                    continue
                if bbox[j, 2] < 0 or bbox[j, 3] < 0:
                    continue
                # Clip the remaining boxes to the image borders.
                bbox[j, 2:] = np.minimum(bbox[j, 2:], upper_xy)
                bbox[j, :2] = np.maximum(bbox[j, :2], [0, 0])
                anno["bbox"].append(bbox[j])
                # alpha: rotation_y offset by the bearing of the box centre
                # in the lidar x/y plane.
                anno["alpha"].append(
                    -np.arctan2(-final_box_preds[j, 1], final_box_preds[j, 0])
                    + box3d_camera[j, 6]
                )
                # Dimensions keep the column order returned by
                # box_lidar_to_camera (no reordering is applied here).
                anno["dimensions"].append(box3d_camera[j, 3:6])
                anno["location"].append(box3d_camera[j, :3])
                anno["rotation_y"].append(box3d_camera[j, 6])
                anno["name"].append(class_names[int(label_preds[j])])
                anno["truncated"].append(0.0)
                anno["occluded"].append(0)
                anno["score"].append(scores[j])

                num_example += 1

        if num_example != 0:
            annos.append({n: np.stack(v) for n, v in anno.items()})
        else:
            annos.append(empty_result_anno())
        annos[-1]["metadata"] = det["metadata"]
    return annos
def convert_detection_to_kitti_annos(self, detection):
    """Convert per-image detections into KITTI-style annotation dicts.

    One annotation is produced for every entry of ``self._kitti_infos``, in
    that order. Unlike a full KITTI export, this variant applies no
    lidar-to-camera transform and no image projection: boxes are written
    out as given (after the z and yaw adjustments below), ``bbox`` is always
    the placeholder ``[-1, -1, -1, -1]`` and ``alpha`` is always ``0``.
    Calibration data is therefore not read.

    Args:
        detection: Mapping from image index (as ``str``) to a dict holding
            ``"box3d_lidar"`` (2-D array-like, at least 7 columns),
            ``"label_preds"``, ``"scores"`` and ``"metadata"``. The box data
            is copied before it is adjusted, so the caller's detections are
            left untouched and repeated calls give the same result.

    Returns:
        A list with one annotation dict per info entry. When the image has
        at least one box, every field collected in the dict from
        ``get_start_result_anno()`` is stacked with ``np.stack``; otherwise
        the result of ``empty_result_anno()`` is used. Each dict
        additionally carries the detection's ``"metadata"``.

    Raises:
        KeyError: If ``detection`` has no entry for an image index listed
            in ``self._kitti_infos``.
    """
    class_names = self._class_names
    annos = []
    # Walk the infos directly instead of searching the index list for every
    # image (the former ``list.index`` lookup was quadratic overall).
    for info in self._kitti_infos:
        det = detection[str(info["image"]["image_idx"])]

        # np.array() copies its input, so none of the adjustments below are
        # written back into the caller's ``detection``.
        boxes = np.array(det["box3d_lidar"])
        label_preds = det["label_preds"]
        scores = det["scores"]

        # Raise z by half of column 5 and flip the sign of the yaw column.
        # Plain assignment (rather than += / *=) keeps NumPy's cast-on-store
        # behaviour for non-float box dtypes.
        boxes[:, 2] = boxes[:, 2] + boxes[:, 5] / 2
        boxes[:, -1] = boxes[:, -1] * -1

        anno = get_start_result_anno()
        num_boxes = boxes.shape[0]

        if num_boxes != 0:
            boxes[:, -1] = box_np_ops.limit_period(
                boxes[:, -1], offset=0.5, period=np.pi * 2,
            )
            # NOTE(review): an earlier revision also projected the box
            # corners with P2 here, but the resulting 2D box was never
            # stored; the projection was dropped as dead work.
            for j in range(num_boxes):
                anno["bbox"].append([-1, -1, -1, -1])
                anno["alpha"].append(0)
                anno["dimensions"].append(boxes[j, 3:6])
                anno["location"].append(boxes[j, :3])
                anno["rotation_y"].append(boxes[j, 6])
                # Labels are shifted by one before indexing class_names,
                # i.e. they are presumably 1-based here (a label of 0 would
                # silently select the last class) - TODO confirm.
                anno["name"].append(class_names[int(label_preds[j] - 1)])
                anno["truncated"].append(0.0)
                anno["occluded"].append(0)
                anno["score"].append(scores[j])
            annos.append({n: np.stack(v) for n, v in anno.items()})
        else:
            annos.append(empty_result_anno())
        annos[-1]["metadata"] = det["metadata"]
    return annos