Python change_box3d_center_の例

プログラミング言語: Python

名前空間/パッケージ名: second.core.box_np_ops

メソッド/関数: change_box3d_center_

hotexamples.comのコード掲載数: 10

Python change_box3d_center_ - 10件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのsecond.core.box_np_ops.change_box3d_center_の実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: backend.py プロジェクト: miaozhenwei/second.pytorch

def inference_by_idx():
    global BACKEND
    instance = request.json
    response = {"status": "normal"}
    if BACKEND.root_path is None:
        return error_response("root path is not set")
    if BACKEND.kitti_infos is None:
        return error_response("kitti info is not loaded")
    if BACKEND.inference_ctx is None:
        return error_response("inference_ctx is not loaded")
    image_idx = instance["image_idx"]
    idx = BACKEND.image_idxes.index(image_idx)
    kitti_info = BACKEND.kitti_infos[idx]

    v_path = str(Path(BACKEND.root_path) / kitti_info['velodyne_path'])
    num_features = 4
    points = np.fromfile(
        str(v_path), dtype=np.float32,
        count=-1).reshape([-1, num_features])
    rect = kitti_info['calib/R0_rect']
    P2 = kitti_info['calib/P2']
    Trv2c = kitti_info['calib/Tr_velo_to_cam']
    if 'img_shape' in kitti_info:
        image_shape = kitti_info['img_shape']
        points = box_np_ops.remove_outside_points(
            points, rect, Trv2c, P2, image_shape)
        print(points.shape[0])

    t = time.time()
    inputs = BACKEND.inference_ctx.get_inference_input_dict(
        kitti_info, points)
    print("input preparation time:", time.time() - t)
    t = time.time()
    with BACKEND.inference_ctx.ctx():
        dt_annos = BACKEND.inference_ctx.inference(inputs)[0]
    print("detection time:", time.time() - t)
    dims = dt_annos['dimensions']
    num_obj = dims.shape[0]
    loc = dt_annos['location']
    rots = dt_annos['rotation_y']
    labels = dt_annos['name']

    dt_boxes_camera = np.concatenate(
        [loc, dims, rots[..., np.newaxis]], axis=1)
    dt_boxes = box_np_ops.box_camera_to_lidar(
        dt_boxes_camera, rect, Trv2c)
    box_np_ops.change_box3d_center_(dt_boxes, src=[0.5, 0.5, 0], dst=[0.5, 0.5, 0.5])
    locs = dt_boxes[:, :3]
    dims = dt_boxes[:, 3:6]
    rots = np.concatenate([np.zeros([num_obj, 2], dtype=np.float32), -dt_boxes[:, 6:7]], axis=1)
    response["dt_locs"] = locs.tolist()
    response["dt_dims"] = dims.tolist()
    response["dt_rots"] = rots.tolist()
    response["dt_labels"] = labels.tolist()
    response["dt_scores"] = dt_annos["score"].tolist()

    response = jsonify(results=[response])
    response.headers['Access-Control-Allow-Headers'] = '*'
    return response

コード例 #2

ファイルを表示

def kitti_anno_to_corners(info, annos=None):
    rect = info['calib/R0_rect']
    P2 = info['calib/P2']
    Tr_velo_to_cam = info['calib/Tr_velo_to_cam']
    if annos is None:
        annos = info['annos']
    dims = annos['dimensions']
    loc = annos['location']
    rots = annos['rotation_y']
    scores = None
    if 'score' in annos:
        scores = annos['score']
    boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]], axis=1)
    boxes_lidar = box_np_ops.box_camera_to_lidar(boxes_camera, rect,
                                                 Tr_velo_to_cam)
    box_np_ops.change_box3d_center_(boxes_lidar, src=[0.5, 0.5, 0], dst=[0.5, 0.5, 0.5])     #pang added 02/02/2019 to make the position of the bounding boxes correct
    boxes_corners = box_np_ops.center_to_corner_box3d(
        boxes_lidar[:, :3],
        boxes_lidar[:, 3:6],
        boxes_lidar[:, 6],
        origin=[0.5, 0.5, 0],
        axis=2)
    return boxes_corners, scores, boxes_lidar

コード例 #3

ファイルを表示

ファイル: main.py プロジェクト: han1366/pointpillars_TANet

def get_pointcloud():
    global BACKEND
    instance = request.json
    response = {"status": "normal"}
    if BACKEND.root_path is None:
        return error_response("root path is not set")
    if BACKEND.kitti_infos is None:
        return error_response("kitti info is not loaded")
    image_idx = instance["image_idx"]
    print("image_idx",image_idx)
    idx = BACKEND.image_idxes.index(image_idx)
    kitti_info = BACKEND.kitti_infos[idx]
    rect = kitti_info['calib/R0_rect']
    P2 = kitti_info['calib/P2']
    Trv2c = kitti_info['calib/Tr_velo_to_cam']
    img_shape = kitti_info["img_shape"]  # hw
    wh = np.array(img_shape[::-1])
    whwh = np.tile(wh, 2)
    if 'annos' in kitti_info:
        annos = kitti_info['annos']
        labels = annos['name']
        num_obj = len([n for n in annos['name'] if n != 'DontCare'])
        dims = annos['dimensions'][:num_obj]
        loc = annos['location'][:num_obj]
        rots = annos['rotation_y'][:num_obj]
        bbox = annos['bbox'][:num_obj] / whwh
        gt_boxes_camera = np.concatenate(
            [loc, dims, rots[..., np.newaxis]], axis=1)
        gt_boxes = box_np_ops.box_camera_to_lidar(
            gt_boxes_camera, rect, Trv2c)
        box_np_ops.change_box3d_center_(gt_boxes, src=[0.5, 0.5, 0], dst=[0.5, 0.5, 0.5])
        locs = gt_boxes[:, :3]
        dims = gt_boxes[:, 3:6]
        rots = np.concatenate([np.zeros([num_obj, 2], dtype=np.float32), -gt_boxes[:, 6:7]], axis=1)
        frontend_annos = {}
        response["locs"] = locs.tolist()
        response["dims"] = dims.tolist()
        response["rots"] = rots.tolist()
        response["bbox"] = bbox.tolist()

        response["labels"] = labels[:num_obj].tolist()

    v_path = str(Path(BACKEND.root_path) / kitti_info['velodyne_path'])
    print("v_path:",v_path)
    with open(v_path, 'rb') as f:
        pc_str = base64.encodebytes(f.read())
    response["pointcloud"] = pc_str.decode("utf-8")
    if "with_det" in instance and instance["with_det"]:
        if BACKEND.dt_annos is None:
            return error_response("det anno is not loaded")
        dt_annos = BACKEND.dt_annos[idx]
        dims = dt_annos['dimensions']
        num_obj = dims.shape[0]
        loc = dt_annos['location']
        rots = dt_annos['rotation_y']
        bbox = dt_annos['bbox'] / whwh
        labels = dt_annos['name']

        dt_boxes_camera = np.concatenate(
            [loc, dims, rots[..., np.newaxis]], axis=1)
        dt_boxes = box_np_ops.box_camera_to_lidar(
            dt_boxes_camera, rect, Trv2c)
        box_np_ops.change_box3d_center_(dt_boxes, src=[0.5, 0.5, 0], dst=[0.5, 0.5, 0.5])
        locs = dt_boxes[:, :3]
        dims = dt_boxes[:, 3:6]
        rots = np.concatenate([np.zeros([num_obj, 2], dtype=np.float32), -dt_boxes[:, 6:7]], axis=1)
        response["dt_locs"] = locs.tolist()
        response["dt_dims"] = dims.tolist()
        response["dt_rots"] = rots.tolist()
        response["dt_labels"] = labels.tolist()
        response["dt_bbox"] = bbox.tolist()
        response["dt_scores"] = dt_annos["score"].tolist()

    # if "score" in annos:
    #     response["score"] = score.tolist()
    response = jsonify(results=[response])
    response.headers['Access-Control-Allow-Headers'] = '*'
    print("send response!")
    return response

コード例 #4

ファイルを表示

ファイル: create_data.py プロジェクト: zhb0920/second.pytorch

def create_groundtruth_database(data_path,
                                info_path=None,
                                used_classes=None,
                                database_save_path=None,
                                db_info_save_path=None,
                                relative_path=True,
                                lidar_only=False,
                                bev_only=False,
                                coors_range=None):
    root_path = pathlib.Path(data_path)
    if info_path is None:
        info_path = root_path / 'kitti_infos_train.pkl'
    if database_save_path is None:
        database_save_path = root_path / 'gt_database'
    else:
        database_save_path = pathlib.Path(database_save_path)
    if db_info_save_path is None:
        db_info_save_path = root_path / "kitti_dbinfos_train.pkl"
    database_save_path.mkdir(parents=True, exist_ok=True)
    with open(info_path, 'rb') as f:
        kitti_infos = pickle.load(f)
    all_db_infos = {}
    if used_classes is None:
        used_classes = list(kitti.get_classes())
        used_classes.pop(used_classes.index('DontCare'))
    for name in used_classes:
        all_db_infos[name] = []
    group_counter = 0
    for info in prog_bar(kitti_infos):
        pc_info = info["point_cloud"]
        image_info = info["image"]
        calib = info["calib"]

        velodyne_path = pc_info['velodyne_path']
        if relative_path:
            # velodyne_path = str(root_path / velodyne_path) + "_reduced"
            velodyne_path = str(root_path / velodyne_path)
        num_features = 4
        if 'num_features' in pc_info:
            num_features = pc_info['num_features']
        points = np.fromfile(
            velodyne_path, dtype=np.float32, count=-1).reshape([-1, num_features])

        image_idx = image_info["image_idx"]
        rect = calib['R0_rect']
        P2 = calib['P2']
        Trv2c = calib['Tr_velo_to_cam']
        if not lidar_only:
            points = box_np_ops.remove_outside_points(points, rect, Trv2c, P2,
                                                        image_info["image_shape"])

        annos = info["annos"]
        names = annos["name"]
        bboxes = annos["bbox"]
        difficulty = annos["difficulty"]
        gt_idxes = annos["index"]
        num_obj = np.sum(annos["index"] >= 0)
        rbbox_cam = kitti.anno_to_rbboxes(annos)[:num_obj]
        rbbox_lidar = box_np_ops.box_camera_to_lidar(rbbox_cam, rect, Trv2c)
        box_np_ops.change_box3d_center_(rbbox_lidar, [0.5, 0.5, 0], [0.5, 0.5, 0.5])
        if bev_only: # set z and h to limits
            assert coors_range is not None
            rbbox_lidar[:, 2] = coors_range[2]
            rbbox_lidar[:, 5] = coors_range[5] - coors_range[2]
        
        group_dict = {}
        group_ids = np.full([bboxes.shape[0]], -1, dtype=np.int64)
        if "group_ids" in annos:
            group_ids = annos["group_ids"]
        else:
            group_ids = np.arange(bboxes.shape[0], dtype=np.int64)
        point_indices = box_np_ops.points_in_rbbox(points, rbbox_lidar)
        for i in range(num_obj):
            filename = f"{image_idx}_{names[i]}_{gt_idxes[i]}.bin"
            filepath = database_save_path / filename
            gt_points = points[point_indices[:, i]]

            gt_points[:, :3] -= rbbox_lidar[i, :3]
            with open(filepath, 'w') as f:
                gt_points.tofile(f)
            if names[i] in used_classes:
                if relative_path:
                    db_path = str(database_save_path.stem + "/" + filename)
                else:
                    db_path = str(filepath)
                db_info = {
                    "name": names[i],
                    "path": db_path,
                    "image_idx": image_idx,
                    "gt_idx": gt_idxes[i],
                    "box3d_lidar": rbbox_lidar[i],
                    "num_points_in_gt": gt_points.shape[0],
                    "difficulty": difficulty[i],
                    # "group_id": -1,
                    # "bbox": bboxes[i],
                }

                local_group_id = group_ids[i]
                # if local_group_id >= 0:
                if local_group_id not in group_dict:
                    group_dict[local_group_id] = group_counter
                    group_counter += 1
                db_info["group_id"] = group_dict[local_group_id]
                if "score" in annos:
                    db_info["score"] = annos["score"][i]
                all_db_infos[names[i]].append(db_info)
    for k, v in all_db_infos.items():
        print(f"load {len(v)} {k} database infos")

    with open(db_info_save_path, 'wb') as f:
        pickle.dump(all_db_infos, f)

コード例 #5

ファイルを表示

ファイル: kitti_dataset.py プロジェクト: ahmedius2/second.pytorch

    def get_sensor_data(self, query):
        read_image = False
        idx = query
        if isinstance(query, dict):
            read_image = "cam" in query
            assert "lidar" in query
            idx = query["lidar"]["idx"]
        info = self._kitti_infos[idx]
        res = {
            "lidar": {
                "type": "lidar",
                "points": None,
            },
            "metadata": {
                "image_idx": info["image"]["image_idx"],
                "image_shape": info["image"]["image_shape"],
            },
            "calib": None,
            "cam": {}
        }

        pc_info = info["point_cloud"]
        velo_path = Path(pc_info['velodyne_path'])
        if not velo_path.is_absolute():
            velo_path = Path(self._root_path) / pc_info['velodyne_path']
        velo_reduced_path = velo_path.parent.parent / (
            velo_path.parent.stem + '_reduced') / velo_path.name
        if velo_reduced_path.exists():
            velo_path = velo_reduced_path
        points = np.fromfile(str(velo_path), dtype=np.float32,
                             count=-1).reshape([-1, self.NumPointFeatures])
        res["lidar"]["points"] = points
        image_info = info["image"]
        image_path = image_info['image_path']
        if read_image:
            image_path = self._root_path / image_path
            with open(str(image_path), 'rb') as f:
                image_str = f.read()
            res["cam"] = {
                "type": "camera",
                "data": image_str,
                "datatype": image_path.suffix[1:],
            }
        calib = info["calib"]
        calib_dict = {
            'rect': calib['R0_rect'],
            'Trv2c': calib['Tr_velo_to_cam'],
            'P2': calib['P2'],
        }
        res["calib"] = calib_dict
        if 'annos' in info:
            annos = info['annos']
            # we need other objects to avoid collision when sample
            annos = kitti.remove_dontcare(annos)
            locs = annos["location"]
            dims = annos["dimensions"]
            rots = annos["rotation_y"]
            gt_names = annos["name"]
            # rots = np.concatenate([np.zeros([locs.shape[0], 2], dtype=np.float32), rots], axis=1)
            gt_boxes = np.concatenate([locs, dims, rots[..., np.newaxis]],
                                      axis=1).astype(np.float32)
            calib = info["calib"]
            gt_boxes = box_np_ops.box_camera_to_lidar(gt_boxes,
                                                      calib["R0_rect"],
                                                      calib["Tr_velo_to_cam"])

            # only center format is allowed. so we need to convert
            # kitti [0.5, 0.5, 0] center to [0.5, 0.5, 0.5]
            box_np_ops.change_box3d_center_(gt_boxes, [0.5, 0.5, 0],
                                            [0.5, 0.5, 0.5])
            res["lidar"]["annotations"] = {
                'boxes': gt_boxes,
                'names': gt_names,
            }
            res["cam"]["annotations"] = {
                'boxes': annos["bbox"],
                'names': gt_names,
            }

        return res

コード例 #6

ファイルを表示

    def get_sensor_data(self, query):
        read_image = False
        colored_lidar = False
        create_init_lidar = True
        idx = query
        if isinstance(query, dict):
            colored_lidar = "colored" in query["lidar"]
            read_image = ("cam" in query) or colored_lidar
            create_init_lidar = "init_lidar" in query
            if create_init_lidar:
                assert "num_beams" in query["init_lidar"]
            assert "lidar" in query
            idx = query["lidar"]["idx"]
        info = self._vkitti_infos[idx]
        res = {
            "lidar": {
                "type": "lidar",
                "points": None,
                "colored": False
            },
            "metadata": {
                "image_idx": info["image"]["image_idx"],
                "image_shape": info["image"]["image_shape"],
            },
            "calib": None,
            "cam": {},
            "depth": {
                "type": "depth_map",
                "image": None
            },
            "init_lidar": {
                "num_beams": None,
                "points": None
            }
        }

        image_info = info["image"]
        image_path = image_info['image_path']
        if read_image:
            image_path = self._root_path / image_path
            with open(str(image_path), 'rb') as f:
                image_str = f.read()
            res["cam"] = {
                "type": "camera",
                "data": image_str,
                "datatype": image_path.suffix[1:],
            }

        # Will always produce depth, needed for lidar.
        depth_info = info["depth"]
        depth_path = Path(depth_info["depth_path"])
        if not depth_path.is_absolute():
            depth_path = Path(self._root_path) / depth_info["depth_path"]

        np_depth_image = np.array(
            Image.open(depth_path))  # (W, H) dtype=np.int32
        # Convert centimeters (int32) to meters.
        np_depth_image = np_depth_image.astype(
            np.float32) / 100  # (W, H) dtype=np.float32
        res["depth"] = {"type": "depth_map", "image": np_depth_image}

        if colored_lidar:
            # Concatenate depth map with colors.
            np_rgb_image = np.array(Image.open(
                io.BytesIO(image_str)))  # (H, W, 3)
            H, W = np_depth_image.shape
            np_depth_image = np.concatenate(
                (np_depth_image.reshape(H, W, 1), np_rgb_image),
                axis=2)  # (H, W, 4)
            res["lidar"]["colored"] = True
        else:
            np_depth_image = np_depth_image[..., np.newaxis]  # (H, W, 1)
            np_depth_image = np.concatenate(
                (np_depth_image, np.ones_like(np_depth_image)),
                axis=2)  # (H, W, 2)

        points = cam_transforms.depth_map_to_point_cloud(
            np_depth_image,
            self.lc_device.CAMERA_PARAMS['matrix'])  # (N, 4) or (N, 6)

        # Convert from camera frame to velo frame.
        cam2velo = self.lc_device.TRANSFORMS[
            'cTw']  # inverse extrinsics matrix
        xyz_velo = np.hstack(
            (points[:, :3], np.ones([len(points), 1], dtype=points.dtype)))
        xyz_velo = xyz_velo @ cam2velo.T
        points = np.hstack((xyz_velo[:, :3], points[:, 3:]))

        # Simulated LiDAR points.
        if create_init_lidar and query["init_lidar"]["num_beams"] > 1:
            num_beams = query["init_lidar"]["num_beams"]
            if self.mb_lidar_mask is None or self.mb_lidar_mask[
                    "num_beams"] != num_beams:
                self.create_mb_lidar_mask(num_beams)
            init_lidar_points = points[
                self.mb_lidar_mask["1d_mask"]]  # (R, 4) or (R, 6)
            init_lidar_points = init_lidar_points[:, :3]  # (R, 3)
            # Only select points with less than MAX_RANGE value.
            init_lidar_points = init_lidar_points[
                init_lidar_points[:, 0] < self._MAX_RANGE, :]
            res["init_lidar"]["points"] = init_lidar_points

        # Only select points with less than MAX_RANGE value.
        points = points[points[:, 0] < self._MAX_RANGE, :]
        res["lidar"]["points"] = points

        # Compute single-beam lidar.
        if create_init_lidar and query["init_lidar"]["num_beams"] == 1:
            init_lidar_points = self.convert_points_to_sb_lidar(points)
            init_lidar_points = init_lidar_points[:, :3]  # (N, 3)
            res["init_lidar"]["points"] = init_lidar_points

        R0_rect = np.eye(
            4, dtype=np.float32)  # there is no rectification in vkitti
        Tr_velo_to_cam = self.lc_device.TRANSFORMS["wTc"]
        # Extended intrinsics matrix: shape= (4, 4).
        P2 = np.eye(4, dtype=np.float32)
        P2[:3, :3] = self.lc_device.CAMERA_PARAMS["matrix"]

        calib_dict = {
            'R0_rect': R0_rect,
            'Tr_velo_to_cam': Tr_velo_to_cam,
            'P2': P2
        }
        res["calib"] = calib_dict

        if 'annos' in info:
            annos = info['annos']
            # we need other objects to avoid collision when sample
            annos = vkitti.remove_dontcare(annos)
            locs = annos["location"]
            dims = annos["dimensions"]
            rots = annos["rotation_y"]
            gt_names = annos["name"]
            gt_boxes = np.concatenate([locs, dims, rots[..., np.newaxis]],
                                      axis=1).astype(np.float32)
            calib = info["calib"]
            gt_boxes = box_np_ops.box_camera_to_lidar(gt_boxes,
                                                      r_rect=R0_rect,
                                                      velo2cam=Tr_velo_to_cam)

            # only center format is allowed. so we need to convert
            # kitti [0.5, 0.5, 0] center to [0.5, 0.5, 0.5]
            box_np_ops.change_box3d_center_(gt_boxes, [0.5, 0.5, 0],
                                            [0.5, 0.5, 0.5])
            res["lidar"]["annotations"] = {
                'boxes': gt_boxes,
                'names': gt_names,
            }
            res["cam"]["annotations"] = {
                'boxes': annos["bbox"],
                'names': gt_names,
            }

        return res

コード例 #7

ファイルを表示

ファイル: backend.py プロジェクト: zhb0920/second.pytorch

def get_pointcloud():
    global BACKEND
    instance = request.json
    response = {"status": "normal"}
    if BACKEND.root_path is None:
        return error_response("root path is not set")
    if BACKEND.kitti_infos is None:
        return error_response("kitti info is not loaded")
    image_idx = instance["image_idx"]
    enable_int16 = instance["enable_int16"]

    idx = BACKEND.image_idxes.index(image_idx)
    kitti_info = BACKEND.kitti_infos[idx]
    pc_info = kitti_info["point_cloud"]
    image_info = kitti_info["image"]
    calib = kitti_info["calib"]

    rect = calib['R0_rect']
    Trv2c = calib['Tr_velo_to_cam']
    P2 = calib['P2']

    img_shape = image_info["image_shape"]  # hw
    wh = np.array(img_shape[::-1])
    whwh = np.tile(wh, 2)
    if 'annos' in kitti_info:
        annos = kitti_info['annos']
        labels = annos['name']
        num_obj = len([n for n in annos['name'] if n != 'DontCare'])
        dims = annos['dimensions'][:num_obj]
        loc = annos['location'][:num_obj]
        rots = annos['rotation_y'][:num_obj]
        bbox = annos['bbox'][:num_obj] / whwh
        gt_boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]],
                                         axis=1)
        gt_boxes = box_np_ops.box_camera_to_lidar(gt_boxes_camera, rect, Trv2c)
        box_np_ops.change_box3d_center_(gt_boxes,
                                        src=[0.5, 0.5, 0],
                                        dst=[0.5, 0.5, 0.5])
        locs = gt_boxes[:, :3]
        dims = gt_boxes[:, 3:6]
        rots = np.concatenate(
            [np.zeros([num_obj, 2], dtype=np.float32), -gt_boxes[:, 6:7]],
            axis=1)
        frontend_annos = {}
        response["locs"] = locs.tolist()
        response["dims"] = dims.tolist()
        response["rots"] = rots.tolist()
        response["bbox"] = bbox.tolist()

        response["labels"] = labels[:num_obj].tolist()
    response["num_features"] = pc_info["num_features"]
    v_path = str(Path(BACKEND.root_path) / pc_info['velodyne_path'])
    points = np.fromfile(v_path, dtype=np.float32,
                         count=-1).reshape([-1, pc_info["num_features"]])
    if instance['remove_outside']:
        if 'image_shape' in image_info:
            image_shape = image_info['image_shape']
            points = box_np_ops.remove_outside_points(points, rect, Trv2c, P2,
                                                      image_shape)
        else:
            points = points[points[..., 2] > 0]
    if enable_int16:
        int16_factor = instance["int16_factor"]
        points *= int16_factor
        points = points.astype(np.int16)
    pc_str = base64.b64encode(points.tobytes())
    response["pointcloud"] = pc_str.decode("utf-8")
    if "with_det" in instance and instance["with_det"]:
        if BACKEND.dt_annos is None:
            return error_response("det anno is not loaded")
        dt_annos = BACKEND.dt_annos[idx]
        dims = dt_annos['dimensions']
        num_obj = dims.shape[0]
        loc = dt_annos['location']
        rots = dt_annos['rotation_y']
        bbox = dt_annos['bbox'] / whwh
        labels = dt_annos['name']

        dt_boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]],
                                         axis=1)
        dt_boxes = box_np_ops.box_camera_to_lidar(dt_boxes_camera, rect, Trv2c)
        box_np_ops.change_box3d_center_(dt_boxes,
                                        src=[0.5, 0.5, 0],
                                        dst=[0.5, 0.5, 0.5])
        locs = dt_boxes[:, :3]
        dims = dt_boxes[:, 3:6]
        rots = np.concatenate(
            [np.zeros([num_obj, 2], dtype=np.float32), -dt_boxes[:, 6:7]],
            axis=1)
        response["dt_locs"] = locs.tolist()
        response["dt_dims"] = dims.tolist()
        response["dt_rots"] = rots.tolist()
        response["dt_labels"] = labels.tolist()
        response["dt_bbox"] = bbox.tolist()
        response["dt_scores"] = dt_annos["score"].tolist()

    # if "score" in annos:
    #     response["score"] = score.tolist()
    response = jsonify(results=[response])
    response.headers['Access-Control-Allow-Headers'] = '*'
    print("send response with size {}!".format(len(pc_str)))
    return response

コード例 #8

ファイルを表示

ファイル: dataset.py プロジェクト: zhb0920/second.pytorch

 def __getitem__(self, idx):
     """
     you need to create a input dict in this function for network inference.
     format: {
         anchors
         voxels
         num_points
         coordinates
         ground_truth: {
             gt_boxes
             gt_names
             [optional]difficulty
             [optional]group_ids
         }
         [optional]anchors_mask, slow in SECOND v1.5, don't use this.
         [optional]metadata, in kitti, image index is saved in metadata
     }
     """
     info = self._kitti_infos[idx]
     kitti.convert_to_kitti_info_version2(info)
     pc_info = info["point_cloud"]
     if "points" not in pc_info:
         velo_path = pathlib.Path(pc_info['velodyne_path'])
         if not velo_path.is_absolute():
             velo_path = pathlib.Path(
                 self._root_path) / pc_info['velodyne_path']
         velo_reduced_path = velo_path.parent.parent / (
             velo_path.parent.stem + '_reduced') / velo_path.name
         if velo_reduced_path.exists():
             velo_path = velo_reduced_path
         points = np.fromfile(str(velo_path), dtype=np.float32,
                              count=-1).reshape(
                                  [-1, self._num_point_features])
     input_dict = {
         'points': points,
     }
     if "image" in info:
         input_dict["image"] = info["image"]
     if "calib" in info:
         calib = info["calib"]
         calib_dict = {
             'rect': calib['R0_rect'],
             'Trv2c': calib['Tr_velo_to_cam'],
             'P2': calib['P2'],
         }
         input_dict["calib"] = calib_dict
     if 'annos' in info:
         annos = info['annos']
         annos_dict = {}
         # we need other objects to avoid collision when sample
         annos = kitti.remove_dontcare(annos)
         loc = annos["location"]
         dims = annos["dimensions"]
         rots = annos["rotation_y"]
         gt_names = annos["name"]
         gt_boxes = np.concatenate([loc, dims, rots[..., np.newaxis]],
                                   axis=1).astype(np.float32)
         if "calib" in info:
             calib = info["calib"]
             gt_boxes = box_np_ops.box_camera_to_lidar(
                 gt_boxes, calib["R0_rect"], calib["Tr_velo_to_cam"])
             # only center format is allowed. so we need to convert
             # kitti [0.5, 0.5, 0] center to [0.5, 0.5, 0.5]
             box_np_ops.change_box3d_center_(gt_boxes, [0.5, 0.5, 0],
                                             [0.5, 0.5, 0.5])
         gt_dict = {
             'gt_boxes': gt_boxes,
             'gt_names': gt_names,
         }
         if 'difficulty' in annos:
             gt_dict['difficulty'] = annos["difficulty"]
         if 'group_ids' in annos:
             gt_dict['group_ids'] = annos["group_ids"]
         input_dict["ground_truth"] = gt_dict
     example = self._prep_func(input_dict=input_dict)
     example["metadata"] = {}
     if "image" in info:
         example["metadata"]["image"] = input_dict["image"]
     if "anchors_mask" in example:
         example["anchors_mask"] = example["anchors_mask"].astype(np.uint8)
     return example

コード例 #9

ファイルを表示

    def __getitem__(self, idx):
        """
        Args:
            idx: (int) this is the index of the frame, from 0 to len(Video) - 1
        """
        frame_info = self.video_info[idx]  # frame info
        root_path = Path(os.environ["DATADIR"]) / "synthia"

        res = {
            "depth": None,  # np.ndarray, dtype=np.float32, shape=(H, W)
            "points": None,
            "cam": {
                "image_str": None,  # str, image string
                "datatype": None,  # str, suffix type
            },
            "metadata": {
                "frameid": frame_info["frameid"],
                "image_shape": frame_info["image"]["image_shape"]
            },
            "calib": frame_info["calib"],
            "annos": None
        }

        # --------------------------------------------------------------------------------------------------------------
        # depth
        # --------------------------------------------------------------------------------------------------------------
        depth_path = Path(frame_info["depth"]["depth_path"])
        if not depth_path.is_absolute():
            depth_path = root_path / depth_path
        # synthia depth formula: "Depth = 5000 * (R + G*256 + B*256*256) / (256*256*256 - 1)"
        np_depth_image = np.array(
            Image.open(depth_path))  # (H, W, 4) dtype=np.uint8
        R, G, B = [np_depth_image[:, :, e].astype(np.int64)
                   for e in range(3)]  # (H, W), dtype=np.int64
        np_depth_image = 5000 * (R + G * 256 + B * 256 * 256) / (
            256 * 256 * 256 - 1)  # (H, W) dtype=np.float64
        np_depth_image = np_depth_image.astype(
            np.float32)  # (H, W) dtype=np.float32
        res["depth"] = np_depth_image

        # --------------------------------------------------------------------------------------------------------------
        # cam
        # --------------------------------------------------------------------------------------------------------------
        if self.cam or self.colored_pc:
            image_path = Path(frame_info['image']['image_path'])
            if not image_path.is_absolute():
                image_path = root_path / image_path
            with open(str(image_path), 'rb') as f:
                image_str = f.read()
            res["cam"]["image_str"] = image_str
            res["cam"]["datatype"] = image_path.suffix[1:]

        # --------------------------------------------------------------------------------------------------------------
        # points
        # --------------------------------------------------------------------------------------------------------------
        np_depth_image = np_depth_image[..., np.newaxis]  # (H, W, 1)
        if self.colored_pc:
            # concatenate depth map with colors
            np_rgb_image = np.array(Image.open(
                io.BytesIO(image_str)))  # (H, W, 4)
            np_rgb_image = np_rgb_image[:, :, :3]  # (H, W, 3)
            np_depth_image = np.concatenate([np_depth_image, np_rgb_image],
                                            axis=2)  # (H, W, 4)

        # points in cam frame
        P2 = frame_info['calib']['P2']  # intrinsics matrix
        if P2.shape == (4, 4):
            P2 = P2[:3, :3]
        else:
            assert P2.shape == (3, 3)
        points = cam_transforms.depth_map_to_point_cloud(
            np_depth_image, P2)  # (N, 3) or (N, 6)

        # points in velo frame
        Tr_velo_to_cam = frame_info['calib'][
            'Tr_velo_to_cam']  # extrinsics matrix
        Tr_cam_to_velo = np.linalg.inv(Tr_velo_to_cam)
        xyz1_cam = np.hstack(
            (points[:, :3], np.ones([len(points), 1],
                                    dtype=points.dtype)))  # (N, 4)
        xyz1_velo = xyz1_cam @ Tr_cam_to_velo.T  # (N, 4)
        points = np.hstack((xyz1_velo[:, :3], points[:,
                                                     3:]))  # (N, 3) or (N, 6)

        # points within MAX_DEPTH
        points = points[points[:, 0] < self._MAX_DEPTH, :]  # (M, 3) or (M, 6)
        res["points"] = points

        # --------------------------------------------------------------------------------------------------------------
        # annos
        # --------------------------------------------------------------------------------------------------------------
        annos = frame_info['annos']
        annos = self._remove_dontcare(annos)
        locs = annos["location"]
        dims = annos["dimensions"]
        rots = annos["rotation_y"]
        gt_names = annos["name"]
        gt_boxes = np.concatenate([locs, dims, rots[..., np.newaxis]],
                                  axis=1).astype(np.float32)
        gt_boxes = box_np_ops.box_camera_to_lidar(
            gt_boxes,
            r_rect=frame_info["calib"]["R0_rect"],
            velo2cam=frame_info["calib"]["Tr_velo_to_cam"])
        # only center format is allowed. so we need to convert kitti [0.5, 0.5, 0] center to [0.5, 0.5, 0.5]
        box_np_ops.change_box3d_center_(gt_boxes, [0.5, 0.5, 0],
                                        [0.5, 0.5, 0.5])
        res["annos"] = {
            'names': gt_names,
            'boxes': gt_boxes,
            'boxes2d': annos["bbox"]
        }

        return res

コード例 #10

ファイルを表示

ファイル: preprocess.py プロジェクト: sky77764/center3d.pytorch

def prep_pointcloud(
        input_dict,
        root_path,
        # voxel_generator,
        fv_generator,
        target_assigner,
        db_sampler=None,
        max_voxels=20000,
        class_names=['Car'],
        remove_outside_points=False,
        training=True,
        create_targets=True,
        shuffle_points=False,
        reduce_valid_area=False,
        remove_unknown=False,
        gt_rotation_noise=[-np.pi / 3, np.pi / 3],
        gt_loc_noise_std=[1.0, 1.0, 1.0],
        global_rotation_noise=[-np.pi / 4, np.pi / 4],
        global_scaling_noise=[0.95, 1.05],
        global_loc_noise_std=(0.2, 0.2, 0.2),
        global_random_rot_range=[0.78, 2.35],
        generate_bev=False,
        without_reflectivity=False,
        num_point_features=4,
        anchor_area_threshold=1,
        gt_points_drop=0.0,
        gt_drop_max_keep=10,
        remove_points_after_sample=False,
        anchor_cache=None,
        remove_environment=False,
        random_crop=False,
        reference_detections=None,
        add_rgb_to_points=False,
        lidar_input=False,
        unlabeled_db_sampler=None,
        out_size_factor=2,
        min_gt_point_dict=None,
        bev_only=False,
        use_group_id=False,
        out_dtype=np.float32,
        num_classes=1,
        RGB_embedding=False):
    """convert point cloud to voxels, create targets if ground truths 
    exists.
    """
    # prep_pointcloud_start = time.time()
    points = input_dict["points"]
    # if training:
    gt_boxes = input_dict["gt_boxes"]
    gt_names = input_dict["gt_names"]
    difficulty = input_dict["difficulty"]
    group_ids = None
    if use_group_id and "group_ids" in input_dict:
        group_ids = input_dict["group_ids"]

    rect = input_dict["rect"]
    Trv2c = input_dict["Trv2c"]
    P2 = input_dict["P2"]
    unlabeled_training = unlabeled_db_sampler is not None
    image_idx = input_dict["image_idx"]

    # t1 = time.time() - prep_pointcloud_start
    if shuffle_points:
        # shuffle is a little slow.
        np.random.shuffle(points)
    # t2 = time.time() - prep_pointcloud_start
    # print("t2-t1: ", t2-t1)     # 0.035

    if reference_detections is not None:
        C, R, T = box_np_ops.projection_matrix_to_CRT_kitti(P2)
        frustums = box_np_ops.get_frustum_v2(reference_detections, C)
        frustums -= T
        # frustums = np.linalg.inv(R) @ frustums.T
        frustums = np.einsum('ij, akj->aki', np.linalg.inv(R), frustums)
        frustums = box_np_ops.camera_to_lidar(frustums, rect, Trv2c)
        surfaces = box_np_ops.corner_to_surfaces_3d_jit(frustums)
        masks = points_in_convex_polygon_3d_jit(points, surfaces)
        points = points[masks.any(-1)]

    if remove_outside_points:  # and not lidar_input:
        image_shape = input_dict["image_shape"]
        points = box_np_ops.remove_outside_points(points, rect, Trv2c, P2,
                                                  image_shape)
    if remove_environment is True:  # and training:
        selected = kitti.keep_arrays_by_name(gt_names, class_names)
        gt_boxes = gt_boxes[selected]
        gt_names = gt_names[selected]
        difficulty = difficulty[selected]
        if group_ids is not None:
            group_ids = group_ids[selected]
        points = prep.remove_points_outside_boxes(points, gt_boxes)

    # if training:
    # print(gt_names)
    selected = kitti.drop_arrays_by_name(gt_names, ["DontCare"])
    gt_boxes = gt_boxes[selected]
    gt_names = gt_names[selected]
    difficulty = difficulty[selected]
    if group_ids is not None:
        group_ids = group_ids[selected]
    # t3 = time.time() - prep_pointcloud_start
    # print("t3-t2: ", t3 - t2)   # 0.0002
    gt_boxes = box_np_ops.box_camera_to_lidar(gt_boxes, rect, Trv2c)

    if remove_unknown:
        remove_mask = difficulty == -1
        """
        gt_boxes_remove = gt_boxes[remove_mask]
        gt_boxes_remove[:, 3:6] += 0.25
        points = prep.remove_points_in_boxes(points, gt_boxes_remove)
        """
        keep_mask = np.logical_not(remove_mask)
        gt_boxes = gt_boxes[keep_mask]
        gt_names = gt_names[keep_mask]
        difficulty = difficulty[keep_mask]
        if group_ids is not None:
            group_ids = group_ids[keep_mask]
    gt_boxes_mask = np.array([n in class_names for n in gt_names],
                             dtype=np.bool_)
    # t4 = time.time() - prep_pointcloud_start
    # print("t4-t3: ", t4 - t3)   # 0.001
    if RGB_embedding:
        RGB_image = cv2.imread(input_dict['image_path'])
        points_camera = box_np_ops.box_lidar_to_camera(points[:, :3], rect,
                                                       Trv2c)
        points_to_image_idx = box_np_ops.project_to_image(points_camera, P2)
        points_to_image_idx = points_to_image_idx.astype(int)
        mask = box_np_ops.remove_points_outside_image(RGB_image,
                                                      points_to_image_idx)
        points = points[mask]
        points_to_image_idx = points_to_image_idx[mask]
        BGR = RGB_image[points_to_image_idx[:, 1], points_to_image_idx[:, 0]]

        points = np.concatenate((points, BGR), axis=1)

        # test_mask = points_camera[mask][:, 0] < 0
        # test_image_idx = points_to_image_idx[test_mask]
        # RGB_image[test_image_idx[:, 1], test_image_idx[:, 0]] = [255, 0, 0]
        # test_mask = points_camera[mask][:, 0] >= 0
        # test_image_idx = points_to_image_idx[test_mask]
        # RGB_image[test_image_idx[:, 1], test_image_idx[:, 0]] = [0, 0, 255]
        # print()
    # t5 = time.time() - prep_pointcloud_start
    # print("t5-t4: ", t5 - t4)   # 0.019
    # TODO
    if db_sampler is not None and training:  # and not RGB_embedding:
        if RGB_embedding:
            num_point_features += 3

        fg_points_mask = box_np_ops.points_in_rbbox(points, gt_boxes)
        fg_points_list = []
        bg_points_mask = np.zeros((points.shape[0]), dtype=bool)
        for i in range(fg_points_mask.shape[1]):
            fg_points_list.append(points[fg_points_mask[:, i]])
            bg_points_mask = np.logical_or(bg_points_mask, fg_points_mask[:,
                                                                          i])
        bg_points_mask = np.logical_not(bg_points_mask)
        sampled_dict = db_sampler.sample_all(root_path,
                                             points[bg_points_mask],
                                             gt_boxes,
                                             gt_names,
                                             fg_points_list,
                                             num_point_features,
                                             random_crop,
                                             gt_group_ids=group_ids,
                                             rect=rect,
                                             Trv2c=Trv2c,
                                             P2=P2)

        # sampled_dict = db_sampler.sample_all(
        #     root_path,
        #     gt_boxes,
        #     gt_names,
        #     num_point_features,
        #     random_crop,
        #     gt_group_ids=group_ids,
        #     rect=rect,
        #     Trv2c=Trv2c,
        #     P2=P2)
        # t_sample_all = time.time() - prep_pointcloud_start
        # print("t_sample_all - t5: ", t_sample_all - t5)     # 3.83

        if sampled_dict is not None:
            sampled_gt_names = sampled_dict["gt_names"]
            sampled_gt_boxes = sampled_dict["gt_boxes"]
            points = sampled_dict["points"]
            sampled_gt_masks = sampled_dict["gt_masks"]
            remained_boxes_idx = sampled_dict["remained_boxes_idx"]
            # gt_names = gt_names[gt_boxes_mask].tolist()
            gt_names = np.concatenate([gt_names, sampled_gt_names], axis=0)
            # gt_names += [s["name"] for s in sampled]
            gt_boxes = np.concatenate([gt_boxes, sampled_gt_boxes])
            gt_boxes_mask = np.concatenate([gt_boxes_mask, sampled_gt_masks],
                                           axis=0)

            gt_names = gt_names[remained_boxes_idx]
            gt_boxes = gt_boxes[remained_boxes_idx]
            gt_boxes_mask = gt_boxes_mask[remained_boxes_idx]

            if group_ids is not None:
                sampled_group_ids = sampled_dict["group_ids"]
                group_ids = np.concatenate([group_ids, sampled_group_ids])
                group_ids = group_ids[remained_boxes_idx]

            # if remove_points_after_sample:
            #     # points = prep.remove_points_in_boxes(
            #     #     points, sampled_gt_boxes)
            #     locs = sampled_gt_boxes[:, 0:3]
            #     dims = sampled_gt_boxes[:, 3:6]
            #     angles = sampled_gt_boxes[:, 6]
            #     camera_box_origin = [0.5, 0.5, 0]
            #
            #     box_corners = box_np_ops.center_to_corner_box3d(
            #         locs, dims, angles, camera_box_origin, axis=2)
            #     box_corners_in_image = box_np_ops.project_to_fv_image(
            #         box_corners, example['grid_size'][i], example['meta'][i])
            #     box_centers_in_image = box_np_ops.project_to_fv_image(
            #         locs, example['grid_size'][i], example['meta'][i])

            # t_sample_all2 = time.time() - prep_pointcloud_start
            # print("t_sample_all2 - t_sample_all: ", t_sample_all2 - t_sample_all)   # 0.0002

    # unlabeled_mask = np.zeros((gt_boxes.shape[0], ), dtype=np.bool_)
    # if without_reflectivity and training:
    #     used_point_axes = list(range(num_point_features))
    #     used_point_axes.pop(3)
    #     points = points[:, used_point_axes]
    # pc_range = voxel_generator.point_cloud_range
    # bev_only = False
    # if bev_only:  # set z and h to limits
    #     gt_boxes[:, 2] = pc_range[2]
    #     gt_boxes[:, 5] = pc_range[5] - pc_range[2]
    if training:
        gt_loc_noise_std = [0.0, 0.0, 0.0]
        prep.noise_per_object_v3_(
            gt_boxes,
            points,
            gt_boxes_mask,
            rotation_perturb=gt_rotation_noise,
            center_noise_std=gt_loc_noise_std,
            global_random_rot_range=global_random_rot_range,
            group_ids=group_ids,
            num_try=100)
        # t_noise = time.time() - prep_pointcloud_start
        # print("t_noise - t_sample_all2: ", t_noise - t_sample_all2)     # 12.01
    # should remove unrelated objects after noise per object
    gt_boxes = gt_boxes[gt_boxes_mask]
    gt_names = gt_names[gt_boxes_mask]
    if group_ids is not None:
        group_ids = group_ids[gt_boxes_mask]
    gt_classes = np.array([class_names.index(n) + 1 for n in gt_names],
                          dtype=np.int32)

    # t6 = time.time() - prep_pointcloud_start
    # print("t6-t5: ", t6 - t5)   # 16.0

    if training:
        gt_boxes, points = prep.random_flip(gt_boxes, points)
        # gt_boxes, points = prep.global_rotation(
        #     gt_boxes, points, rotation=global_rotation_noise)
        gt_boxes, points = prep.global_scaling_v2(gt_boxes, points,
                                                  *global_scaling_noise)
        # Global translation
        # gt_boxes, points = prep.global_translate(gt_boxes, points, global_loc_noise_std)

    # bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    bv_range = [0, -40, 70.4, 40]
    mask = prep.filter_gt_box_outside_range(gt_boxes, bv_range)
    gt_boxes = gt_boxes[mask]
    gt_classes = gt_classes[mask]
    if group_ids is not None:
        group_ids = group_ids[mask]

    # limit rad to [-pi, pi]
    gt_boxes[:, 6] = box_np_ops.limit_period(gt_boxes[:, 6],
                                             offset=0.5,
                                             period=2 * np.pi)

    # TODO
    # if shuffle_points:
    #     # shuffle is a little slow.
    #     np.random.shuffle(points)

    # # t7 = time.time() - prep_pointcloud_start
    # # print("t7-t6: ", t7 - t6)   # 1.95
    # voxels, coordinates, num_points = voxel_generator.generate(
    #     points, max_voxels, RGB_embedding=RGB_embedding)
    # # t8 = time.time() - prep_pointcloud_start
    # # print("t8-t7: ", t8 - t7)   # 2.0
    # voxel_size = voxel_generator.voxel_size
    # grid_size = voxel_generator.grid_size
    # pc_range = copy.deepcopy(voxel_generator.point_cloud_range)
    # grid_size = voxel_generator.grid_size
    # phi_min = voxel_generator.phi_min
    # theta_min = voxel_generator.theta_min
    # image_h, image_w = grid_size[1], grid_size[0]
    # c = np.array([image_w / 2., image_h / 2.])
    # s = np.array([image_w, image_h], dtype=np.int32)
    # meta = {'c': c, 's': s, 'calib': P2, 'phi_min': phi_min, 'theta_min': theta_min}

    # t7 = time.time() - prep_pointcloud_start
    # print("t7-t6: ", t7 - t6)   # 1.95
    fv_image, points_mask = fv_generator.generate(points,
                                                  RGB_embedding=RGB_embedding,
                                                  occupancy_embedding=False)

    # t8 = time.time() - prep_pointcloud_start
    # print("t8-t7: ", t8 - t7)   # 2.0

    fv_dim = fv_generator.fv_dim
    pc_range = copy.deepcopy(fv_generator.spherical_coord_range)
    grid_size = fv_generator.grid_size
    phi_min = fv_generator.phi_min
    theta_min = fv_generator.theta_min
    image_h, image_w = fv_dim[1], fv_dim[0]
    c = np.array([image_w / 2., image_h / 2.])
    s = np.array([image_w, image_h], dtype=np.int32)
    meta = {
        'c': c,
        's': s,
        'calib': P2,
        'phi_min': phi_min,
        'theta_min': theta_min
    }

    fv_image = np.transpose(fv_image, [2, 1, 0])
    max_objs = 50
    num_objs = min(gt_boxes.shape[0], max_objs)

    box_np_ops.change_box3d_center_(gt_boxes,
                                    src=[0.5, 0.5, 0],
                                    dst=[0.5, 0.5, 0.5])
    spherical_gt_boxes = np.zeros((max_objs, gt_boxes.shape[1]))
    spherical_gt_boxes[:num_objs, :] = gt_boxes[:num_objs, :]
    spherical_gt_boxes[:num_objs, :] = convert_to_spherical_coor(
        gt_boxes[:num_objs, :])
    spherical_gt_boxes[:num_objs, 0] -= phi_min
    spherical_gt_boxes[:num_objs, 1] -= theta_min
    spherical_gt_boxes[:num_objs, 0] /= grid_size[0]
    spherical_gt_boxes[:num_objs, 1] /= grid_size[1]

    spherical_gt_boxes, num_objs = filter_outside_range(
        spherical_gt_boxes, num_objs, fv_dim)

    # t9 = time.time() - prep_pointcloud_start
    # print("t9-t8: ", t9 - t8)   # 0.0005
    example = {
        'fv_image': fv_image,
        'grid_size': grid_size,
        'pc_range': pc_range,
        'meta': meta,
        'spherical_gt_boxes': spherical_gt_boxes,
        'resized_image_shape': grid_size
    }

    example.update({'rect': rect, 'Trv2c': Trv2c, 'P2': P2})
    if RGB_embedding:
        RGB_image = cv2.resize(RGB_image, (image_w, image_h))
        example.update({'RGB_image': RGB_image})

    if training:
        hm = np.zeros((num_classes, image_h, image_w), dtype=np.float32)
        reg = np.zeros((max_objs, 2), dtype=np.float32)
        dep = np.zeros((max_objs, 1), dtype=np.float32)
        rotbin = np.zeros((max_objs, 2), dtype=np.int64)
        rotres = np.zeros((max_objs, 2), dtype=np.float32)
        dim = np.zeros((max_objs, 3), dtype=np.float32)
        ind = np.zeros((max_objs), dtype=np.int64)
        reg_mask = np.zeros((max_objs), dtype=np.uint8)
        rot_mask = np.zeros((max_objs), dtype=np.uint8)
        #
        # hm = np.zeros((num_classes, image_h, image_w), dtype=np.float32)
        # reg = np.zeros((image_w, image_h, 2), dtype=np.float32)
        # dep = np.zeros((image_w, image_h, 1), dtype=np.float32)
        # rotbin = np.zeros((image_w, image_h, 2), dtype=np.int64)
        # rotres = np.zeros((image_w, image_h, 2), dtype=np.float32)
        # dim = np.zeros((image_w, image_h, 3), dtype=np.float32)
        # # ind = np.zeros((max_objs), dtype=np.int64)
        # fg_mask = np.zeros((image_w, image_h), dtype=np.uint8)
        # # rot_mask = np.zeros((max_objs), dtype=np.uint8)

        draw_gaussian = draw_umich_gaussian
        # center heatmap
        radius = int(image_h / 30)
        # radius = int(image_h / 25)

        for k in range(num_objs):
            gt_3d_box = spherical_gt_boxes[k]
            cls_id = 0

            # print('heatmap gaussian radius: ' + str(radius))
            ct = np.array([gt_3d_box[0], gt_3d_box[1]], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct, radius)

            # depth(distance), wlh
            dep[k] = gt_3d_box[2]
            dim[k] = gt_3d_box[3:6]
            # dep[ct_int[0], ct_int[1], 0] = gt_3d_box[2]
            # dim[ct_int[0], ct_int[1], :] = gt_3d_box[3:6]

            # reg, ind, mask
            reg[k] = ct - ct_int
            ind[k] = ct_int[1] * image_w + ct_int[0]
            reg_mask[k] = rot_mask[k] = 1
            # fg_mask[ct_int[0], ct_int[1]] = 1

            # ry
            ry = gt_3d_box[6]
            if ry < np.pi / 6. or ry > 5 * np.pi / 6.:
                rotbin[k, 0] = 1
                rotres[k, 0] = ry - (-0.5 * np.pi)
                # rotbin[ct_int[0], ct_int[1], 0] = 1
                # rotres[ct_int[0], ct_int[1], 0] = ry - (-0.5 * np.pi)
            if ry > -np.pi / 6. or ry < -5 * np.pi / 6.:
                rotbin[k, 1] = 1
                rotres[k, 1] = ry - (0.5 * np.pi)
                # rotbin[ct_int[0], ct_int[1], 1] = 1
                # rotres[ct_int[0], ct_int[1], 1] = ry - (0.5 * np.pi)

        example.update({
            'hm': hm,
            'dep': dep,
            'dim': dim,
            'ind': ind,
            'rotbin': rotbin,
            'rotres': rotres,
            'reg_mask': reg_mask,
            'rot_mask': rot_mask,
            'reg': reg
        })
        # example.update({
        #     'hm': hm, 'dep': dep, 'dim': dim,
        #     'rotbin': rotbin, 'rotres': rotres,
        #     'fg_mask': fg_mask, 'reg': reg
        # })

    # t10 = time.time() - prep_pointcloud_start
    # print("total: ", t10)       # 19.58
    return example