Ejemplo n.º 1
0
def proj(R_est, t_est, R_gt, t_gt, K, pts):
    """Average distance of projections of object model vertices [px].

    Projection error by Brachmann et al. (CVPR'16): the model points are
    projected once with the estimated pose and once with the ground-truth
    pose, and the per-point distances between the two projections are
    averaged.

    :param R_est: 3x3 ndarray with the estimated rotation matrix.
    :param t_est: 3x1 ndarray with the estimated translation vector.
    :param R_gt: 3x3 ndarray with the ground-truth rotation matrix.
    :param t_gt: 3x1 ndarray with the ground-truth translation vector.
    :param K: 3x3 ndarray with an intrinsic camera matrix.
    :param pts: nx3 ndarray with 3D model points.
    :return: The calculated error.
    """
    # Estimated pose first, ground truth second.
    poses = ((R_est, t_est), (R_gt, t_gt))
    uv_est, uv_gt = (misc.project_pts(pts, K, rot, trans) for rot, trans in poses)

    # One distance per model point (norm taken along axis 1), then the mean.
    per_point_dist = np.linalg.norm(uv_est - uv_gt, axis=1)
    return per_point_dist.mean()
Ejemplo n.º 2
0
def test_vis():
    """Show the first annotation of every sample of a registered dataset.

    The dataset name is read from ``sys.argv[1]`` and must be listed in
    ``DatasetCatalog``. For each sample a grid with five panels is shown:
    the image, the image with mask/box/label overlay, the image with the
    projected 3D box, the depth map and the xyz map rendered by
    ``get_emb_show``.
    """
    dset_name = sys.argv[1]
    assert dset_name in DatasetCatalog.list()

    meta = MetadataCatalog.get(dset_name)
    dprint("MetadataCatalog: ", meta)
    objs = meta.objs

    load_start = time.perf_counter()
    dicts = DatasetCatalog.get(dset_name)
    load_msg = "Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - load_start)
    logger.info(load_msg)

    # The directory is only created here; nothing in this function writes to it.
    os.makedirs("output/{}-data-vis".format(dset_name), exist_ok=True)

    for sample in dicts:
        img = read_image_cv2(sample["file_name"], format="BGR")
        # NOTE(review): the division by 1000 presumably converts mm to m - confirm.
        depth = mmcv.imread(sample["depth_file"], "unchanged") / 1000.0
        height, width = img.shape[:2]

        # Only one instance per image is visualized: the first annotation.
        inst = sample["annotations"][0]
        mask = cocosegm2mask(inst["segmentation"], height, width)
        bbox_xyxy = np.array(BoxMode.convert(inst["bbox"], inst["bbox_mode"], BoxMode.XYXY_ABS))

        rot = quat2mat(inst["quat"])
        kpt_2d = misc.project_pts(inst["bbox3d_and_center"], sample["cam"], rot, inst["trans"])

        # category_id is a 0-based index into the object names of the metadata.
        label = objs[inst["category_id"]]

        # TODO: visualize pose and keypoints
        img_vis = vis_image_mask_bbox_cv2(img, [mask], bboxes=[bbox_xyxy], labels=[label])
        img_vis_kpt2d = misc.draw_projected_box3d(
            img.copy(), kpt_2d, middle_color=None, bottom_color=(128, 128, 128)
        )

        # Paste the stored xyz crop into a full-size, zero-filled map.
        xyz_info = mmcv.load(inst["xyz_path"])
        xyz_full = np.zeros((height, width, 3), dtype=np.float32)
        crop = xyz_info["xyz_crop"].astype(np.float32)
        left, top, right, bottom = xyz_info["xyxy"]
        # The slice ends are +1, i.e. the stored box corners are inclusive.
        xyz_full[top : bottom + 1, left : right + 1, :] = crop
        xyz_show = get_emb_show(xyz_full)

        # The three image panels are channel-reversed (BGR -> RGB) for display.
        panels = [
            img[:, :, [2, 1, 0]],
            img_vis[:, :, [2, 1, 0]],
            img_vis_kpt2d[:, :, [2, 1, 0]],
            depth,
            xyz_show,
        ]
        titles = ["img", "vis_img", "img_vis_kpts2d", "depth", "emb_show"]
        grid_show(panels, titles, row=2, col=3)
Ejemplo n.º 3
0
def proj_sym(R_est, t_est, R_gt, t_gt, K, pts, syms):
    """Average distance of projections of object model vertices [px],
    minimized over the given symmetry transformations.

    - by Brachmann et al. (CVPR'16).

    The ground-truth pose is composed with every symmetry transformation in
    turn; the smallest resulting average projection distance is returned.

    :param R_est: 3x3 ndarray with the estimated rotation matrix.
    :param t_est: 3x1 ndarray with the estimated translation vector.
    :param R_gt: 3x3 ndarray with the ground-truth rotation matrix.
    :param t_gt: 3x1 ndarray with the ground-truth translation vector.
    :param K: 3x3 ndarray with an intrinsic camera matrix.
    :param pts: nx3 ndarray with 3D model points.
    :param syms: Symmetry transformations; each item is indexed with:
      - 'R': rotation, right-multiplied onto R_gt.
      - 't': translation, rotated by R_gt and added to t_gt.
    :return: The calculated error.
    """
    uv_est = misc.project_pts(pts, K, R_est, t_est)

    def _mean_err(sym):
        # Ground-truth pose composed with one symmetry transformation.
        rot_sym = R_gt.dot(sym["R"])
        trans_sym = R_gt.dot(sym["t"]) + t_gt
        uv_gt_sym = misc.project_pts(pts, K, rot_sym, trans_sym)
        return np.linalg.norm(uv_est - uv_gt_sym, axis=1).mean()

    # min() raises ValueError when syms is empty.
    return min(_mean_err(sym) for sym in syms)
Ejemplo n.º 4
0
def mspd(R_est, t_est, R_gt, t_gt, K, pts, syms):
    """Maximum Symmetry-Aware Projection Distance (MSPD).

    See: http://bop.felk.cvut.cz/challenges/bop-challenge-2019/

    For every symmetry transformation the largest per-point distance between
    the estimated and the (symmetry-transformed) ground-truth projections is
    taken; the smallest of these maxima is returned.

    :param R_est: 3x3 ndarray with the estimated rotation matrix.
    :param t_est: 3x1 ndarray with the estimated translation vector.
    :param R_gt: 3x3 ndarray with the ground-truth rotation matrix.
    :param t_gt: 3x1 ndarray with the ground-truth translation vector.
    :param K: 3x3 ndarray with the intrinsic camera matrix.
    :param pts: nx3 ndarray with 3D model points.
    :param syms: Set of symmetry transformations, each given by a dictionary with:
      - 'R': 3x3 ndarray with the rotation matrix.
      - 't': 3x1 ndarray with the translation vector.
    :return: The calculated error.
    """
    uv_est = misc.project_pts(pts, K, R_est, t_est)

    def _project_gt(sym):
        # Ground-truth pose composed with one symmetry transformation.
        return misc.project_pts(pts, K, R_gt.dot(sym["R"]), R_gt.dot(sym["t"]) + t_gt)

    worst_per_sym = [np.linalg.norm(uv_est - _project_gt(sym), axis=1).max() for sym in syms]
    # min() raises ValueError when syms is empty.
    return min(worst_per_sym)
Ejemplo n.º 5
0
def test_vis():
    """Show, instance by instance, the annotations of a registered dataset.

    The dataset name is read from ``sys.argv[1]`` and must be listed in
    ``DatasetCatalog``. For every annotation of every sample a grid is
    displayed:

    * if the dataset name contains ``"test"``: image, mask/box overlay,
      projected 3D box and depth (2x2 grid);
    * otherwise the stored xyz crop (``anno["xyz_path"]``) is loaded as well
      and a 3x3 grid with additional xyz panels is shown.
    """
    dset_name = sys.argv[1]
    assert dset_name in DatasetCatalog.list()

    meta = MetadataCatalog.get(dset_name)
    dprint("MetadataCatalog: ", meta)
    objs = meta.objs

    t_start = time.perf_counter()
    dicts = DatasetCatalog.get(dset_name)
    logger.info("Done loading {} samples with {:.3f}s.".format(
        len(dicts),
        time.perf_counter() - t_start))

    # The directory is only created here; nothing in this function writes to it.
    dirname = "output/{}-data-vis".format(dset_name)
    os.makedirs(dirname, exist_ok=True)
    for d in dicts:
        img = read_image_cv2(d["file_name"], format="BGR")
        # NOTE(review): the division by 1000 presumably converts mm to m - confirm.
        depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0

        imH, imW = img.shape[:2]
        annos = d["annotations"]
        masks = [
            cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos
        ]
        bboxes_xyxy = np.array([
            BoxMode.convert(anno["bbox"], anno["bbox_mode"], BoxMode.XYXY_ABS)
            for anno in annos
        ])
        Rs = [quat2mat(anno["quat"]) for anno in annos]
        K = d["cam"]
        kpts_2d = [
            misc.project_pts(anno["bbox3d_and_center"], K, R, anno["trans"])
            for anno, R in zip(annos, Rs)
        ]
        # TODO: visualize pose and keypoints
        # category_id is a 0-based index into the object names of the metadata.
        labels = [objs[anno["category_id"]] for anno in annos]

        for _i, anno in enumerate(annos):
            # Length-1 slices: the overlay helper is given one instance at a time.
            img_vis = vis_image_mask_bbox_cv2(img,
                                              masks[_i:_i + 1],
                                              bboxes=bboxes_xyxy[_i:_i + 1],
                                              labels=labels[_i:_i + 1])
            img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(),
                                                       kpts_2d[_i])

            if "test" in dset_name:
                # No xyz data is loaded for test sets.
                grid_show(
                    [
                        img[:, :, [2, 1, 0]], img_vis[:, :, [2, 1, 0]],
                        img_vis_kpts2d[:, :, [2, 1, 0]], depth
                    ],
                    ["img", "vis_img", "img_vis_kpts2d", "depth"],
                    row=2,
                    col=2,
                )
                continue

            xyz_info = mmcv.load(anno["xyz_path"])
            x1, y1, x2, y2 = xyz_info["xyxy"]
            xyz_crop = xyz_info["xyz_crop"].astype(np.float32)
            # Paste the crop into a full-size, zero-filled map; the slice ends
            # are +1, i.e. the stored box corners are inclusive.
            xyz = np.zeros((imH, imW, 3), dtype=np.float32)
            xyz[y1:y2 + 1, x1:x2 + 1, :] = xyz_crop
            xyz_show = get_emb_show(xyz)
            xyz_crop_show = get_emb_show(xyz_crop)

            # Overlay the rendered xyz values on the image wherever any of the
            # three xyz channels is non-zero.
            img_xyz = img.copy() / 255.0
            mask_xyz = ((xyz[:, :, 0] != 0) | (xyz[:, :, 1] != 0) |
                        (xyz[:, :, 2] != 0)).astype("uint8")
            fg_idx = np.where(mask_xyz != 0)
            img_xyz[fg_idx[0], fg_idx[1], :] = xyz_show[fg_idx[0],
                                                        fg_idx[1], :3]
            img_xyz_crop = img_xyz[y1:y2 + 1, x1:x2 + 1, :]
            img_vis_crop = img_vis[y1:y2 + 1, x1:x2 + 1, :]

            # Difference between the annotated mask and the xyz foreground.
            # mask_xyz is uint8, so if the annotation mask is unsigned or bool
            # a plain subtraction would wrap around (0 - 1 -> 255); compute
            # the difference in a signed dtype instead.
            signed_mask = np.asarray(masks[_i]).astype(np.int16)
            diff_mask_xyz = np.abs(signed_mask - mask_xyz.astype(np.int16))[
                y1:y2 + 1, x1:x2 + 1]

            grid_show(
                [
                    img[:, :, [2, 1, 0]],
                    img_vis[:, :, [2, 1, 0]],
                    img_vis_kpts2d[:, :, [2, 1, 0]],
                    depth,
                    # xyz_show,
                    diff_mask_xyz,
                    xyz_crop_show,
                    img_xyz[:, :, [2, 1, 0]],
                    img_xyz_crop[:, :, [2, 1, 0]],
                    # Cropped from img_vis, so it needs the same channel
                    # reversal as the "vis_img" panel above.
                    img_vis_crop[:, :, [2, 1, 0]],
                ],
                [
                    "img",
                    "vis_img",
                    "img_vis_kpts2d",
                    "depth",
                    "diff_mask_xyz",
                    "xyz_crop_show",
                    "img_xyz",
                    "img_xyz_crop",
                    "img_vis_crop",
                ],
                row=3,
                col=3,
            )
Ejemplo n.º 6
0
    def process(self, inputs, outputs, out_dict):
        """Store the pose predictions of one batch in ``self._predictions``.

        When ``cfg.TEST.USE_PNP`` is set, the call is forwarded to the PnP
        variant selected by ``cfg.TEST.PNP_TYPE`` and that method's return
        value is returned. Otherwise the rotations/translations in
        ``out_dict`` are used as the pose estimates and nothing is returned.

        Args:
            inputs: the inputs to a model. It is a list of dict; each dict
                holds per-instance sequences under the keys "roi_img",
                "file_name", "scene_im_id" (a "<scene>/<image>" string),
                "cam", "roi_cls" and "score" (plus "bbox_est" when
                ``cfg.DEBUG`` is on).
            outputs: list of dict, one per entry of ``inputs``; the "time"
                entry of each dict is incremented in place.
            out_dict: mapping with the tensors "rot" and "trans". Their first
                axis is indexed by a running instance counter over all
                ``inputs`` in order.

        Raises:
            NotImplementedError: if ``cfg.TEST.USE_PNP`` is set and
                ``cfg.TEST.PNP_TYPE`` is not one of the supported values.
        """
        cfg = self.cfg
        if cfg.TEST.USE_PNP:
            pnp_type = cfg.TEST.PNP_TYPE.lower()
            if pnp_type == "ransac_pnp":
                return self.process_pnp_ransac(inputs, outputs, out_dict)
            # Config value -> ``pnp_type`` argument of process_net_and_pnp.
            net_pnp_types = {
                "net_iter_pnp": "iter",
                "net_ransac_pnp": "ransac",
                # use rot from PnP/RANSAC and translation from Net
                "net_ransac_pnp_rot": "ransac_rot",
            }
            if pnp_type not in net_pnp_types:
                raise NotImplementedError(
                    "Unsupported cfg.TEST.PNP_TYPE: {}".format(
                        cfg.TEST.PNP_TYPE))
            return self.process_net_and_pnp(inputs,
                                            outputs,
                                            out_dict,
                                            pnp_type=net_pnp_types[pnp_type])

        out_rots = out_dict["rot"].detach().to(self._cpu_device).numpy()
        out_transes = out_dict["trans"].detach().to(self._cpu_device).numpy()

        # Flat index into out_rots / out_transes. It advances for every
        # instance, including skipped ones, so it stays aligned across inputs.
        out_i = -1
        for _input, output in zip(inputs, outputs):
            # NOTE(review): taken once per input, so for later instances of
            # the same input the elapsed time added below also covers the
            # earlier instances.
            start_process_time = time.perf_counter()
            for inst_i in range(len(_input["roi_img"])):
                out_i += 1
                file_name = _input["file_name"][inst_i]

                scene_im_id_split = _input["scene_im_id"][inst_i].split("/")
                K = _input["cam"][inst_i].cpu().numpy().copy()

                roi_label = _input["roi_cls"][inst_i]  # 0-based label
                score = _input["score"][inst_i]
                roi_label, cls_name = self._maybe_adapt_label_cls_name(
                    roi_label)
                if cls_name is None:
                    continue

                scene_id = scene_im_id_split[0]
                im_id = int(scene_im_id_split[1])

                # get pose: index with the batch-wide counter, not with the
                # per-input index, which restarts at 0 for every input.
                rot_est = out_rots[out_i]
                trans_est = out_transes[out_i]

                if cfg.DEBUG:  # visualize pose
                    pose_est = np.hstack([rot_est, trans_est.reshape(3, 1)])

                    # Hard-coded debug filter: every other image is skipped
                    # entirely, i.e. its prediction is not recorded either.
                    if f"{int(scene_id)}/{im_id}" != "9/499":
                        continue

                    im_ori = mmcv.imread(file_name, "color")

                    bbox = _input["bbox_est"][inst_i].cpu().numpy().copy()

                    test_label = _input["roi_cls"][inst_i]
                    kpt_3d = self.kpts_3d[test_label]
                    # kpt_3d = self.kpts_axis_3d[test_label]
                    kpt_2d = misc.project_pts(kpt_3d, K, rot_est, trans_est)

                    gt_dict = self.gts[cls_name][file_name]
                    gt_rot = gt_dict["R"]
                    gt_trans = gt_dict["t"]
                    kpt_2d_gt = misc.project_pts(kpt_3d, K, gt_rot, gt_trans)

                    # Extent of the estimated and ground-truth keypoints. The
                    # seeds mean the maxima are never below 0 and the minima
                    # never above 1000.
                    maxx, maxy, minx, miny = 0, 0, 1000, 1000
                    for pts_2d in (kpt_2d, kpt_2d_gt):
                        for px, py in pts_2d:
                            maxx, maxy = max(maxx, px), max(maxy, py)
                            minx, miny = min(minx, px), min(miny, py)
                    center = np.array([(minx + maxx) / 2, (miny + maxy) / 2])
                    scale = max(maxx - minx, maxy - miny) + 5

                    out_size = 256
                    zoomed_im = crop_resize_by_warp_affine(
                        im_ori, center, scale, out_size)
                    vis_dir = osp.join(cfg.OUTPUT_DIR, "vis")
                    mmcv.mkdir_or_exist(vis_dir)
                    save_path = osp.join(
                        vis_dir,
                        "{}_{}_{:06d}_no_bbox.png".format(
                            cls_name, scene_id, im_id))
                    mmcv.imwrite(zoomed_im, save_path)

                    # Map keypoints from image coordinates into the
                    # out_size x out_size zoomed crop.
                    # yapf: disable
                    kpt_2d = np.array(
                        [
                            [(x - (center[0] - scale / 2)) * out_size / scale,
                             (y - (center[1] - scale / 2)) * out_size / scale]
                            for [x, y] in kpt_2d
                        ]
                    )

                    kpt_2d_gt = np.array(
                        [
                            [(x - (center[0] - scale / 2)) * out_size / scale,
                             (y - (center[1] - scale / 2)) * out_size / scale]
                            for [x, y] in kpt_2d_gt
                        ]
                    )
                    # yapf: enable
                    # draw est bbox
                    linewidth = 3
                    visualizer = MyVisualizer(zoomed_im[:, :, ::-1],
                                              self._metadata)
                    # zoomed_im_vis = visualizer.draw_axis3d_and_center(
                    #     kpt_2d, linewidth=linewidth, draw_center=True
                    # )
                    # visualizer.draw_bbox3d_and_center(
                    #     kpt_2d_gt, top_color=_BLUE, bottom_color=_GREY, linewidth=linewidth, draw_center=True
                    # )
                    zoomed_im_vis = visualizer.draw_bbox3d_and_center(
                        kpt_2d,
                        top_color=_GREEN,
                        bottom_color=_GREY,
                        linewidth=linewidth,
                        draw_center=True)
                    save_path = osp.join(
                        vis_dir,
                        "{}_{}_{:06d}_gt_est.png".format(
                            cls_name, scene_id, im_id))
                    zoomed_im_vis.save(save_path)
                    print("zoomed_in_vis saved to:", save_path)

                    im_vis = vis_image_bboxes_cv2(im_ori, [bbox],
                                                  [f"{cls_name}_{score}"])

                    # Render the model with the estimated pose over the
                    # gray-scale image.
                    self.ren.clear()
                    self.ren.draw_background(
                        mmcv.bgr2gray(im_ori, keepdim=True))
                    self.ren.draw_model(
                        self.ren_models[self.data_ref.objects.index(cls_name)],
                        pose_est)
                    ren_im, _ = self.ren.finish()
                    grid_show(
                        [ren_im[:, :, ::-1], im_vis[:, :, ::-1]],
                        [f"ren_im_{cls_name}", f"{scene_id}/{im_id}_{score}"],
                        row=1,
                        col=2,
                    )

                output["time"] += time.perf_counter() - start_process_time

                if cls_name not in self._predictions:
                    self._predictions[cls_name] = OrderedDict()

                # Keyed by class name, then file name: a later instance of the
                # same class in the same file overwrites an earlier one.
                result = {
                    "score": score,
                    "R": rot_est,
                    "t": trans_est,
                    "time": output["time"]
                }
                self._predictions[cls_name][file_name] = result
Ejemplo n.º 7
0
def test_vis():
    """Show all annotations of every sample of a registered dataset.

    The dataset name is read from ``sys.argv[1]`` and must be listed in
    ``DatasetCatalog``. For each sample a 2x2 grid is displayed: the image,
    the image with every instance's mask/box/label, the image with every
    instance's projected 3D box, and the depth map.
    """
    dset_name = sys.argv[1]
    assert dset_name in DatasetCatalog.list()

    meta = MetadataCatalog.get(dset_name)
    dprint("MetadataCatalog: ", meta)
    objs = meta.objs

    load_start = time.perf_counter()
    dicts = DatasetCatalog.get(dset_name)
    load_msg = "Done loading {} samples with {:.3f}s.".format(
        len(dicts), time.perf_counter() - load_start)
    logger.info(load_msg)

    # The directory is only created here; nothing in this function writes to it.
    os.makedirs("output/{}-data-vis".format(dset_name), exist_ok=True)

    for sample in dicts:
        img = read_image_cv2(sample["file_name"], format="BGR")
        # NOTE(review): the division by 1000 presumably converts mm to m - confirm.
        depth = mmcv.imread(sample["depth_file"], "unchanged") / 1000.0
        height, width = img.shape[:2]

        instances = sample["annotations"]
        masks = [
            cocosegm2mask(inst["segmentation"], height, width)
            for inst in instances
        ]
        bboxes_xyxy = np.array([
            BoxMode.convert(inst["bbox"], inst["bbox_mode"], BoxMode.XYXY_ABS)
            for inst in instances
        ])
        rotations = [quat2mat(inst["quat"]) for inst in instances]
        cam = sample["cam"]
        kpts_2d = [
            misc.project_pts(inst["bbox3d_and_center"], cam, rot, inst["trans"])
            for inst, rot in zip(instances, rotations)
        ]
        # TODO: visualize pose and keypoints
        # category_id is a 0-based index into the object names of the metadata.
        labels = [objs[inst["category_id"]] for inst in instances]

        img_vis = vis_image_mask_bbox_cv2(img,
                                          masks,
                                          bboxes=bboxes_xyxy,
                                          labels=labels)

        # Draw the projected 3D boxes of all instances onto one image copy.
        img_vis_kpts2d = img.copy()
        for kpt_2d in kpts_2d:
            img_vis_kpts2d = misc.draw_projected_box3d(img_vis_kpts2d, kpt_2d)

        # The three image panels are channel-reversed (BGR -> RGB) for display.
        panels = [
            img[:, :, [2, 1, 0]],
            img_vis[:, :, [2, 1, 0]],
            img_vis_kpts2d[:, :, [2, 1, 0]],
            depth,
        ]
        titles = [f"img:{sample['file_name']}", "vis_img", "img_vis_kpts2d", "depth"]
        grid_show(panels, titles, row=2, col=2)