Example #1
def test_vis():
    dset_name = sys.argv[1]
    assert dset_name in DatasetCatalog.list()

    meta = MetadataCatalog.get(dset_name)
    dprint("MetadataCatalog: ", meta)
    objs = meta.objs

    t_start = time.perf_counter()
    dicts = DatasetCatalog.get(dset_name)
    logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))

    dirname = "output/{}-data-vis".format(dset_name)
    os.makedirs(dirname, exist_ok=True)
    for d in dicts:
        img = read_image_cv2(d["file_name"], format="BGR")
        depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0

        anno = d["annotations"][0]  # only one instance per image
        imH, imW = img.shape[:2]
        mask = cocosegm2mask(anno["segmentation"], imH, imW)
        bbox = anno["bbox"]
        bbox_mode = anno["bbox_mode"]
        bbox_xyxy = np.array(BoxMode.convert(bbox, bbox_mode, BoxMode.XYXY_ABS))
        kpt3d = anno["bbox3d_and_center"]
        quat = anno["quat"]
        trans = anno["trans"]
        R = quat2mat(quat)
        # 0-based label
        cat_id = anno["category_id"]
        K = d["cam"]
        kpt_2d = misc.project_pts(kpt3d, K, R, trans)
        # TODO: visualize pose and keypoints
        label = objs[cat_id]
        # img_vis = vis_image_bboxes_cv2(img, bboxes=[bbox_xyxy], labels=[label])
        img_vis = vis_image_mask_bbox_cv2(img, [mask], bboxes=[bbox_xyxy], labels=[label])
        img_vis_kpt2d = img.copy()
        img_vis_kpt2d = misc.draw_projected_box3d(
            img_vis_kpt2d, kpt_2d, middle_color=None, bottom_color=(128, 128, 128)
        )

        xyz_info = mmcv.load(anno["xyz_path"])
        xyz = np.zeros((imH, imW, 3), dtype=np.float32)
        xyz_crop = xyz_info["xyz_crop"].astype(np.float32)
        x1, y1, x2, y2 = xyz_info["xyxy"]
        xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop
        xyz_show = get_emb_show(xyz)

        grid_show(
            [img[:, :, [2, 1, 0]], img_vis[:, :, [2, 1, 0]], img_vis_kpt2d[:, :, [2, 1, 0]], depth, xyz_show],
            ["img", "vis_img", "img_vis_kpts2d", "depth", "emb_show"],
            row=2,
            col=3,
        )
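
For reference, the call misc.project_pts(kpt3d, K, R, trans) above is a standard pinhole projection. A minimal sketch under that assumption (illustration only; the repository's own implementation may differ in details):

import numpy as np

def project_pts_sketch(pts_3d, K, R, t):
    """Pinhole projection: x = K @ (R @ X + t), then perspective divide.

    pts_3d: (N, 3) model points; K: (3, 3) intrinsics;
    R: (3, 3) rotation; t: (3,) translation.
    """
    pts_cam = pts_3d @ R.T + t.reshape(1, 3)  # points in the camera frame, (N, 3)
    pts_img = pts_cam @ K.T                   # homogeneous pixel coordinates
    return pts_img[:, :2] / pts_img[:, 2:3]   # (N, 2) pixel coordinates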
Example #2
    def read_data(self, dataset_dict):
        """load image and annos random shift & scale bbox; crop, rescale."""
        cfg = self.cfg
        r_head_cfg = cfg.MODEL.CDPN.ROT_HEAD
        pnp_net_cfg = cfg.MODEL.CDPN.PNP_NET

        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below

        dataset_name = dataset_dict["dataset_name"]

        image = read_image_cv2(dataset_dict["file_name"],
                               format=self.img_format)
        # should be consistent with the size in dataset_dict
        utils.check_image_size(dataset_dict, image)
        im_H_ori, im_W_ori = image.shape[:2]

        # currently only replace bg for train ###############################
        if self.split == "train":
            # some synthetic data already has a background; such images should carry an img_type of "real" (or anything other than "syn")
            img_type = dataset_dict.get("img_type", "real")
            if img_type == "syn":
                log_first_n(logging.WARNING, "replace bg", n=10)
                assert "segmentation" in dataset_dict["inst_infos"]
                mask = cocosegm2mask(
                    dataset_dict["inst_infos"]["segmentation"], im_H_ori,
                    im_W_ori)
                image, mask_trunc = self.replace_bg(image.copy(),
                                                    mask,
                                                    return_mask=True)
            else:  # real image
                if np.random.rand() < cfg.INPUT.CHANGE_BG_PROB:
                    log_first_n(logging.WARNING, "replace bg for real", n=10)
                    assert "segmentation" in dataset_dict["inst_infos"]
                    mask = cocosegm2mask(
                        dataset_dict["inst_infos"]["segmentation"], im_H_ori,
                        im_W_ori)
                    image, mask_trunc = self.replace_bg(image.copy(),
                                                        mask,
                                                        return_mask=True)
                else:
                    mask_trunc = None

        # NOTE: maybe add or change color augment here ===================================
        if self.split == "train" and self.color_aug_prob > 0 and self.color_augmentor is not None:
            if np.random.rand() < self.color_aug_prob:
                if cfg.INPUT.COLOR_AUG_SYN_ONLY:
                    # only augment synthetic images
                    if img_type not in ["real"]:
                        image = self._color_aug(image, self.color_aug_type)
                else:
                    image = self._color_aug(image, self.color_aug_type)

        # other transforms (mainly geometric ones);
        # for the 6d pose task, flip is not allowed in general, except for some 2d keypoint methods
        image, transforms = T.apply_augmentations(self.augmentation, image)
        im_H, im_W = image_shape = image.shape[:2]  # h, w

        # NOTE: scale camera intrinsic if necessary ================================
        scale_x = im_W / im_W_ori
        scale_y = im_H / im_H_ori  # NOTE: generally scale_x should be equal to scale_y
        if "cam" in dataset_dict:
            if im_W != im_W_ori or im_H != im_H_ori:
                dataset_dict["cam"][0] *= scale_x
                dataset_dict["cam"][1] *= scale_y
            K = dataset_dict["cam"].astype("float32")
            dataset_dict["cam"] = torch.as_tensor(K)

        input_res = cfg.MODEL.CDPN.BACKBONE.INPUT_RES
        out_res = cfg.MODEL.CDPN.BACKBONE.OUTPUT_RES

        # CHW -> HWC
        coord_2d = get_2d_coord_np(im_W, im_H, low=0,
                                   high=1).transpose(1, 2, 0)
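        # coord_2d stores each pixel's (x, y) location normalized to [0, 1];
        # it is cropped with the same RoI warp as the image below, so the
        # network receives per-pixel original-image coordinates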

        #################################################################################
        if self.split != "train":
            # don't load annotations at test time
            test_bbox_type = cfg.TEST.TEST_BBOX_TYPE
            if test_bbox_type == "gt":
                bbox_key = "bbox"
            else:
                bbox_key = f"bbox_{test_bbox_type}"
            assert not self.flatten, "Do not use flattened dicts for test!"
            # here get batched rois
            roi_infos = {}
            # yapf: disable
            roi_keys = ["scene_im_id", "file_name", "cam", "im_H", "im_W",
                        "roi_img", "inst_id", "roi_coord_2d", "roi_cls", "score", "roi_extent",
                         bbox_key, "bbox_mode", "bbox_center", "roi_wh",
                         "scale", "resize_ratio", "model_info",
                        ]
            for _key in roi_keys:
                roi_infos[_key] = []
            # yapf: enable
            # TODO: how to handle image without detections
            #   filter those when load annotations or detections, implement a function for this
            # "annotations" means detections
            for inst_i, inst_infos in enumerate(dataset_dict["annotations"]):
                # inherent image-level infos
                roi_infos["scene_im_id"].append(dataset_dict["scene_im_id"])
                roi_infos["file_name"].append(dataset_dict["file_name"])
                roi_infos["im_H"].append(im_H)
                roi_infos["im_W"].append(im_W)
                roi_infos["cam"].append(dataset_dict["cam"].cpu().numpy())

                # roi-level infos
                roi_infos["inst_id"].append(inst_i)
                roi_infos["model_info"].append(inst_infos["model_info"])

                roi_cls = inst_infos["category_id"]
                roi_infos["roi_cls"].append(roi_cls)
                roi_infos["score"].append(inst_infos["score"])

                # extent
                roi_extent = self._get_extents(dataset_name)[roi_cls]
                roi_infos["roi_extent"].append(roi_extent)

                bbox = BoxMode.convert(inst_infos[bbox_key],
                                       inst_infos["bbox_mode"],
                                       BoxMode.XYXY_ABS)
                bbox = np.array(transforms.apply_box([bbox])[0])
                roi_infos[bbox_key].append(bbox)
                roi_infos["bbox_mode"].append(BoxMode.XYXY_ABS)
                x1, y1, x2, y2 = bbox
                bbox_center = np.array([0.5 * (x1 + x2), 0.5 * (y1 + y2)])
                bw = max(x2 - x1, 1)
                bh = max(y2 - y1, 1)
                scale = max(bh, bw) * cfg.INPUT.DZI_PAD_SCALE
                scale = min(scale, max(im_H, im_W)) * 1.0
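                # DZI-style square RoI: side = max(bw, bh) * DZI_PAD_SCALE,
                # clamped so the crop never exceeds the larger image side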

                roi_infos["bbox_center"].append(bbox_center.astype("float32"))
                roi_infos["scale"].append(scale)
                roi_infos["roi_wh"].append(np.array([bw, bh],
                                                    dtype=np.float32))
                roi_infos["resize_ratio"].append(out_res / scale)

                # CHW, float32 tensor
                # roi_image
                roi_img = crop_resize_by_warp_affine(
                    image,
                    bbox_center,
                    scale,
                    input_res,
                    interpolation=cv2.INTER_LINEAR).transpose(2, 0, 1)

                roi_img = self.normalize_image(cfg, roi_img)
                roi_infos["roi_img"].append(roi_img.astype("float32"))

                # roi_coord_2d
                roi_coord_2d = crop_resize_by_warp_affine(
                    coord_2d,
                    bbox_center,
                    scale,
                    out_res,
                    interpolation=cv2.INTER_LINEAR).transpose(2, 0,
                                                              1)  # HWC -> CHW
                roi_infos["roi_coord_2d"].append(
                    roi_coord_2d.astype("float32"))

            for _key in roi_keys:
                if _key in ["roi_img", "roi_coord_2d"]:
                    dataset_dict[_key] = torch.as_tensor(
                        roi_infos[_key]).contiguous()
                elif _key in ["model_info", "scene_im_id", "file_name"]:
                    # can not convert to tensor
                    dataset_dict[_key] = roi_infos[_key]
                else:
                    dataset_dict[_key] = torch.tensor(roi_infos[_key])

            return dataset_dict
        #######################################################################################
        # NOTE: currently assume flattened dicts for train
        assert self.flatten, "Only support flattened dicts for train now"
        inst_infos = dataset_dict.pop("inst_infos")
        dataset_dict["roi_cls"] = roi_cls = inst_infos["category_id"]

        # extent
        roi_extent = self._get_extents(dataset_name)[roi_cls]
        dataset_dict["roi_extent"] = torch.tensor(roi_extent,
                                                  dtype=torch.float32)

        # load xyz =======================================================
        xyz_info = mmcv.load(inst_infos["xyz_path"])
        x1, y1, x2, y2 = xyz_info["xyxy"]
        # float16 does not affect performance (classification/regression)
        xyz_crop = xyz_info["xyz_crop"]
        xyz = np.zeros((im_H, im_W, 3), dtype=np.float32)
        xyz[y1:y2 + 1, x1:x2 + 1, :] = xyz_crop
        # NOTE: full object mask: any pixel with a nonzero xyz coordinate
        mask_obj = ((xyz[:, :, 0] != 0) | (xyz[:, :, 1] != 0) |
                    (xyz[:, :, 2] != 0)).astype(np.float32)
        # smooth xyz once, whether requested by config or needed for visualization
        if cfg.INPUT.SMOOTH_XYZ or cfg.TRAIN.VIS:
            xyz = self.smooth_xyz(xyz)

        # override bbox info using xyz_infos
        inst_infos["bbox"] = [x1, y1, x2, y2]
        inst_infos["bbox_mode"] = BoxMode.XYXY_ABS

        # USER: Implement additional transformations if you have other types of data
        # inst_infos.pop("segmentation")  # NOTE: use mask from xyz
        anno = transform_instance_annotations(inst_infos,
                                              transforms,
                                              image_shape,
                                              keypoint_hflip_indices=None)

        # augment bbox ===================================================
        bbox_xyxy = anno["bbox"]
        bbox_center, scale = self.aug_bbox(cfg, bbox_xyxy, im_H, im_W)
        bw = max(bbox_xyxy[2] - bbox_xyxy[0], 1)
        bh = max(bbox_xyxy[3] - bbox_xyxy[1], 1)

        # CHW, float32 tensor
        ## roi_image ------------------------------------
        roi_img = crop_resize_by_warp_affine(
            image,
            bbox_center,
            scale,
            input_res,
            interpolation=cv2.INTER_LINEAR).transpose(2, 0, 1)

        roi_img = self.normalize_image(cfg, roi_img)

        # roi_coord_2d ----------------------------------------------------
        roi_coord_2d = crop_resize_by_warp_affine(
            coord_2d,
            bbox_center,
            scale,
            out_res,
            interpolation=cv2.INTER_LINEAR).transpose(2, 0, 1)

        ## roi_mask ---------------------------------------
        # (mask_trunc < mask_visib < mask_obj)
        mask_visib = anno["segmentation"].astype("float32") * mask_obj
        if mask_trunc is None:
            mask_trunc = mask_visib
        else:
            mask_trunc = mask_visib * mask_trunc.astype("float32")

        if cfg.TRAIN.VIS:
            mask_xyz_interp = cv2.INTER_LINEAR
        else:
            mask_xyz_interp = cv2.INTER_NEAREST

        # maybe truncated mask (true mask for rgb)
        roi_mask_trunc = crop_resize_by_warp_affine(
            mask_trunc[:, :, None],
            bbox_center,
            scale,
            out_res,
            interpolation=mask_xyz_interp)

        # use original visible mask to calculate xyz loss (try full obj mask?)
        roi_mask_visib = crop_resize_by_warp_affine(
            mask_visib[:, :, None],
            bbox_center,
            scale,
            out_res,
            interpolation=mask_xyz_interp)

        roi_mask_obj = crop_resize_by_warp_affine(
            mask_obj[:, :, None],
            bbox_center,
            scale,
            out_res,
            interpolation=mask_xyz_interp)

        ## roi_xyz ----------------------------------------------------
        roi_xyz = crop_resize_by_warp_affine(xyz,
                                             bbox_center,
                                             scale,
                                             out_res,
                                             interpolation=mask_xyz_interp)

        # region label
        if r_head_cfg.NUM_REGIONS > 1:
            fps_points = self._get_fps_points(dataset_name)[roi_cls]
            roi_region = xyz_to_region(roi_xyz, fps_points)  # HW
            dataset_dict["roi_region"] = torch.as_tensor(
                roi_region.astype(np.int32)).contiguous()

        roi_xyz = roi_xyz.transpose(2, 0, 1)  # HWC-->CHW
        # normalize xyz to [0, 1] using extent
        roi_xyz[0] = roi_xyz[0] / roi_extent[0] + 0.5
        roi_xyz[1] = roi_xyz[1] / roi_extent[1] + 0.5
        roi_xyz[2] = roi_xyz[2] / roi_extent[2] + 0.5
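        # model-space coordinates lie roughly in [-extent/2, extent/2] per
        # axis, so x / extent + 0.5 maps each channel into [0, 1]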

        if ("CE" in r_head_cfg.XYZ_LOSS_TYPE) or (
                "cls" in cfg.MODEL.CDPN.NAME):  # convert target to int for cls
            # assume roi_xyz has been normalized in [0, 1]
            roi_xyz_bin = np.zeros_like(roi_xyz)
            roi_x_norm = roi_xyz[0]
            roi_x_norm[roi_x_norm < 0] = 0  # clip
            roi_x_norm[roi_x_norm > 0.999999] = 0.999999
            # [0, BIN-1]
            roi_xyz_bin[0] = np.asarray(roi_x_norm * r_head_cfg.XYZ_BIN,
                                        dtype=np.uint8)

            roi_y_norm = roi_xyz[1]
            roi_y_norm[roi_y_norm < 0] = 0
            roi_y_norm[roi_y_norm > 0.999999] = 0.999999
            roi_xyz_bin[1] = np.asarray(roi_y_norm * r_head_cfg.XYZ_BIN,
                                        dtype=np.uint8)

            roi_z_norm = roi_xyz[2]
            roi_z_norm[roi_z_norm < 0] = 0
            roi_z_norm[roi_z_norm > 0.999999] = 0.999999
            roi_xyz_bin[2] = np.asarray(roi_z_norm * r_head_cfg.XYZ_BIN,
                                        dtype=np.uint8)

            # the last bin is for bg
            roi_masks = {
                "trunc": roi_mask_trunc,
                "visib": roi_mask_visib,
                "obj": roi_mask_obj
            }
            roi_mask_xyz = roi_masks[r_head_cfg.XYZ_LOSS_MASK_GT]
            roi_xyz_bin[0][roi_mask_xyz == 0] = r_head_cfg.XYZ_BIN
            roi_xyz_bin[1][roi_mask_xyz == 0] = r_head_cfg.XYZ_BIN
            roi_xyz_bin[2][roi_mask_xyz == 0] = r_head_cfg.XYZ_BIN

            if "CE" in r_head_cfg.XYZ_LOSS_TYPE:
                dataset_dict["roi_xyz_bin"] = torch.as_tensor(
                    roi_xyz_bin.astype("uint8")).contiguous()
            if "/" in r_head_cfg.XYZ_LOSS_TYPE and len(
                    r_head_cfg.XYZ_LOSS_TYPE.split("/")[1]) > 0:
                dataset_dict["roi_xyz"] = torch.as_tensor(
                    roi_xyz.astype("float32")).contiguous()
        else:
            dataset_dict["roi_xyz"] = torch.as_tensor(
                roi_xyz.astype("float32")).contiguous()

        # pose targets ----------------------------------------------------------------------
        pose = inst_infos["pose"]
        allo_pose = egocentric_to_allocentric(pose)
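        # allocentric pose: rotation expressed w.r.t. the camera->object
        # viewing ray instead of the camera axes, so the rotation target is
        # invariant to where the object sits in the image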
        quat = inst_infos["quat"]
        allo_quat = mat2quat(allo_pose[:3, :3])

        # ====== actually not needed ==========
        if pnp_net_cfg.ROT_TYPE == "allo_quat":
            dataset_dict["allo_quat"] = torch.as_tensor(
                allo_quat.astype("float32"))
        elif pnp_net_cfg.ROT_TYPE == "ego_quat":
            dataset_dict["ego_quat"] = torch.as_tensor(quat.astype("float32"))
        # rot6d
        elif pnp_net_cfg.ROT_TYPE == "ego_rot6d":
            dataset_dict["ego_rot6d"] = torch.as_tensor(
                mat_to_ortho6d_np(pose[:3, :3].astype("float32")))
        elif pnp_net_cfg.ROT_TYPE == "allo_rot6d":
            dataset_dict["allo_rot6d"] = torch.as_tensor(
                mat_to_ortho6d_np(allo_pose[:3, :3].astype("float32")))
        # log quat
        elif pnp_net_cfg.ROT_TYPE == "ego_log_quat":
            dataset_dict["ego_log_quat"] = quaternion_lf.qlog(
                torch.as_tensor(quat.astype("float32"))[None])[0]
        elif pnp_net_cfg.ROT_TYPE == "allo_log_quat":
            dataset_dict["allo_log_quat"] = quaternion_lf.qlog(
                torch.as_tensor(allo_quat.astype("float32"))[None])[0]
        # lie vec
        elif pnp_net_cfg.ROT_TYPE == "ego_lie_vec":
            dataset_dict["ego_lie_vec"] = lie_algebra.rot_to_lie_vec(
                torch.as_tensor(pose[:3, :3].astype("float32")[None]))[0]
        elif pnp_net_cfg.ROT_TYPE == "allo_lie_vec":
            dataset_dict["allo_lie_vec"] = lie_algebra.rot_to_lie_vec(
                torch.as_tensor(allo_pose[:3, :3].astype("float32"))[None])[0]
        else:
            raise ValueError(f"Unknown rot type: {pnp_net_cfg.ROT_TYPE}")
        dataset_dict["ego_rot"] = torch.as_tensor(
            pose[:3, :3].astype("float32"))
        dataset_dict["trans"] = torch.as_tensor(
            inst_infos["trans"].astype("float32"))

        dataset_dict["roi_points"] = torch.as_tensor(
            self._get_model_points(dataset_name)[roi_cls].astype("float32"))
        dataset_dict["sym_info"] = self._get_sym_infos(dataset_name)[roi_cls]

        dataset_dict["roi_img"] = torch.as_tensor(
            roi_img.astype("float32")).contiguous()
        dataset_dict["roi_coord_2d"] = torch.as_tensor(
            roi_coord_2d.astype("float32")).contiguous()

        dataset_dict["roi_mask_trunc"] = torch.as_tensor(
            roi_mask_trunc.astype("float32")).contiguous()
        dataset_dict["roi_mask_visib"] = torch.as_tensor(
            roi_mask_visib.astype("float32")).contiguous()
        dataset_dict["roi_mask_obj"] = torch.as_tensor(
            roi_mask_obj.astype("float32")).contiguous()

        dataset_dict["bbox_center"] = torch.as_tensor(bbox_center,
                                                      dtype=torch.float32)
        dataset_dict["scale"] = scale
        dataset_dict["bbox"] = anno["bbox"]  # NOTE: original bbox
        dataset_dict["roi_wh"] = torch.as_tensor(
            np.array([bw, bh], dtype=np.float32))
        dataset_dict["resize_ratio"] = resize_ratio = out_res / scale
        z_ratio = inst_infos["trans"][2] / resize_ratio
        obj_center = anno["centroid_2d"]
        delta_c = obj_center - bbox_center
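        # scale-invariant translation target: center offset relative to the
        # box size, depth relative to the crop's resize ratio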
        dataset_dict["trans_ratio"] = torch.as_tensor(
            [delta_c[0] / bw, delta_c[1] / bh, z_ratio]).to(torch.float32)
        return dataset_dict
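
The RoI cropping throughout read_data relies on crop_resize_by_warp_affine, which maps a square region (given by bbox_center and scale) onto a fixed-size patch in one warp. A minimal sketch of that idea, assuming the (center, scale) convention used above (illustration, not the repository's exact code):

import cv2
import numpy as np

def crop_resize_by_warp_affine_sketch(img, center, scale, output_size,
                                      interpolation=cv2.INTER_LINEAR):
    """Crop the square RoI of side `scale` centered at `center` and resize it
    to output_size x output_size with a single affine warp."""
    cx, cy = center
    s = float(scale)
    # three corresponding points define the affine map:
    # RoI center, RoI top-center, RoI top-left corner
    src = np.float32([[cx, cy], [cx, cy - s / 2], [cx - s / 2, cy - s / 2]])
    o = output_size
    dst = np.float32([[o / 2, o / 2], [o / 2, 0], [0, 0]])
    M = cv2.getAffineTransform(src, dst)  # 2x3 affine matrix
    return cv2.warpAffine(img, M, (o, o), flags=interpolation)

Pixels falling outside the image are filled with zeros by cv2.warpAffine, which is what makes the padded (DZI) crops safe near image borders.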
Example #3
def test_vis():
    dset_name = sys.argv[1]
    assert dset_name in DatasetCatalog.list()

    meta = MetadataCatalog.get(dset_name)
    dprint("MetadataCatalog: ", meta)
    objs = meta.objs

    t_start = time.perf_counter()
    dicts = DatasetCatalog.get(dset_name)
    logger.info("Done loading {} samples with {:.3f}s.".format(
        len(dicts),
        time.perf_counter() - t_start))

    dirname = "output/{}-data-vis".format(dset_name)
    os.makedirs(dirname, exist_ok=True)
    for d in dicts:
        img = read_image_cv2(d["file_name"], format="BGR")
        depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0

        imH, imW = img.shape[:2]
        annos = d["annotations"]
        masks = [
            cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos
        ]
        bboxes = [anno["bbox"] for anno in annos]
        bbox_modes = [anno["bbox_mode"] for anno in annos]
        bboxes_xyxy = np.array([
            BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS)
            for box, box_mode in zip(bboxes, bbox_modes)
        ])
        kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
        quats = [anno["quat"] for anno in annos]
        transes = [anno["trans"] for anno in annos]
        Rs = [quat2mat(quat) for quat in quats]
        # 0-based label
        cat_ids = [anno["category_id"] for anno in annos]
        K = d["cam"]
        kpts_2d = [
            misc.project_pts(kpt3d, K, R, t)
            for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)
        ]
        # TODO: visualize pose and keypoints
        labels = [objs[cat_id] for cat_id in cat_ids]
        for _i in range(len(annos)):
            img_vis = vis_image_mask_bbox_cv2(img,
                                              masks[_i:_i + 1],
                                              bboxes=bboxes_xyxy[_i:_i + 1],
                                              labels=labels[_i:_i + 1])
            img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(),
                                                       kpts_2d[_i])
            if "test" not in dset_name:
                xyz_path = annos[_i]["xyz_path"]
                xyz_info = mmcv.load(xyz_path)
                x1, y1, x2, y2 = xyz_info["xyxy"]
                xyz_crop = xyz_info["xyz_crop"].astype(np.float32)
                xyz = np.zeros((imH, imW, 3), dtype=np.float32)
                xyz[y1:y2 + 1, x1:x2 + 1, :] = xyz_crop
                xyz_show = get_emb_show(xyz)
                xyz_crop_show = get_emb_show(xyz_crop)
                img_xyz = img.copy() / 255.0
                mask_xyz = ((xyz[:, :, 0] != 0) | (xyz[:, :, 1] != 0) |
                            (xyz[:, :, 2] != 0)).astype("uint8")
                fg_idx = np.where(mask_xyz != 0)
                img_xyz[fg_idx[0], fg_idx[1], :] = xyz_show[fg_idx[0],
                                                            fg_idx[1], :3]
                img_xyz_crop = img_xyz[y1:y2 + 1, x1:x2 + 1, :]
                img_vis_crop = img_vis[y1:y2 + 1, x1:x2 + 1, :]
                # diff mask (cast to a signed dtype to avoid uint8 underflow)
                diff_mask_xyz = np.abs(masks[_i].astype(np.int32) -
                                       mask_xyz.astype(np.int32))[y1:y2 + 1,
                                                                  x1:x2 + 1]

                grid_show(
                    [
                        img[:, :, [2, 1, 0]],
                        img_vis[:, :, [2, 1, 0]],
                        img_vis_kpts2d[:, :, [2, 1, 0]],
                        depth,
                        # xyz_show,
                        diff_mask_xyz,
                        xyz_crop_show,
                        img_xyz[:, :, [2, 1, 0]],
                        img_xyz_crop[:, :, [2, 1, 0]],
                        img_vis_crop,
                    ],
                    [
                        "img",
                        "vis_img",
                        "img_vis_kpts2d",
                        "depth",
                        "diff_mask_xyz",
                        "xyz_crop_show",
                        "img_xyz",
                        "img_xyz_crop",
                        "img_vis_crop",
                    ],
                    row=3,
                    col=3,
                )
            else:
                grid_show(
                    [
                        img[:, :, [2, 1, 0]], img_vis[:, :, [2, 1, 0]],
                        img_vis_kpts2d[:, :, [2, 1, 0]], depth
                    ],
                    ["img", "vis_img", "img_vis_kpts2d", "depth"],
                    row=2,
                    col=2,
                )
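
cocosegm2mask turns a COCO-style segmentation (polygons or RLE) into an H x W binary mask. A hedged sketch of the usual pycocotools recipe for this (the helper's actual body may differ):

import numpy as np
from pycocotools import mask as cocomask

def cocosegm2mask_sketch(segm, h, w):
    """Return an (h, w) uint8 mask from a COCO segmentation annotation."""
    if isinstance(segm, list):               # polygon(s)
        rles = cocomask.frPyObjects(segm, h, w)
        rle = cocomask.merge(rles)
    elif isinstance(segm["counts"], list):   # uncompressed RLE
        rle = cocomask.frPyObjects(segm, h, w)
    else:                                    # already-compressed RLE
        rle = segm
    return cocomask.decode(rle).astype(np.uint8)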
Example #4
def test_vis():
    dset_name = sys.argv[1]
    assert dset_name in DatasetCatalog.list()

    meta = MetadataCatalog.get(dset_name)
    dprint("MetadataCatalog: ", meta)
    objs = meta.objs

    t_start = time.perf_counter()
    dicts = DatasetCatalog.get(dset_name)
    logger.info("Done loading {} samples with {:.3f}s.".format(
        len(dicts),
        time.perf_counter() - t_start))

    dirname = "output/{}-data-vis".format(dset_name)
    os.makedirs(dirname, exist_ok=True)
    for d in dicts:
        img = read_image_cv2(d["file_name"], format="BGR")
        depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0

        imH, imW = img.shape[:2]
        annos = d["annotations"]
        masks = [
            cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos
        ]
        bboxes = [anno["bbox"] for anno in annos]
        bbox_modes = [anno["bbox_mode"] for anno in annos]
        bboxes_xyxy = np.array([
            BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS)
            for box, box_mode in zip(bboxes, bbox_modes)
        ])
        kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
        quats = [anno["quat"] for anno in annos]
        transes = [anno["trans"] for anno in annos]
        Rs = [quat2mat(quat) for quat in quats]
        # 0-based label
        cat_ids = [anno["category_id"] for anno in annos]
        K = d["cam"]
        kpts_2d = [
            misc.project_pts(kpt3d, K, R, t)
            for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)
        ]
        # TODO: visualize pose and keypoints
        labels = [objs[cat_id] for cat_id in cat_ids]
        # img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels)
        img_vis = vis_image_mask_bbox_cv2(img,
                                          masks,
                                          bboxes=bboxes_xyxy,
                                          labels=labels)
        img_vis_kpts2d = img.copy()
        for anno_i in range(len(annos)):
            img_vis_kpts2d = misc.draw_projected_box3d(img_vis_kpts2d,
                                                       kpts_2d[anno_i])
        grid_show(
            [
                img[:, :, [2, 1, 0]], img_vis[:, :, [2, 1, 0]],
                img_vis_kpts2d[:, :, [2, 1, 0]], depth
            ],
            [f"img:{d['file_name']}", "vis_img", "img_vis_kpts2d", "depth"],
            row=2,
            col=2,
        )
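
Lastly, quat2mat converts a stored quaternion into the rotation matrix used for projection. Assuming the common (w, x, y, z) ordering (as in transforms3d.quaternions.quat2mat), the computation is the standard one:

import numpy as np

def quat2mat_sketch(q):
    """(w, x, y, z) unit quaternion -> 3x3 rotation matrix."""
    w, x, y, z = q / np.linalg.norm(q)  # normalize to guard against drift
    return np.array([
        [1 - 2 * (y * y + z * z), 2 * (x * y - w * z), 2 * (x * z + w * y)],
        [2 * (x * y + w * z), 1 - 2 * (x * x + z * z), 2 * (y * z - w * x)],
        [2 * (x * z - w * y), 2 * (y * z + w * x), 1 - 2 * (x * x + y * y)],
    ])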