Example #1
    def __getitem__(self, index):
        dataset = self._dataset

        frame = dataset.get_frame(index)
        examples = dataset.get_example(index)

        vizs = []
        for example in examples:
            viz = imgviz.tile(
                [
                    example["rgb"],
                    imgviz.depth2rgb(example["pcd"][:, :, 0]),
                    imgviz.depth2rgb(example["pcd"][:, :, 1]),
                    imgviz.depth2rgb(example["pcd"][:, :, 2]),
                ],
                border=(255, 255, 255),
            )
            vizs.append(viz)
        viz = imgviz.tile(vizs)
        del vizs

        viz = imgviz.tile([frame["rgb"], viz], shape=(1, 2))
        viz = imgviz.resize(viz, width=1000)

        return viz
Example #2
    def debug_render(self, T_camera2world):
        class_names = self._models.class_names

        height, width = 480, 640
        fovx = 60
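        # derive the vertical FoV from the horizontal one via the aspect ratio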
        fovy = fovx / width * height

        scene = morefusion.extra.pybullet.get_trimesh_scene()
        list(scene.geometry.values())[0].visual.face_colors = (1.0, 1.0, 1.0)
        for name, geometry in scene.geometry.items():
            if hasattr(geometry.visual, "to_color"):
                geometry.visual = geometry.visual.to_color()
        scene.camera.resolution = (width, height)
        scene.camera.fov = (fovx, fovy)
        scene.camera_transform = morefusion.extra.trimesh.to_opengl_transform(
            T_camera2world)

        rgb, depth, ins, cls = self.render(
            T_camera2world,
            fovy=scene.camera.fov[1],
            height=height,
            width=width,
        )

        ins_viz = imgviz.label2rgb(ins + 1, rgb)
        cls_viz = imgviz.label2rgb(cls,
                                   rgb,
                                   label_names=class_names,
                                   font_size=20)
        viz = imgviz.tile([rgb, ins_viz, cls_viz],
                          border=(255, 255, 255),
                          shape=(1, 3))
        viz = imgviz.resize(viz, width=1500)
        imgviz.io.pyglet_imshow(viz, "pybullet")

        rgb = morefusion.extra.trimesh.save_image(scene)[:, :, :3]
        ins_viz = imgviz.label2rgb(ins + 1, rgb)
        cls_viz = imgviz.label2rgb(cls,
                                   rgb,
                                   label_names=class_names,
                                   font_size=20)
        viz = imgviz.tile([rgb, ins_viz, cls_viz],
                          border=(255, 255, 255),
                          shape=(1, 3))
        viz = imgviz.resize(viz, width=1500)
        imgviz.io.pyglet_imshow(viz, "trimesh")

        imgviz.io.pyglet_run()
Example #3
def tile():
    data = imgviz.data.arc2017()

    rgb = data["rgb"]
    bboxes = data["bboxes"].astype(int)
    masks = data["masks"] == 1
    crops = []
    for bbox, mask in zip(bboxes, masks):
        slice_ = slice(bbox[0], bbox[2]), slice(bbox[1], bbox[3])
        rgb_crop = rgb[slice_]
        mask_crop = mask[slice_]
        crops.append(rgb_crop * mask_crop[:, :, None])
    tiled = imgviz.tile(imgs=crops, border=(255, 255, 255))

    # -------------------------------------------------------------------------

    plt.figure(dpi=200)

    plt.subplot(121)
    plt.title("original")
    plt.imshow(rgb)
    plt.axis("off")

    plt.subplot(122)
    plt.title("instances")
    plt.imshow(tiled)
    plt.axis("off")

    img = imgviz.io.pyplot_to_numpy()
    plt.close()

    return img
Example #4
def main():
    example = morefusion.datasets.YCBVideoDataset("train")[0]
    depth = example["depth"]
    K = example["meta"]["intrinsic_matrix"]
    pcd = morefusion.geometry.pointcloud_from_depth(depth,
                                                    fx=K[0, 0],
                                                    fy=K[1, 1],
                                                    cx=K[0, 2],
                                                    cy=K[1, 2])

    normals_organized = morefusion.geometry.estimate_pointcloud_normals(pcd)

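    # for comparison, estimate normals from only the valid (non-NaN) points,
    # treated as an unorganized (N, 3) cloud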
    nonnan = ~np.isnan(depth)
    normals_unorganized = np.full_like(pcd, -1)
    normals_unorganized[
        nonnan] = morefusion.geometry.estimate_pointcloud_normals(pcd[nonnan])

    normals_organized = np.uint8((normals_organized + 1) / 2 * 255)
    normals_unorganized = np.uint8((normals_unorganized + 1) / 2 * 255)

    viz = imgviz.tile(
        [normals_organized, normals_unorganized],
        (1, 2),
        border=(255, 255, 255),
    )
    imgviz.io.pyglet_imshow(viz)
    imgviz.io.pyglet_run()
Example #5
def main():
    df, top_images = get_data()

    fig = plt.figure(figsize=(15, 11))
    axes = fig.subplots(3, 1)

    df.plot.bar(
        x="name",
        y="bbox_diagonal",
        color=(0.1, 0.1, 0.1, 0.1),
        edgecolor="red",
        ax=axes[0],
    )
    axes[0].get_xaxis().set_visible(False)

    df.plot.bar(
        x="name",
        y="voxel_size",
        color=(0.1, 0.1, 0.1, 0.1),
        edgecolor="blue",
        rot=45,
        ax=axes[1],
    )
    axes[1].set_xlabel(None)

    axes[2].imshow(imgviz.tile(top_images, shape=(1, len(top_images))))
    axes[2].get_xaxis().set_visible(False)
    axes[2].get_yaxis().set_visible(False)

    plt.suptitle("BBox and Voxel size of YCB_Video_Models")

    plt.show()
Example #6
def test_tile():
    img1 = np.random.uniform(0, 255, (15, 25, 3)).round().astype(np.uint8)
    img2 = np.random.uniform(0, 255, (25, 25, 3)).round().astype(np.uint8)
    img3 = np.random.uniform(0, 255, (25, 10, 3)).round().astype(np.uint8)
    tiled = imgviz.tile([img1, img2, img3], (1, 3))

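    # each cell takes the size of the largest input (25 x 25 here), with
    # smaller images centered, so a (1, 3) grid yields a 25 x 75 canvas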
    assert tiled.shape == (25, 75, 3)
    assert tiled.dtype == np.uint8
Example #7
    def __getitem__(self, index):
        example = self._dataset[index]
        instance_id = self._dataset._ids[index]

        image_id = osp.dirname(instance_id)
        index_parent = self._dataset_parent._ids.index(image_id)
        frame = self._dataset_parent.get_frame(index_parent)

        viz = imgviz.tile(
            [
                example["rgb"],
                imgviz.depth2rgb(example["pcd"][:, :, 0]),
                imgviz.depth2rgb(example["pcd"][:, :, 1]),
                imgviz.depth2rgb(example["pcd"][:, :, 2]),
            ],
            border=(255, 255, 255),
        )
        viz = imgviz.tile([frame["rgb"], viz], (1, 2))
        return viz
Example #8
    def __getitem__(self, index):
        rgb, bbox, label, mask = self._dataset[index]
        rgb += self._model.extractor.mean
        rgb = rgb.astype(np.uint8)
        rgb = rgb.transpose(1, 2, 0)

        class_ids = label + 1
        captions = [
            morefusion.datasets.ycb_video.class_names[c] for c in class_ids
        ]
        viz = imgviz.instances2rgb(
            rgb, labels=class_ids, masks=mask, bboxes=bbox, captions=captions,
        )
        viz = imgviz.tile([rgb, viz], shape=(1, 2), border=(255, 255, 255))
        return imgviz.resize(viz, width=1500)
Example #9
def tile(in_files, out, resize=1, shape=None):
    fps = None
    max_n_frames = 0
    readers = []
    for in_file in sorted(in_files):
        reader = imageio.get_reader(in_file)
        readers.append(reader)

        if fps is None:
            fps = reader.get_meta_data()["fps"]
        max_n_frames = max(max_n_frames, reader.count_frames())

    i = 0
    writer = None
    images_blank = None
    pbar = tqdm.tqdm(desc=out, total=max_n_frames)
    while True:
        images = []
        finished = []
        for j, reader in enumerate(readers):
            try:
                img = reader.get_data(i)
                if resize != 1:
                    height = int(round(img.shape[0] * resize))
                    img = imgviz.resize(img, height=height)
                finished.append(False)
            except IndexError:
                img = images_blank[j]
                finished.append(True)
            images.append(img)
        if all(finished):
            break
        if images_blank is None:
            images_blank = [np.zeros_like(img) for img in images]
        img = imgviz.tile(images, shape=shape, border=(255, 255, 255))
        img = utils.resize_to_even(img)
        i += 1
        if writer is None:
            writer = imageio.get_writer(
                out,
                fps=fps,
                macro_block_size=utils.get_macro_block_size(img.shape[:2]),
                ffmpeg_log_level="error",
            )
        writer.append_data(img)
        pbar.update()
    pbar.close()
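
A minimal usage sketch for the helper above (the glob pattern and output
file name are hypothetical; any videos readable by imageio should work):

    import glob

    tile(glob.glob("clips/*.mp4"), out="tiled.mp4", resize=0.5, shape=(2, 2))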
Example #10
    def __getitem__(self, i):
        image_id = self.dataset._ids[i]
        example = self.dataset[i]

        rgb = example["rgb"]
        masks = example["masks"]
        labels = example["labels"]

        captions = morefusion.datasets.ycb_video.class_names[labels]
        viz = imgviz.instances2rgb(rgb, labels, masks=masks, captions=captions)
        viz = imgviz.draw.text_in_rectangle(viz,
                                            loc="lt",
                                            text=image_id,
                                            size=30,
                                            background=(0, 255, 0))
        return imgviz.tile([rgb, viz], shape=(1, 2), border=(255, 255, 255))
Example #11
    def __getitem__(self, i):
        example = self._dataset[i]

        rgb = example["color"]
        depth_viz = imgviz.depth2rgb(example["depth"])
        label_viz = imgviz.label2rgb(
            example["result"]["labels"],
            label_names=morefusion.datasets.ycb_video.class_names,
        )

        viz = imgviz.tile(
            [rgb, depth_viz, label_viz],
            shape=(1, 3),
            border=(255, 255, 255),
        )
        viz = imgviz.resize(viz, width=1000)
        return viz
Example #12
def main():
    models = morefusion.datasets.YCBVideoModels()

    with concurrent.futures.ProcessPoolExecutor() as executor:
        futures = []
        for class_id in range(models.n_class):
            if class_id == 0:
                continue
            future = executor.submit(_get_top_image, class_id)
            futures.append(future)

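    # leaving the with-block implicitly waits for all submitted jobs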
    viz = []
    for future in futures:
        viz_i = future.result()
        viz.append(viz_i)
    viz = imgviz.tile(viz, shape=(4, 6))
    imgviz.io.pyglet_imshow(viz)
    imgviz.io.pyglet_run()
Example #13
    def __getitem__(self, i):
        image_id = self.dataset._ids[i]

        rgb = self.dataset[i]["rgb"]

        with morefusion.utils.timer():
            masks, labels, confs = self.model.predict(
                [rgb.astype(np.float32).transpose(2, 0, 1)])
        masks = masks[0]
        labels = labels[0]
        confs = confs[0]

        keep = masks.sum(axis=(1, 2)) > 0
        masks = masks[keep]
        labels = labels[keep]
        confs = confs[keep]

        class_ids = labels + 1

        captions = [
            f"{self.class_names[cid]}: {conf:.1%}"
            for cid, conf in zip(class_ids, confs)
        ]
        for caption in captions:
            print(caption)
        viz = imgviz.instances.instances2rgb(
            image=rgb,
            masks=masks,
            labels=class_ids,
            captions=captions,
            font_size=15,
        )
        viz = imgviz.tile([rgb, viz], (1, 2), border=(0, 0, 0))
        viz = imgviz.draw.text_in_rectangle(viz,
                                            loc="lt",
                                            text=image_id,
                                            size=25,
                                            background=(0, 255, 0))
        return viz
Example #14
    def images(generator, Ts_cam2world):
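        # a single Depth2RGB instance is reused across frames, presumably to
        # keep the depth color scale consistent between them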
        depth2rgb = imgviz.Depth2RGB()
        n_points = len(Ts_cam2world)
        for i, T_cam2world in enumerate(Ts_cam2world):
            # generator.debug_render(T_cam2world)

            rgb, depth, ins, cls = generator.render(
                T_cam2world,
                fovy=45,
                height=480,
                width=640,
            )
            viz = imgviz.tile(
                [
                    rgb,
                    depth2rgb(depth),
                    imgviz.label2rgb(ins + 1, rgb),
                    imgviz.label2rgb(cls, rgb),
                ],
                border=(255, 255, 255),
            )
            viz = imgviz.resize(viz, width=1000)

            font_size = 25
            text = f"{i + 1:04d} / {n_points:04d}"
            size = imgviz.draw.text_size(text, font_size)
            viz = imgviz.draw.rectangle(viz, (1, 1),
                                        size,
                                        outline=(0, 255, 0),
                                        fill=(0, 255, 0))
            viz = imgviz.draw.text(viz, (1, 1),
                                   text,
                                   color=(0, 0, 0),
                                   size=font_size)

            imgviz.io.cv_imshow(viz)
            imgviz.io.cv_waitkey(10)
Example #15
labelviz = imgviz.label2rgb(class_label, label_names=data["class_names"])

# instance bboxes
rgb = data["rgb"]
bboxes = data["bboxes"].astype(int)
labels = data["labels"]
captions = [data["class_names"][l] for l in labels]
bboxviz = imgviz.instances2rgb(image=rgb, bboxes=bboxes, labels=labels, captions=captions)

# instance masks
masks = data["masks"] == 1
maskviz = imgviz.instances2rgb(image=rgb, masks=masks, labels=labels, captions=captions)

# tile instance masks
insviz = [(rgb * m[:, :, None])[b[0] : b[2], b[1] : b[3]] for b, m in zip(bboxes, masks)]
insviz = imgviz.tile(imgs=insviz, border=(255, 255, 255))

# tile visualization
tiled = imgviz.tile(
    [rgb, depthviz, labelviz, bboxviz, maskviz, insviz],
    shape=(1, 6),
    border=(255, 255, 255),
    border_width=5,
)
# }} GETTING_STARTED
# -----------------------------------------------------------------------------

out_file = osp.join(here, ".readme/getting_started.jpg")
imgviz.io.imsave(out_file, tiled)

img = imgviz.io.imread(out_file)
Example #16
morefusion.extra.pybullet.init_world()

pybullet.resetDebugVisualizerCamera(
    cameraDistance=0.5,
    cameraYaw=45,
    cameraPitch=-45,
    cameraTargetPosition=(0, 0, 0),
)

morefusion.extra.pybullet.add_model(
    visual_file=cad_file,
    position=(0, 0, 0.3),
)
for _ in range(1000):
    pybullet.stepSimulation()

rgb, depth, segm = morefusion.extra.pybullet.get_debug_visualizer_image()

morefusion.extra.pybullet.del_world()

viz = imgviz.tile(
    [rgb, imgviz.depth2rgb(depth),
     imgviz.label2rgb(segm)],
    shape=(1, 3),
    border=(255, 255, 255),
)
viz = imgviz.resize(viz, width=1500)
imgviz.io.pyglet_imshow(viz)
imgviz.io.pyglet_run()
Example #17
            paths.remove(OBJECT_MODEL_PATH + '/' + dataset + '/models/obj_000007.ply') # crayola crayon
            paths.remove(OBJECT_MODEL_PATH + '/' + dataset + '/models/obj_000021.ply') # crayola crayon

        # visualization
        imgs = []
        print("Loading", dataset)
        for path in tqdm(paths):

            m = vedo.load(path)
            if dataset in ["ycbv", "ruapc", "kit", "bigbird"]:
                m = m.texture(path[:-3] + "png")

            m.show(interactive=False, viewup='z')
            vedo.screenshot('tmp.png')

            img = create_thumbnail('tmp.png', 256)
            imgs.append(np.uint8(img))
        
        tiled = imgviz.tile(imgs=imgs, border=(255, 255, 255), cval=(255, 255, 255))
        plt.figure(dpi=700)
        plt.title(dataset)
        plt.imshow(tiled)
        plt.axis("off")
        img = imgviz.io.pyplot_to_numpy()
        plt.close()
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        cv2.imwrite(OUTPUT_PATH + '/' + dataset + '.png', img)

Example #18
    def _callback(self,
                  cam_msg,
                  rgb_msg,
                  depth_msg,
                  ins_msg,
                  cls_msg,
                  noentry_msg=None):
        bridge = cv_bridge.CvBridge()
        rgb = bridge.imgmsg_to_cv2(rgb_msg, desired_encoding="rgb8")
        depth = bridge.imgmsg_to_cv2(depth_msg)
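        # uint16 depth comes in millimeters; convert to meters and mark
        # missing measurements (zeros) as NaN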
        if depth.dtype == np.uint16:
            depth = depth.astype(np.float32) / 1000
            depth[depth == 0] = np.nan
        assert depth.dtype == np.float32
        K = np.array(cam_msg.K).reshape(3, 3)
        pcd = morefusion.geometry.pointcloud_from_depth(
            depth, K[0, 0], K[1, 1], K[0, 2], K[1, 2])
        ins = bridge.imgmsg_to_cv2(ins_msg)

        grids_noentry = {}
        if noentry_msg:
            for grid in noentry_msg.grids:
                instance_id = grid.instance_id
                dims = (grid.dims.x, grid.dims.y, grid.dims.z)
                indices = np.array(grid.indices)
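                # unravel flat C-order indices into (i, j, k) voxel coordinates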
                k = indices % grid.dims.z
                j = indices // grid.dims.z % grid.dims.y
                i = indices // grid.dims.z // grid.dims.y
                grid_nontarget_empty = np.zeros(dims, dtype=bool)
                grid_nontarget_empty[i, j, k] = True
                origin = np.array(
                    [grid.origin.x, grid.origin.y, grid.origin.z],
                    dtype=np.float32,
                )
                grids_noentry[instance_id] = dict(
                    origin=origin,
                    pitch=grid.pitch,
                    matrix=grid_nontarget_empty,
                )

        instance_ids = []
        class_ids = []
        for cls in cls_msg.classes:
            instance_ids.append(cls.instance_id)
            class_ids.append(cls.class_id)
        instance_ids = np.array(instance_ids)
        class_ids = np.array(class_ids)

        examples = []
        keep = []
        nanmask = np.isnan(pcd).any(axis=2)
        for i, (ins_id, cls_id) in enumerate(zip(instance_ids, class_ids)):
            if self._with_occupancy and ins_id not in grids_noentry:
                # it is inactive for some reason (e.g., on the edge)
                continue

            mask = ins == ins_id
            if (~nanmask & mask).sum() < 50:
                continue
            bbox = morefusion.geometry.masks_to_bboxes([mask])[0]
            y1, x1, y2, x2 = bbox.round().astype(int)
            rgb_ins = rgb[y1:y2, x1:x2].copy()
            rgb_ins[~mask[y1:y2, x1:x2]] = 0
            rgb_ins = imgviz.centerize(rgb_ins, (256, 256), cval=0)
            pcd_ins = pcd[y1:y2, x1:x2].copy()
            pcd_ins[~mask[y1:y2, x1:x2]] = np.nan
            pcd_ins = imgviz.centerize(pcd_ins, (256, 256),
                                       cval=np.nan,
                                       interpolation="nearest")

            example = dict(
                class_id=cls_id,
                rgb=rgb_ins,
                pcd=pcd_ins,
            )
            if grids_noentry:
                example["origin"] = grids_noentry[ins_id]["origin"]
                example["pitch"] = grids_noentry[ins_id]["pitch"]
                example["grid_nontarget_empty"] = grids_noentry[ins_id][
                    "matrix"]
            examples.append(example)
            keep.append(i)
        if not examples:
            return
        inputs = chainer.dataset.concat_examples(examples, device=0)
        instance_ids = instance_ids[keep]
        del class_ids

        if self._pub_debug_rgbd.get_num_connections() > 0:
            debug_rgbd = [
                imgviz.tile(
                    [e["rgb"], imgviz.depth2rgb(e["pcd"][:, :, 2])], (1, 2))
                for e in examples
            ]
            debug_rgbd = imgviz.tile(debug_rgbd, border=(255, 255, 255))
            debug_rgbd_msg = bridge.cv2_to_imgmsg(debug_rgbd, encoding="rgb8")
            debug_rgbd_msg.header = rgb_msg.header
            self._pub_debug_rgbd.publish(debug_rgbd_msg)

        with chainer.no_backprop_mode(), chainer.using_config("train", False):
            quaternion, translation, confidence = self._model.predict(**inputs)
        indices = confidence.array.argmax(axis=1)
        B = quaternion.shape[0]
        confidence = confidence[np.arange(B), indices]
        quaternion = quaternion[np.arange(B), indices]
        translation = translation[np.arange(B), indices]
        confidence = chainer.cuda.to_cpu(confidence.array)
        quaternion = chainer.cuda.to_cpu(quaternion.array)
        translation = chainer.cuda.to_cpu(translation.array)
        """
        transforms = morefusion.functions.transformation_matrix(
            quaternion, translation
        ).array
        for i in range(B):
            pcd_cad = self._models.get_pcd(examples[i]['class_id'])
            pcd_depth = examples[i]['pcd']
            pcd_depth = pcd_depth[~np.isnan(pcd_depth).any(axis=2)]
            icp = morefusion.contrib.ICPRegistration(
                pcd_depth=pcd_depth,
                pcd_cad=pcd_cad,
                transform_init=transforms[i],
            )
            transform = icp.register()
            quaternion[i] = ttf.quaternion_from_matrix(transform)
            translation[i] = ttf.translation_from_matrix(transform)
        del transforms
        """

        poses = ObjectPoseArray()
        poses.header = rgb_msg.header
        for i, (ins_id, example) in enumerate(zip(instance_ids, examples)):
            """
            cls_id = example['class_id']
            class_name = morefusion.datasets.ycb_video.class_names[cls_id]
            morefusion.ros.loginfo_green(
                f'instance_id={ins_id}, class_id={cls_id}, '
                f'class_name={class_name}, confidence={confidence[i].item()}'
            )
            """

            if confidence[i].item() < 0.9:
                continue

            pose = ObjectPose()
            pose.pose.position.x = translation[i][0]
            pose.pose.position.y = translation[i][1]
            pose.pose.position.z = translation[i][2]
            pose.pose.orientation.w = quaternion[i][0]
            pose.pose.orientation.x = quaternion[i][1]
            pose.pose.orientation.y = quaternion[i][2]
            pose.pose.orientation.z = quaternion[i][3]
            pose.instance_id = ins_id
            pose.class_id = examples[i]["class_id"]
            poses.poses.append(pose)
        self._pub_poses.publish(poses)
Example #19
    def _process_frame(self, frame):
        meta = frame["meta"]
        color = frame["color"]

        depth = frame["depth"]
        depth_viz = imgviz.depth2rgb(depth, min_value=0, max_value=2)

        label = frame["label"]
        label_viz = imgviz.label2rgb(label)

        labels = meta["cls_indexes"].astype(np.int32)
        # NOTE: cls_mask is the same as ins_mask in YCB_Video_Dataset
        masks = np.asarray([label == cls_id for cls_id in labels])
        bboxes = morefusion.geometry.masks_to_bboxes(masks)

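        # drop instances whose mask is empty; those yield an all-zero bbox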
        keep = ~(bboxes == 0).all(axis=1)
        labels = labels[keep]
        bboxes = bboxes[keep]
        masks = masks[keep]

        gray = imgviz.gray2rgb(imgviz.rgb2gray(color))
        ins_viz = imgviz.instances2rgb(gray,
                                       labels=labels,
                                       bboxes=bboxes,
                                       masks=masks)

        vertmap = meta["vertmap"]
        vertmap[label == 0] = np.nan
        vert_viz_x = imgviz.depth2rgb(vertmap[:, :, 0])
        vert_viz_y = imgviz.depth2rgb(vertmap[:, :, 1])
        vert_viz_z = imgviz.depth2rgb(vertmap[:, :, 2])

        roi_viz_color = []
        roi_viz_depth = []
        roi_viz_label = []
        for bbox, mask in zip(bboxes, masks):
            y1, x1, y2, x2 = bbox.round().astype(int)
            mask_roi = mask[y1:y2, x1:x2]
            color_roi = color[y1:y2, x1:x2].copy()
            color_roi[~mask_roi] = 0
            depth_roi = depth_viz[y1:y2, x1:x2].copy()
            depth_roi[~mask_roi] = 0
            label_roi = label_viz[y1:y2, x1:x2].copy()
            label_roi[~mask_roi] = 0
            roi_viz_color.append(color_roi)
            roi_viz_depth.append(depth_roi)
            roi_viz_label.append(label_roi)
        roi_viz_color = imgviz.tile(roi_viz_color, border=(255, 255, 255))
        roi_viz_depth = imgviz.tile(roi_viz_depth, border=(255, 255, 255))
        roi_viz_label = imgviz.tile(roi_viz_label, border=(255, 255, 255))

        viz = imgviz.tile(
            [
                color,
                depth_viz,
                label_viz,
                ins_viz,
                vert_viz_x,
                vert_viz_y,
                vert_viz_z,
                np.zeros_like(color),
                roi_viz_color,
                roi_viz_depth,
                roi_viz_label,
                np.zeros_like(roi_viz_color),
            ],
            shape=(3, 4),
            border=(255, 255, 255),
        )
        viz = imgviz.centerize(viz, (1000, 1000))

        return viz
Example #20
def get_scene(dataset):
    camera = trimesh.scene.Camera(fov=(30, 22.5))
    index = 0
    frame = dataset.get_frame(index)
    examples = dataset.get_example(index)

    scenes = {
        "rgb": None,
    }

    camera_transform = morefusion.extra.trimesh.to_opengl_transform()

    vizs = [frame["rgb"]]
    for i, example in enumerate(examples):
        viz = imgviz.tile(
            [
                example["rgb"],
                imgviz.depth2rgb(example["pcd"][:, :, 0]),
                imgviz.depth2rgb(example["pcd"][:, :, 1]),
                imgviz.depth2rgb(example["pcd"][:, :, 2]),
            ],
            border=(255, 255, 255),
        )
        viz = imgviz.draw.text_in_rectangle(
            viz,
            "lt",
            f"visibility: {example['visibility']:.0%}",
            size=30,
            background=(0, 255, 0),
            color=(0, 0, 0),
        )
        vizs.append(viz)

        geom = trimesh.voxel.VoxelGrid(
            example["grid_target"],
            ttf.scale_and_translate(example["pitch"], example["origin"]),
        ).as_boxes(colors=(1.0, 0, 0, 0.5))
        scenes[f"occupied_{i:04d}"] = trimesh.Scene(
            geom, camera=camera, camera_transform=camera_transform)

        geom = trimesh.voxel.VoxelGrid(
            example["grid_nontarget"],
            ttf.scale_and_translate(example["pitch"], example["origin"]),
        ).as_boxes(colors=(0, 1.0, 0, 0.5))
        scenes[f"occupied_{i:04d}"].add_geometry(geom)

        geom = trimesh.voxel.VoxelGrid(
            example["grid_empty"],
            ttf.scale_and_translate(example["pitch"], example["origin"]),
        ).as_boxes(colors=(0.5, 0.5, 0.5, 0.5))
        scenes[f"empty_{i:04d}"] = trimesh.Scene(
            geom, camera=camera, camera_transform=camera_transform)

        scenes[f"full_occupied_{i:04d}"] = trimesh.Scene(
            camera=camera, camera_transform=camera_transform)
        if (example["grid_target_full"] > 0).any():
            geom = trimesh.voxel.VoxelGrid(
                example["grid_target_full"],
                ttf.scale_and_translate(example["pitch"], example["origin"]),
            ).as_boxes(colors=(1.0, 0, 0, 0.5))
            scenes[f"full_occupied_{i:04d}"].add_geometry(geom)

        if (example["grid_nontarget_full"] > 0).any():
            colors = imgviz.label2rgb(
                example["grid_nontarget_full"].reshape(1, -1) +
                1).reshape(example["grid_nontarget_full"].shape + (3, ))
            geom = trimesh.voxel.VoxelGrid(
                example["grid_nontarget_full"],
                ttf.scale_and_translate(example["pitch"], example["origin"]),
            ).as_boxes(colors=colors)
            scenes[f"full_occupied_{i:04d}"].add_geometry(geom)

        dim = example["grid_target"].shape[0]
        extents = np.array([dim, dim, dim]) * example["pitch"]
        geom = trimesh.path.creation.box_outline(extents)
        geom.apply_translation(example["origin"] +
                               (dim / 2 - 0.5) * example["pitch"])
        scenes[f"occupied_{i:04d}"].add_geometry(geom)
        scenes[f"empty_{i:04d}"].add_geometry(geom)
    viz = imgviz.tile(vizs)

    scenes["rgb"] = viz

    return scenes
Example #21
        def __getitem__(self, index):
            index = self.indices[index]
            result_file = args.result / f"{index:04d}.mat"  # NOQA
            print(result_file)
            result = scipy.io.loadmat(result_file,
                                      chars_as_strings=True,
                                      squeeze_me=True)
            frame_id = "/".join(result["frame_id"].split("/")[1:])

            frame = morefusion.datasets.YCBVideoDataset.get_frame(frame_id)

            rgb = frame["color"]
            depth = frame["depth"]
            bboxes = result["bboxes"]
            K = frame["meta"]["intrinsic_matrix"]
            labels = result["labels"].astype(np.int32)
            masks = result["masks"].astype(bool)

            keep = np.isin(labels, frame["meta"]["cls_indexes"])
            bboxes = bboxes[keep]
            labels = labels[keep]
            masks = masks[keep]

            captions = [
                morefusion.datasets.ycb_video.class_names[l] for l in labels
            ]
            detections_viz = imgviz.instances2rgb(
                rgb,
                labels=labels,
                bboxes=bboxes,
                masks=masks,
                captions=captions,
                font_size=15,
            )

            camera = trimesh.scene.Camera(resolution=(640, 480),
                                          focal=(K[0, 0], K[1, 1]))

            pybullet.connect(pybullet.DIRECT)
            for class_id, pose in zip(labels, result["poses"]):
                cad_file = morefusion.datasets.YCBVideoModels().get_cad_file(
                    class_id=class_id)
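                # the pose stores its quaternion as (w, x, y, z); pybullet
                # expects (x, y, z, w), hence the [1, 2, 3, 0] reordering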
                morefusion.extra.pybullet.add_model(
                    cad_file,
                    position=pose[4:],
                    orientation=pose[:4][[1, 2, 3, 0]],
                )
            (
                rgb_rend,
                depth_rend,
                segm_rend,
            ) = morefusion.extra.pybullet.render_camera(np.eye(4),
                                                        fovy=camera.fov[1],
                                                        height=480,
                                                        width=640)
            pybullet.disconnect()

            min_value = 0.3
            max_value = 2 * np.nanmedian(depth) - min_value
            depth = imgviz.depth2rgb(depth,
                                     min_value=min_value,
                                     max_value=max_value)
            depth_rend = imgviz.depth2rgb(depth_rend,
                                          min_value=min_value,
                                          max_value=max_value)

            viz = imgviz.tile(
                [rgb, depth, detections_viz, rgb_rend, depth_rend],
                (2, 3),
                border=(255, ) * 3,
            )
            viz = imgviz.resize(viz, width=1500)
            return viz
Example #22
here = osp.dirname(osp.abspath(__file__))


if __name__ == '__main__':
    data = imgviz.data.arc2017()

    rgb = data['rgb']
    bboxes = data['bboxes'].astype(int)
    masks = data['masks'] == 1
    crops = []
    for bbox, mask in zip(bboxes, masks):
        slice_ = slice(bbox[0], bbox[2]), slice(bbox[1], bbox[3])
        rgb_crop = rgb[slice_]
        mask_crop = mask[slice_]
        crops.append(rgb_crop * mask_crop[:, :, None])
    tiled = imgviz.tile(imgs=crops, border=(255, 255, 255))

    # -------------------------------------------------------------------------

    plt.figure(dpi=200)

    plt.subplot(121)
    plt.title('original')
    plt.imshow(rgb)
    plt.axis('off')

    plt.subplot(122)
    plt.title('instances')
    plt.imshow(tiled)
    plt.axis('off')
Example #23
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("model", help="model file in a log dir")
    parser.add_argument("--gpu", type=int, default=0, help="gpu id")
    parser.add_argument("--save", action="store_true", help="save")
    args = parser.parse_args()

    args_file = path.Path(args.model).parent / "args"
    with open(args_file) as f:
        args_data = json.load(f)
    pprint.pprint(args_data)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()

    model = singleview_3d.models.Model(
        n_fg_class=len(args_data["class_names"][1:]),
        pretrained_resnet18=args_data["pretrained_resnet18"],
        with_occupancy=args_data["with_occupancy"],
        loss=args_data["loss"],
        loss_scale=args_data["loss_scale"],
    )
    if args.gpu >= 0:
        model.to_gpu()

    print(f"==> Loading trained model: {args.model}")
    chainer.serializers.load_npz(args.model, model)
    print("==> Done model loading")

    split = "val"
    dataset = morefusion.datasets.YCBVideoRGBDPoseEstimationDataset(
        split=split)
    dataset_reindexed = morefusion.datasets.YCBVideoRGBDPoseEstimationDatasetReIndexed(  # NOQA
        split=split,
        class_ids=args_data["class_ids"],
    )
    transform = Transform(
        train=False,
        with_occupancy=args_data["with_occupancy"],
    )

    pprint.pprint(args.__dict__)

    # -------------------------------------------------------------------------

    depth2rgb = imgviz.Depth2RGB()
    for index in range(len(dataset)):
        frame = dataset.get_frame(index)

        image_id = dataset._ids[index]
        indices = dataset_reindexed.get_indices_from_image_id(image_id)
        examples = dataset_reindexed[indices]
        examples = [transform(example) for example in examples]

        if not examples:
            continue
        inputs = chainer.dataset.concat_examples(examples, device=args.gpu)

        with chainer.no_backprop_mode(), chainer.using_config("train", False):
            quaternion_pred, translation_pred, confidence_pred = model.predict(
                class_id=inputs["class_id"],
                rgb=inputs["rgb"],
                pcd=inputs["pcd"],
                pitch=inputs.get("pitch"),
                origin=inputs.get("origin"),
                grid_nontarget_empty=inputs.get("grid_nontarget_empty"),
            )

            indices = model.xp.argmax(confidence_pred.array, axis=1)
            quaternion_pred = quaternion_pred[
                model.xp.arange(quaternion_pred.shape[0]), indices]
            translation_pred = translation_pred[
                model.xp.arange(translation_pred.shape[0]), indices]

            reporter = chainer.Reporter()
            reporter.add_observer("main", model)
            observation = {}
            with reporter.scope(observation):
                model.evaluate(
                    class_id=inputs["class_id"],
                    quaternion_true=inputs["quaternion_true"],
                    translation_true=inputs["translation_true"],
                    quaternion_pred=quaternion_pred,
                    translation_pred=translation_pred,
                )

        # TODO(wkentaro)
        observation_new = {}
        for k, v in observation.items():
            if re.match(r"main/add_or_add_s/[0-9]+/.+", k):
                k_new = "/".join(k.split("/")[:-1])
                observation_new[k_new] = v
        observation = observation_new

        print(f"[{index:08d}] {observation}")

        # ---------------------------------------------------------------------

        K = frame["intrinsic_matrix"]
        height, width = frame["rgb"].shape[:2]
        fovy = trimesh.scene.Camera(resolution=(width, height),
                                    focal=(K[0, 0], K[1, 1])).fov[1]

        batch_size = len(inputs["class_id"])
        class_ids = cuda.to_cpu(inputs["class_id"])
        quaternion_pred = cuda.to_cpu(quaternion_pred.array)
        translation_pred = cuda.to_cpu(translation_pred.array)
        quaternion_true = cuda.to_cpu(inputs["quaternion_true"])
        translation_true = cuda.to_cpu(inputs["translation_true"])

        Ts_pred = []
        Ts_true = []
        for i in range(batch_size):
            # T_cad2cam
            T_pred = tf.quaternion_matrix(quaternion_pred[i])
            T_pred[:3, 3] = translation_pred[i]
            T_true = tf.quaternion_matrix(quaternion_true[i])
            T_true[:3, 3] = translation_true[i]
            Ts_pred.append(T_pred)
            Ts_true.append(T_true)

        Ts = dict(true=Ts_true, pred=Ts_pred)

        vizs = []
        depth_viz = depth2rgb(frame["depth"])
        for which in ["true", "pred"]:
            pybullet.connect(pybullet.DIRECT)
            for i, T in enumerate(Ts[which]):
                cad_file = morefusion.datasets.YCBVideoModels().get_cad_file(
                    class_id=class_ids[i])
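                # tf.quaternion_from_matrix returns (w, x, y, z); reorder to
                # the (x, y, z, w) convention pybullet expects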
                morefusion.extra.pybullet.add_model(
                    cad_file,
                    position=tf.translation_from_matrix(T),
                    orientation=tf.quaternion_from_matrix(T)[[1, 2, 3, 0]],
                )
            (
                rgb_rend,
                depth_rend,
                segm_rend,
            ) = morefusion.extra.pybullet.render_camera(
                np.eye(4), fovy, height, width)
            pybullet.disconnect()

            segm_rend = imgviz.label2rgb(segm_rend + 1,
                                         img=frame["rgb"],
                                         alpha=0.7)
            depth_rend = depth2rgb(depth_rend)
            rgb_input = imgviz.tile(cuda.to_cpu(inputs["rgb"]),
                                    border=(255, 255, 255))
            viz = imgviz.tile(
                [
                    frame["rgb"],
                    depth_viz,
                    rgb_input,
                    segm_rend,
                    rgb_rend,
                    depth_rend,
                ],
                (1, 6),
                border=(255, 255, 255),
            )
            viz = imgviz.resize(viz, width=1800)

            if which == "pred":
                text = []
                for class_id in np.unique(class_ids):
                    add = observation[f"main/add_or_add_s/{class_id:04d}"]
                    text.append(f"[{which}] [{class_id:04d}]: "
                                f"add/add_s={add * 100:.1f}cm")
                text = "\n".join(text)
            else:
                text = f"[{which}]"
            viz = imgviz.draw.text_in_rectangle(
                viz,
                loc="lt",
                text=text,
                size=20,
                background=(0, 255, 0),
                color=(0, 0, 0),
            )
            if which == "true":
                viz = imgviz.draw.text_in_rectangle(
                    viz,
                    loc="rt",
                    text="singleview_3d",
                    size=20,
                    background=(255, 0, 0),
                    color=(0, 0, 0),
                )
            vizs.append(viz)
        viz = imgviz.tile(vizs, (2, 1), border=(255, 255, 255))

        if args.save:
            out_file = path.Path(args.model).parent / f"video/{index:08d}.jpg"
            out_file.parent.makedirs_p()
            imgviz.io.imsave(out_file, viz)

        yield viz