def __getitem__(self, index):
    """Return one image showing a frame next to its per-instance examples.

    The frame RGB is placed on the left; on the right is a grid with one
    tile per example returned by ``get_example(index)``.  Each tile shows the
    example RGB followed by a colorized view of each of the three ``pcd``
    channels (presumably x, y, z -- confirm against the dataset).
    The result is resized to a width of 1000 pixels.
    """
    frame = self._dataset.get_frame(index)

    per_instance = []
    for example in self._dataset.get_example(index):
        pcd = example["pcd"]
        panels = [example["rgb"]]
        panels.extend(imgviz.depth2rgb(pcd[:, :, axis]) for axis in range(3))
        per_instance.append(imgviz.tile(panels, border=(255, 255, 255)))

    instances_viz = imgviz.tile(per_instance)
    del per_instance  # release the individual tiles once they are merged

    side_by_side = imgviz.tile([frame["rgb"], instances_viz], shape=(1, 2))
    return imgviz.resize(side_by_side, width=1000)
def debug_render(self, T_camera2world):
    """Show the ``self.render`` output and a trimesh render for one pose.

    Two windows are opened ("pybullet" and "trimesh"); each shows the RGB
    image, the instance-label overlay and the class-label overlay side by
    side.  The call blocks in ``imgviz.io.pyglet_run()``.

    Args:
        T_camera2world: camera pose passed to ``self.render`` and, converted
            with ``to_opengl_transform``, used as the trimesh camera
            transform.
    """
    height, width = 480, 640
    fovx = 60
    fovy = fovx / width * height
    label_names = self._models.class_names

    # Mirror the pybullet world as a trimesh scene with plain vertex colors.
    scene = morefusion.extra.pybullet.get_trimesh_scene()
    geometries = list(scene.geometry.values())
    geometries[0].visual.face_colors = (1.0, 1.0, 1.0)
    for geometry in geometries:
        if hasattr(geometry.visual, "to_color"):
            geometry.visual = geometry.visual.to_color()
    scene.camera.resolution = (width, height)
    scene.camera.fov = (fovx, fovy)
    scene.camera_transform = morefusion.extra.trimesh.to_opengl_transform(
        T_camera2world
    )

    rgb, depth, ins, cls = self.render(
        T_camera2world,
        fovy=scene.camera.fov[1],
        height=height,
        width=width,
    )

    def show(image, title):
        # Overlay the rendered labels on `image` and open a titled window.
        panels = [
            image,
            imgviz.label2rgb(ins + 1, image),
            imgviz.label2rgb(
                cls, image, label_names=label_names, font_size=20
            ),
        ]
        tiled = imgviz.tile(panels, border=(255, 255, 255), shape=(1, 3))
        imgviz.io.pyglet_imshow(imgviz.resize(tiled, width=1500), title)

    show(rgb, "pybullet")
    show(morefusion.extra.trimesh.save_image(scene)[:, :, :3], "trimesh")
    imgviz.io.pyglet_run()
def tile():
    """Render a figure comparing the ARC2017 sample with its tiled instances.

    Each instance is cut out of the RGB image with its bounding box and
    multiplied by its mask, the crops are tiled with a white border, and a
    two-panel matplotlib figure ("original" / "instances") is returned as a
    numpy image.
    """
    data = imgviz.data.arc2017()
    rgb = data["rgb"]

    crops = []
    for box, instance_mask in zip(data["bboxes"].astype(int), data["masks"] == 1):
        window = (slice(box[0], box[2]), slice(box[1], box[3]))
        crops.append(rgb[window] * instance_mask[window][:, :, None])
    tiled = imgviz.tile(imgs=crops, border=(255, 255, 255))

    # -------------------------------------------------------------------------

    plt.figure(dpi=200)
    for position, title, image in (
        (121, "original", rgb),
        (122, "instances", tiled),
    ):
        plt.subplot(position)
        plt.title(title)
        plt.imshow(image)
        plt.axis("off")

    figure_image = imgviz.io.pyplot_to_numpy()
    plt.close()
    return figure_image
def main():
    """Show normals estimated from the full point cloud vs. its non-NaN points.

    The first "train" example of ``YCBVideoDataset`` is back-projected to a
    point cloud.  Normals are estimated once on the whole image-shaped cloud
    and once on only the pixels whose depth is not NaN (other pixels keep
    the fill value -1).  Both results are shown side by side.
    """

    def as_uint8_image(normals):
        # Affine map that sends -1 to 0 and +1 to 255.
        return np.uint8((normals + 1) / 2 * 255)

    example = morefusion.datasets.YCBVideoDataset("train")[0]
    depth = example["depth"]
    intrinsics = example["meta"]["intrinsic_matrix"]
    pcd = morefusion.geometry.pointcloud_from_depth(
        depth,
        fx=intrinsics[0, 0],
        fy=intrinsics[1, 1],
        cx=intrinsics[0, 2],
        cy=intrinsics[1, 2],
    )

    organized = morefusion.geometry.estimate_pointcloud_normals(pcd)

    valid = ~np.isnan(depth)
    unorganized = np.full_like(pcd, -1)
    unorganized[valid] = morefusion.geometry.estimate_pointcloud_normals(
        pcd[valid]
    )

    viz = imgviz.tile(
        [as_uint8_image(organized), as_uint8_image(unorganized)],
        (1, 2),
        border=(255, 255, 255),
    )
    imgviz.io.pyglet_imshow(viz)
    imgviz.io.pyglet_run()
def main():
    """Plot bbox diagonal and voxel size per model above a strip of images.

    The figure has three rows: a bar chart of ``bbox_diagonal``, a bar chart
    of ``voxel_size`` (both keyed by ``name``), and the images returned by
    ``get_data()`` tiled in a single row.
    """
    df, top_images = get_data()

    fig = plt.figure(figsize=(15, 11))
    ax_bbox, ax_voxel, ax_images = fig.subplots(3, 1)
    fill = (0.1, 0.1, 0.1, 0.1)

    df.plot.bar(
        x="name",
        y="bbox_diagonal",
        color=fill,
        edgecolor="red",
        ax=ax_bbox,
    )
    # Names are only drawn under the second chart.
    ax_bbox.get_xaxis().set_visible(False)

    df.plot.bar(
        x="name",
        y="voxel_size",
        color=fill,
        edgecolor="blue",
        rot=45,
        ax=ax_voxel,
    )
    ax_voxel.set_xlabel(None)

    image_strip = imgviz.tile(top_images, shape=(1, len(top_images)))
    ax_images.imshow(image_strip)
    for axis in (ax_images.get_xaxis(), ax_images.get_yaxis()):
        axis.set_visible(False)

    plt.suptitle("BBox and Voxel size of YCB_Video_Models")
    plt.show()
def test_tile():
    """Tiling three differently sized uint8 images in a row pads to one size."""

    def random_image(height, width):
        pixels = np.random.uniform(0, 255, (height, width, 3))
        return pixels.round().astype(np.uint8)

    sizes = ((15, 25), (25, 25), (25, 10))
    images = [random_image(height, width) for height, width in sizes]

    tiled = imgviz.tile(images, (1, 3))

    # Every cell takes the largest height (25) and width (25): 1 x 3 cells.
    assert tiled.shape == (25, 75, 3)
    assert tiled.dtype == np.uint8
def __getitem__(self, index):
    """Return the parent frame RGB next to the visualization of one example.

    The instance id at ``index`` is mapped to its parent image id (its
    directory part), which is looked up in the parent dataset to fetch the
    full frame.  The example is drawn as its RGB plus a colorized view of
    each of the three ``pcd`` channels.
    """
    example = self._dataset[index]

    image_id = osp.dirname(self._dataset._ids[index])
    parent = self._dataset_parent
    frame = parent.get_frame(parent._ids.index(image_id))

    pcd = example["pcd"]
    panels = [example["rgb"]]
    panels += [imgviz.depth2rgb(pcd[:, :, axis]) for axis in range(3)]
    example_viz = imgviz.tile(panels, border=(255, 255, 255))

    return imgviz.tile([frame["rgb"], example_viz], (1, 2))
def __getitem__(self, index):
    """Return the input image next to its annotated instances.

    The extractor mean is added back to the dataset image (in place), which
    is then cast to uint8 and transposed with ``(1, 2, 0)``.  Labels are
    shifted by one before looking up their YCB-Video class names.
    The result is resized to a width of 1500 pixels.
    """
    rgb, bbox, label, mask = self._dataset[index]

    # Undo mean subtraction; `+=` keeps the original in-place semantics.
    rgb += self._model.extractor.mean
    rgb = rgb.astype(np.uint8).transpose(1, 2, 0)

    class_ids = label + 1
    class_names = morefusion.datasets.ycb_video.class_names
    captions = [class_names[class_id] for class_id in class_ids]

    annotated = imgviz.instances2rgb(
        rgb,
        labels=class_ids,
        masks=mask,
        bboxes=bbox,
        captions=captions,
    )
    side_by_side = imgviz.tile(
        [rgb, annotated], shape=(1, 2), border=(255, 255, 255)
    )
    return imgviz.resize(side_by_side, width=1500)
def tile(in_files, out, resize=1, shape=None):
    """Tile several videos frame by frame into a single output video.

    Inputs are read in sorted filename order.  The output uses the fps of
    the first input and runs until *every* input is exhausted; inputs that
    end earlier are padded with black frames of their own frame size.

    Args:
        in_files: iterable of input video paths.
        out: output video path (also used as the progress-bar label).
        resize: scale factor applied to each input frame (1 = unchanged).
        shape: tile layout forwarded to ``imgviz.tile``.

    Raises:
        ValueError: if some, but not all, inputs contain no frame at all, so
            the size of their padding frame cannot be determined.
    """
    fps = None
    max_n_frames = 0
    readers = []
    writer = None
    pbar = None
    try:
        for in_file in sorted(in_files):
            reader = imageio.get_reader(in_file)
            readers.append(reader)
            if fps is None:
                fps = reader.get_meta_data()["fps"]
            max_n_frames = max(max_n_frames, reader.count_frames())

        i = 0
        images_blank = None
        pbar = tqdm.tqdm(desc=out, total=max_n_frames)
        while True:
            images = []
            # One flag per reader for the current frame index.  This list
            # must be created once per frame (not once per reader), otherwise
            # only the last reader decides when the loop stops.
            finished = []
            for j, reader in enumerate(readers):
                try:
                    img = reader.get_data(i)
                except IndexError:
                    # This input ran out of frames: pad with its blank frame.
                    img = None if images_blank is None else images_blank[j]
                    finished.append(True)
                else:
                    if resize != 1:
                        height = int(round(img.shape[0] * resize))
                        img = imgviz.resize(img, height=height)
                    finished.append(False)
                images.append(img)

            if all(finished):
                break

            if images_blank is None:
                if any(finished):
                    raise ValueError(
                        "cannot tile videos: at least one input has no frames"
                    )
                images_blank = [np.zeros_like(img) for img in images]

            img = imgviz.tile(images, shape=shape, border=(255, 255, 255))
            img = utils.resize_to_even(img)

            i += 1

            if writer is None:
                # Created lazily because the frame size is only known now.
                writer = imageio.get_writer(
                    out,
                    fps=fps,
                    macro_block_size=utils.get_macro_block_size(img.shape[:2]),
                    ffmpeg_log_level="error",
                )
            writer.append_data(img)

            pbar.update()
    finally:
        # Release the progress bar, the output file and every input, even
        # when reading or encoding fails half way.
        if pbar is not None:
            pbar.close()
        if writer is not None:
            writer.close()
        for reader in readers:
            reader.close()
def __getitem__(self, i):
    """Return the RGB image next to its instance visualization.

    Instances are drawn with their masks and YCB-Video class-name captions,
    and the image id is written in the top-left corner of that panel.
    """
    image_id = self.dataset._ids[i]
    example = self.dataset[i]
    rgb, masks, labels = example["rgb"], example["masks"], example["labels"]

    # `class_names` is indexed with the whole `labels` array at once.
    captions = morefusion.datasets.ycb_video.class_names[labels]

    annotated = imgviz.instances2rgb(
        rgb, labels, masks=masks, captions=captions
    )
    annotated = imgviz.draw.text_in_rectangle(
        annotated,
        loc="lt",
        text=image_id,
        size=30,
        background=(0, 255, 0),
    )
    return imgviz.tile([rgb, annotated], shape=(1, 2), border=(255, 255, 255))
def __getitem__(self, i):
    """Return color, colorized depth and result labels of example ``i``.

    The three panels are tiled in one row with a white border and resized to
    a width of 1000 pixels.
    """
    example = self._dataset[i]

    panels = [
        example["color"],
        imgviz.depth2rgb(example["depth"]),
        imgviz.label2rgb(
            example["result"]["labels"],
            label_names=morefusion.datasets.ycb_video.class_names,
        ),
    ]
    tiled = imgviz.tile(panels, shape=(1, 3), border=(255, 255, 255))
    return imgviz.resize(tiled, width=1000)
def main():
    """Render the top image of every non-zero class in parallel and show it.

    One ``_get_top_image`` job per class id (class 0 is skipped) is submitted
    to a process pool; the results are tiled in a 4 x 6 grid in submission
    order.
    """
    models = morefusion.datasets.YCBVideoModels()

    with concurrent.futures.ProcessPoolExecutor() as executor:
        futures = [
            executor.submit(_get_top_image, class_id)
            for class_id in range(models.n_class)
            if class_id != 0
        ]

    top_images = [future.result() for future in futures]

    grid = imgviz.tile(top_images, shape=(4, 6))
    imgviz.io.pyglet_imshow(grid)
    imgviz.io.pyglet_run()
def __getitem__(self, i):
    """Run the model on image ``i`` and return input next to predictions.

    Predictions with an empty mask are dropped.  Each remaining instance is
    captioned with its class name and confidence (also printed to stdout),
    and the image id is written in the top-left corner of the tiled result.
    """
    image_id = self.dataset._ids[i]
    rgb = self.dataset[i]["rgb"]

    with morefusion.utils.timer():
        batch_masks, batch_labels, batch_confs = self.model.predict(
            [rgb.astype(np.float32).transpose(2, 0, 1)]
        )
    # A single image was passed, so only the first batch entry is used.
    masks, labels, confs = batch_masks[0], batch_labels[0], batch_confs[0]

    nonempty = masks.sum(axis=(1, 2)) > 0
    masks, labels, confs = masks[nonempty], labels[nonempty], confs[nonempty]

    class_ids = labels + 1
    captions = []
    for cid, conf in zip(class_ids, confs):
        caption = f"{self.class_names[cid]}: {conf:.1%}"
        captions.append(caption)
    for caption in captions:
        print(caption)

    predicted = imgviz.instances.instances2rgb(
        image=rgb,
        masks=masks,
        labels=class_ids,
        captions=captions,
        font_size=15,
    )
    tiled = imgviz.tile([rgb, predicted], (1, 2), border=(0, 0, 0))
    return imgviz.draw.text_in_rectangle(
        tiled,
        loc="lt",
        text=image_id,
        size=25,
        background=(0, 255, 0),
    )
def images(generator, Ts_cam2world):
    """Render every camera pose and show the results in an OpenCV window.

    For each pose the RGB, colorized depth, instance overlay and class
    overlay are tiled, resized to a width of 1000 pixels and stamped with a
    "current / total" counter in the top-left corner.

    Args:
        generator: object providing ``render(T, fovy=..., height=..., width=...)``
            that returns ``(rgb, depth, ins, cls)``.
        Ts_cam2world: sequence of camera poses to render.
    """
    depth2rgb = imgviz.Depth2RGB()
    total = len(Ts_cam2world)
    font_size = 25
    green = (0, 255, 0)

    for frame_number, T_cam2world in enumerate(Ts_cam2world, start=1):
        # generator.debug_render(T_cam2world)
        rgb, depth, ins, cls = generator.render(
            T_cam2world,
            fovy=45,
            height=480,
            width=640,
        )
        panels = [
            rgb,
            depth2rgb(depth),
            imgviz.label2rgb(ins + 1, rgb),
            imgviz.label2rgb(cls, rgb),
        ]
        viz = imgviz.tile(panels, border=(255, 255, 255))
        viz = imgviz.resize(viz, width=1000)

        # Progress counter on a filled green box.
        text = f"{frame_number:04d} / {total:04d}"
        text_box = imgviz.draw.text_size(text, font_size)
        viz = imgviz.draw.rectangle(
            viz, (1, 1), text_box, outline=green, fill=green
        )
        viz = imgviz.draw.text(
            viz, (1, 1), text, color=(0, 0, 0), size=font_size
        )

        imgviz.io.cv_imshow(viz)
        imgviz.io.cv_waitkey(10)
labelviz = imgviz.label2rgb(class_label, label_names=data["class_names"]) # instance bboxes rgb = data["rgb"] bboxes = data["bboxes"].astype(int) labels = data["labels"] captions = [data["class_names"][l] for l in labels] bboxviz = imgviz.instances2rgb(image=rgb, bboxes=bboxes, labels=labels, captions=captions) # instance masks masks = data["masks"] == 1 maskviz = imgviz.instances2rgb(image=rgb, masks=masks, labels=labels, captions=captions) # tile instance masks insviz = [(rgb * m[:, :, None])[b[0] : b[2], b[1] : b[3]] for b, m in zip(bboxes, masks)] insviz = imgviz.tile(imgs=insviz, border=(255, 255, 255)) # tile visualization tiled = imgviz.tile( [rgb, depthviz, labelviz, bboxviz, maskviz, insviz], shape=(1, 6), border=(255, 255, 255), border_width=5, ) # }} GETTING_STARTED # ----------------------------------------------------------------------------- out_file = osp.join(here, ".readme/getting_started.jpg") imgviz.io.imsave(out_file, tiled) img = imgviz.io.imread(out_file)
# Start a pybullet world and point the debug-visualizer camera at the origin.
morefusion.extra.pybullet.init_world()
pybullet.resetDebugVisualizerCamera(
    cameraDistance=0.5,
    cameraYaw=45,
    cameraPitch=-45,
    cameraTargetPosition=(0, 0, 0),
)
# Add the model 0.3 above the origin; `cad_file` is defined outside this
# excerpt.
morefusion.extra.pybullet.add_model(
    visual_file=cad_file,
    position=(0, 0, 0.3),
)
# Advance the simulation 1000 steps (presumably so the model comes to rest
# before the snapshot -- TODO confirm).
for _ in range(1000):
    pybullet.stepSimulation()
rgb, depth, segm = morefusion.extra.pybullet.get_debug_visualizer_image()
morefusion.extra.pybullet.del_world()

# Show RGB, colorized depth and colorized segmentation in one row.
viz = imgviz.tile(
    [rgb, imgviz.depth2rgb(depth), imgviz.label2rgb(segm)],
    shape=(1, 3),
    border=(255, 255, 255),
)
viz = imgviz.resize(viz, width=1500)
imgviz.io.pyglet_imshow(viz)
imgviz.io.pyglet_run()
paths.remove(OBJECT_MODEL_PATH + '/' + dataset + '/models/obj_000007.ply') # crayola crayon paths.remove(OBJECT_MODEL_PATH + '/' + dataset + '/models/obj_000021.ply') # crayola crayon # visualization imgs = [] print("Loading", dataset) for path in tqdm(paths): m = vedo.load(path) if dataset in ["ycbv", "ruapc", "kit", "bigbird"]: m = m.texture(path[:-3] + "png") m.show(interactive=False, viewup='z') vedo.screenshot('tmp.png') img = create_thumbnail('tmp.png', 256) imgs.append(np.uint8(img)) tiled = imgviz.tile(imgs=imgs, border=(255, 255, 255), cval=(255, 255, 255)) plt.figure(dpi=700) plt.title(dataset) plt.imshow(tiled) plt.axis("off") img = imgviz.io.pyplot_to_numpy() plt.close() img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) cv2.imwrite(OUTPUT_PATH + '/' + dataset + '.png', img)
def _callback(self, cam_msg, rgb_msg, depth_msg, ins_msg, cls_msg, noentry_msg=None):
    """Predict object poses for one set of messages and publish them.

    For every instance listed in ``cls_msg`` a 256x256 masked RGB crop and
    point-cloud crop are built, the batch is passed to ``self._model.predict``
    and, per instance, the hypothesis with the highest confidence is kept.
    Instances whose confidence is below 0.9 are not published.

    Args:
        cam_msg: message whose ``K`` field holds the 3x3 camera intrinsics
            (read as 9 values and reshaped).
        rgb_msg: color image message; its header is reused for the outputs.
        depth_msg: depth image message; uint16 input is divided by 1000 and
            zeros become NaN (presumably millimeters to meters -- confirm).
        ins_msg: instance label image message.
        cls_msg: message with ``classes``, each having ``instance_id`` and
            ``class_id``.
        noentry_msg: optional message with per-instance voxel grids
            (``grids``), given as flat indices plus dims/origin/pitch.
    """
    bridge = cv_bridge.CvBridge()
    rgb = bridge.imgmsg_to_cv2(rgb_msg, desired_encoding="rgb8")
    depth = bridge.imgmsg_to_cv2(depth_msg)
    if depth.dtype == np.uint16:
        depth = depth.astype(np.float32) / 1000
        depth[depth == 0] = np.nan
    assert depth.dtype == np.float32
    K = np.array(cam_msg.K).reshape(3, 3)
    pcd = morefusion.geometry.pointcloud_from_depth(
        depth, K[0, 0], K[1, 1], K[0, 2], K[1, 2])
    ins = bridge.imgmsg_to_cv2(ins_msg)

    # Rebuild a dense boolean grid per instance from the flat indices.
    grids_noentry = {}
    if noentry_msg:
        for grid in noentry_msg.grids:
            instance_id = grid.instance_id
            dims = (grid.dims.x, grid.dims.y, grid.dims.z)
            indices = np.array(grid.indices)
            # unravel the flat index with z as the fastest-varying axis
            k = indices % grid.dims.z
            j = indices // grid.dims.z % grid.dims.y
            i = indices // grid.dims.z // grid.dims.y
            grid_nontarget_empty = np.zeros(dims, dtype=bool)
            grid_nontarget_empty[i, j, k] = True
            origin = np.array(
                [grid.origin.x, grid.origin.y, grid.origin.z],
                dtype=np.float32,
            )
            grids_noentry[instance_id] = dict(
                origin=origin,
                pitch=grid.pitch,
                matrix=grid_nontarget_empty,
            )

    instance_ids = []
    class_ids = []
    for cls in cls_msg.classes:
        instance_ids.append(cls.instance_id)
        class_ids.append(cls.class_id)
    instance_ids = np.array(instance_ids)
    class_ids = np.array(class_ids)

    examples = []
    keep = []  # positions in instance_ids that produced an example
    nanmask = np.isnan(pcd).any(axis=2)
    for i, (ins_id, cls_id) in enumerate(zip(instance_ids, class_ids)):
        if self._with_occupancy and ins_id not in grids_noentry:
            # it is inactive in some reason (e.g., on the edge)
            continue
        mask = ins == ins_id
        # skip instances with fewer than 50 pixels that have valid points
        if (~nanmask & mask).sum() < 50:
            continue
        bbox = morefusion.geometry.masks_to_bboxes([mask])[0]
        y1, x1, y2, x2 = bbox.round().astype(int)
        rgb_ins = rgb[y1:y2, x1:x2].copy()
        rgb_ins[~mask[y1:y2, x1:x2]] = 0
        rgb_ins = imgviz.centerize(rgb_ins, (256, 256), cval=0)
        pcd_ins = pcd[y1:y2, x1:x2].copy()
        pcd_ins[~mask[y1:y2, x1:x2]] = np.nan
        pcd_ins = imgviz.centerize(pcd_ins, (256, 256), cval=np.nan, interpolation="nearest")
        example = dict(
            class_id=cls_id,
            rgb=rgb_ins,
            pcd=pcd_ins,
        )
        # NOTE(review): this indexes grids_noentry[ins_id] whenever any grid
        # was received; if self._with_occupancy is false and this instance
        # has no grid, it raises KeyError -- confirm that cannot happen.
        if grids_noentry:
            example["origin"] = grids_noentry[ins_id]["origin"]
            example["pitch"] = grids_noentry[ins_id]["pitch"]
            example["grid_nontarget_empty"] = grids_noentry[ins_id][
                "matrix"]
        examples.append(example)
        keep.append(i)

    if not examples:
        return
    inputs = chainer.dataset.concat_examples(examples, device=0)
    instance_ids = instance_ids[keep]
    del class_ids

    # Only build the debug image when somebody is subscribed to it.
    if self._pub_debug_rgbd.get_num_connections() > 0:
        debug_rgbd = [
            imgviz.tile(
                [e["rgb"], imgviz.depth2rgb(e["pcd"][:, :, 2])], (1, 2))
            for e in examples
        ]
        debug_rgbd = imgviz.tile(debug_rgbd, border=(255, 255, 255))
        debug_rgbd_msg = bridge.cv2_to_imgmsg(debug_rgbd, encoding="rgb8")
        debug_rgbd_msg.header = rgb_msg.header
        self._pub_debug_rgbd.publish(debug_rgbd_msg)

    with chainer.no_backprop_mode(), chainer.using_config("train", False):
        quaternion, translation, confidence = self._model.predict(**inputs)
    # per batch item, keep the hypothesis with the highest confidence
    indices = confidence.array.argmax(axis=1)
    B = quaternion.shape[0]
    confidence = confidence[np.arange(B), indices]
    quaternion = quaternion[np.arange(B), indices]
    translation = translation[np.arange(B), indices]
    confidence = chainer.cuda.to_cpu(confidence.array)
    quaternion = chainer.cuda.to_cpu(quaternion.array)
    translation = chainer.cuda.to_cpu(translation.array)

    # The bare string below is disabled code (an ICP refinement step) kept
    # as a no-op string literal.
    """ transforms = morefusion.functions.transformation_matrix( quaternion, translation ).array for i in range(B): pcd_cad = self._models.get_pcd(examples[i]['class_id']) pcd_depth = examples[i]['pcd'] pcd_depth = pcd_depth[~np.isnan(pcd_depth).any(axis=2)] icp = morefusion.contrib.ICPRegistration( pcd_depth=pcd_depth, pcd_cad=pcd_cad, transform_init=transforms[i], ) transform = icp.register() quaternion[i] = ttf.quaternion_from_matrix(transform) translation[i] = ttf.translation_from_matrix(transform) del transforms """

    poses = ObjectPoseArray()
    poses.header = rgb_msg.header
    for i, (ins_id, example) in enumerate(zip(instance_ids, examples)):
        # Disabled per-instance logging, kept as a no-op string literal.
        """ cls_id = example['class_id'] class_name = morefusion.datasets.ycb_video.class_names[cls_id] morefusion.ros.loginfo_green( f'instance_id={ins_id}, class_id={cls_id}, ' f'class_name={class_name}, confidence={confidence[i].item()}' ) """
        if confidence[i].item() < 0.9:
            continue
        pose = ObjectPose()
        pose.pose.position.x = translation[i][0]
        pose.pose.position.y = translation[i][1]
        pose.pose.position.z = translation[i][2]
        # quaternion entries 0..3 are written to w, x, y, z in that order
        pose.pose.orientation.w = quaternion[i][0]
        pose.pose.orientation.x = quaternion[i][1]
        pose.pose.orientation.y = quaternion[i][2]
        pose.pose.orientation.z = quaternion[i][3]
        pose.instance_id = ins_id
        pose.class_id = examples[i]["class_id"]
        poses.poses.append(pose)
    self._pub_poses.publish(poses)
def _process_frame(self, frame):
    """Build a 3 x 4 overview image of one frame.

    Row 1: color, depth, label, instances.  Row 2: the three ``vertmap``
    channels and a black filler.  Row 3: tiled per-instance crops of color,
    depth and label, and a black filler.  The result is centered on a
    1000 x 1000 canvas.

    Note that ``frame["meta"]["vertmap"]`` is modified in place: entries
    where the label is 0 are set to NaN.
    """
    white = (255, 255, 255)
    meta = frame["meta"]
    color = frame["color"]
    depth = frame["depth"]
    label = frame["label"]

    depth_viz = imgviz.depth2rgb(depth, min_value=0, max_value=2)
    label_viz = imgviz.label2rgb(label)

    labels = meta["cls_indexes"].astype(np.int32)
    # NOTE: cls_mask is the same as ins_mask in YCB_Video_Dataset
    masks = np.asarray([label == cls_id for cls_id in labels])
    bboxes = morefusion.geometry.masks_to_bboxes(masks)
    # drop instances whose bounding box is all zeros
    keep = ~(bboxes == 0).all(axis=1)
    labels, bboxes, masks = labels[keep], bboxes[keep], masks[keep]

    gray = imgviz.gray2rgb(imgviz.rgb2gray(color))
    ins_viz = imgviz.instances2rgb(
        gray, labels=labels, bboxes=bboxes, masks=masks
    )

    vertmap = meta["vertmap"]
    vertmap[label == 0] = np.nan
    vert_vizs = [imgviz.depth2rgb(vertmap[:, :, axis]) for axis in range(3)]

    def masked_crop(image, bbox, mask):
        # Crop `image` to the bbox and black out pixels outside the mask.
        y1, x1, y2, x2 = bbox.round().astype(int)
        roi = image[y1:y2, x1:x2].copy()
        roi[~mask[y1:y2, x1:x2]] = 0
        return roi

    roi_tiles = []
    for source in (color, depth_viz, label_viz):
        crops = [
            masked_crop(source, bbox, mask)
            for bbox, mask in zip(bboxes, masks)
        ]
        roi_tiles.append(imgviz.tile(crops, border=white))
    roi_viz_color, roi_viz_depth, roi_viz_label = roi_tiles

    panels = [
        color,
        depth_viz,
        label_viz,
        ins_viz,
        *vert_vizs,
        np.zeros_like(color),
        roi_viz_color,
        roi_viz_depth,
        roi_viz_label,
        np.zeros_like(roi_viz_color),
    ]
    overview = imgviz.tile(panels, shape=(3, 4), border=white)
    return imgviz.centerize(overview, (1000, 1000))
def get_scene(dataset):
    """Build the image and voxel scenes for the first frame of ``dataset``.

    Returns:
        dict mapping a name to what should be displayed under it:
        ``"rgb"`` is a tiled image (frame RGB followed by one panel per
        example), and for every example ``i`` there are three trimesh scenes,
        ``occupied_{i:04d}``, ``empty_{i:04d}`` and ``full_occupied_{i:04d}``,
        inserted in that order.
    """
    camera = trimesh.scene.Camera(fov=(30, 22.5))
    camera_transform = morefusion.extra.trimesh.to_opengl_transform()

    index = 0
    frame = dataset.get_frame(index)
    examples = dataset.get_example(index)

    def voxel_boxes(example, grid, colors):
        # Box mesh of `grid` placed with the example's pitch and origin.
        voxels = trimesh.voxel.VoxelGrid(
            grid,
            ttf.scale_and_translate(example["pitch"], example["origin"]),
        )
        return voxels.as_boxes(colors=colors)

    def new_scene(*geometry):
        return trimesh.Scene(
            *geometry, camera=camera, camera_transform=camera_transform
        )

    scenes = {"rgb": None}
    vizs = [frame["rgb"]]
    for i, example in enumerate(examples):
        pcd = example["pcd"]
        panels = [example["rgb"]]
        panels += [imgviz.depth2rgb(pcd[:, :, axis]) for axis in range(3)]
        panel = imgviz.tile(panels, border=(255, 255, 255))
        panel = imgviz.draw.text_in_rectangle(
            panel,
            "lt",
            f"visibility: {example['visibility']:.0%}",
            size=30,
            background=(0, 255, 0),
            color=(0, 0, 0),
        )
        vizs.append(panel)

        occupied_key = f"occupied_{i:04d}"
        empty_key = f"empty_{i:04d}"
        full_key = f"full_occupied_{i:04d}"

        # target (red) and non-target (green) occupancy share one scene
        scenes[occupied_key] = new_scene(
            voxel_boxes(example, example["grid_target"], (1.0, 0, 0, 0.5))
        )
        scenes[occupied_key].add_geometry(
            voxel_boxes(example, example["grid_nontarget"], (0, 1.0, 0, 0.5))
        )

        scenes[empty_key] = new_scene(
            voxel_boxes(example, example["grid_empty"], (0.5, 0.5, 0.5, 0.5))
        )

        scenes[full_key] = new_scene()
        target_full = example["grid_target_full"]
        if (target_full > 0).any():
            scenes[full_key].add_geometry(
                voxel_boxes(example, target_full, (1.0, 0, 0, 0.5))
            )
        nontarget_full = example["grid_nontarget_full"]
        if (nontarget_full > 0).any():
            # one color per voxel value, via the label colormap
            flat_colors = imgviz.label2rgb(nontarget_full.reshape(1, -1) + 1)
            colors = flat_colors.reshape(nontarget_full.shape + (3,))
            scenes[full_key].add_geometry(
                voxel_boxes(example, nontarget_full, colors)
            )

        # outline of the whole voxel grid, added to two of the scenes
        dim = example["grid_target"].shape[0]
        extents = np.array([dim, dim, dim]) * example["pitch"]
        outline = trimesh.path.creation.box_outline(extents)
        outline.apply_translation(
            example["origin"] + (dim / 2 - 0.5) * example["pitch"]
        )
        scenes[occupied_key].add_geometry(outline)
        scenes[empty_key].add_geometry(outline)

    scenes["rgb"] = imgviz.tile(vizs)
    return scenes
def __getitem__(self, index):
    """Visualize the stored detection/pose result of one frame.

    Loads ``{index:04d}.mat`` from ``args.result``, keeps only detections
    whose label appears in the frame's ``cls_indexes``, renders the kept
    objects at their stored poses with pybullet, and returns a 2 x 3 tile of
    RGB, depth, detections, rendered RGB and rendered depth, resized to a
    width of 1500 pixels.

    Each stored pose is read as 4 quaternion values followed by 3 position
    values; the quaternion is reordered with ``[1, 2, 3, 0]`` before being
    passed to pybullet (presumably wxyz -> xyzw -- confirm).
    """
    index = self.indices[index]
    result_file = args.result / f"{index:04d}.mat"  # NOQA
    print(result_file)
    result = scipy.io.loadmat(
        result_file, chars_as_strings=True, squeeze_me=True
    )
    frame_id = "/".join(result["frame_id"].split("/")[1:])

    frame = morefusion.datasets.YCBVideoDataset.get_frame(frame_id)

    rgb = frame["color"]
    depth = frame["depth"]
    K = frame["meta"]["intrinsic_matrix"]

    labels = result["labels"].astype(np.int32)
    keep = np.isin(labels, frame["meta"]["cls_indexes"])
    labels = labels[keep]
    bboxes = result["bboxes"][keep]
    masks = result["masks"].astype(bool)[keep]
    # Poses are paired with labels below, so they must be filtered with the
    # same mask; otherwise dropping a detection shifts every later pairing.
    poses = result["poses"][keep]

    captions = [
        morefusion.datasets.ycb_video.class_names[l] for l in labels
    ]
    detections_viz = imgviz.instances2rgb(
        rgb,
        labels=labels,
        bboxes=bboxes,
        masks=masks,
        captions=captions,
        font_size=15,
    )

    camera = trimesh.scene.Camera(
        resolution=(640, 480), focal=(K[0, 0], K[1, 1])
    )

    pybullet.connect(pybullet.DIRECT)
    try:
        for class_id, pose in zip(labels, poses):
            cad_file = morefusion.datasets.YCBVideoModels().get_cad_file(
                class_id=class_id
            )
            morefusion.extra.pybullet.add_model(
                cad_file,
                position=pose[4:],
                orientation=pose[:4][[1, 2, 3, 0]],
            )
        rgb_rend, depth_rend, _ = morefusion.extra.pybullet.render_camera(
            np.eye(4), fovy=camera.fov[1], height=480, width=640
        )
    finally:
        # Always release the physics client, even if loading/rendering fails.
        pybullet.disconnect()

    # Use one depth range for both the sensor depth and the rendered depth.
    min_value = 0.3
    max_value = 2 * np.nanmedian(depth) - min_value
    depth = imgviz.depth2rgb(depth, min_value=min_value, max_value=max_value)
    depth_rend = imgviz.depth2rgb(
        depth_rend, min_value=min_value, max_value=max_value
    )
    viz = imgviz.tile(
        [rgb, depth, detections_viz, rgb_rend, depth_rend],
        (2, 3),
        border=(255,) * 3,
    )
    viz = imgviz.resize(viz, width=1500)

    return viz
# Directory that contains this script.
here = osp.dirname(osp.abspath(__file__))


if __name__ == '__main__':
    data = imgviz.data.arc2017()

    rgb = data['rgb']
    bboxes = data['bboxes'].astype(int)
    masks = data['masks'] == 1
    # One crop per instance: cut the bbox window out of the image and zero
    # the pixels outside the instance mask.
    crops = []
    for bbox, mask in zip(bboxes, masks):
        slice_ = slice(bbox[0], bbox[2]), slice(bbox[1], bbox[3])
        rgb_crop = rgb[slice_]
        mask_crop = mask[slice_]
        crops.append(rgb_crop * mask_crop[:, :, None])
    tiled = imgviz.tile(imgs=crops, border=(255, 255, 255))

    # -------------------------------------------------------------------------

    # Left panel: the original image; right panel: the tiled instance crops.
    plt.figure(dpi=200)
    plt.subplot(121)
    plt.title('original')
    plt.imshow(rgb)
    plt.axis('off')
    plt.subplot(122)
    plt.title('instances')
    plt.imshow(tiled)
    plt.axis('off')
def main():
    """Run a trained singleview_3d model on the val split and yield images.

    Command-line arguments: ``model`` (model file inside a log directory
    that also contains an ``args`` JSON file), ``--gpu`` (device id, a
    negative value keeps the model on the CPU) and ``--save`` (also write
    each visualization to ``video/{index:08d}.jpg`` next to the model).

    Yields:
        One image per frame that has at least one example: the ground-truth
        row stacked on the prediction row.  Each row shows frame RGB, depth,
        the network input crops, and the rendered segmentation, RGB and
        depth for the respective poses.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("model", help="model file in a log dir")
    parser.add_argument("--gpu", type=int, default=0, help="gpu id")
    parser.add_argument("--save", action="store_true", help="save")
    args = parser.parse_args()

    args_file = path.Path(args.model).parent / "args"
    with open(args_file) as f:
        args_data = json.load(f)
    pprint.pprint(args_data)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()

    model = singleview_3d.models.Model(
        n_fg_class=len(args_data["class_names"][1:]),
        pretrained_resnet18=args_data["pretrained_resnet18"],
        with_occupancy=args_data["with_occupancy"],
        loss=args_data["loss"],
        loss_scale=args_data["loss_scale"],
    )
    if args.gpu >= 0:
        model.to_gpu()

    print(f"==> Loading trained model: {args.model}")
    chainer.serializers.load_npz(args.model, model)
    print("==> Done model loading")

    split = "val"
    dataset = morefusion.datasets.YCBVideoRGBDPoseEstimationDataset(
        split=split)
    dataset_reindexed = morefusion.datasets.YCBVideoRGBDPoseEstimationDatasetReIndexed(  # NOQA
        split=split,
        class_ids=args_data["class_ids"],
    )
    transform = Transform(
        train=False,
        with_occupancy=args_data["with_occupancy"],
    )

    pprint.pprint(args.__dict__)

    # -------------------------------------------------------------------------

    depth2rgb = imgviz.Depth2RGB()
    for index in range(len(dataset)):
        frame = dataset.get_frame(index)

        image_id = dataset._ids[index]
        indices = dataset_reindexed.get_indices_from_image_id(image_id)
        examples = dataset_reindexed[indices]
        examples = [transform(example) for example in examples]

        if not examples:
            continue
        inputs = chainer.dataset.concat_examples(examples, device=args.gpu)

        # Both context managers must be entered: `with A() and B():` would
        # evaluate the boolean expression first and only enter B.
        with chainer.no_backprop_mode(), chainer.using_config("train", False):
            quaternion_pred, translation_pred, confidence_pred = model.predict(
                class_id=inputs["class_id"],
                rgb=inputs["rgb"],
                pcd=inputs["pcd"],
                pitch=inputs.get("pitch"),
                origin=inputs.get("origin"),
                grid_nontarget_empty=inputs.get("grid_nontarget_empty"),
            )

            # per batch item, keep the most confident hypothesis
            indices = model.xp.argmax(confidence_pred.array, axis=1)
            quaternion_pred = quaternion_pred[
                model.xp.arange(quaternion_pred.shape[0]), indices]
            translation_pred = translation_pred[
                model.xp.arange(translation_pred.shape[0]), indices]

            reporter = chainer.Reporter()
            reporter.add_observer("main", model)
            observation = {}
            with reporter.scope(observation):
                model.evaluate(
                    class_id=inputs["class_id"],
                    quaternion_true=inputs["quaternion_true"],
                    translation_true=inputs["translation_true"],
                    quaternion_pred=quaternion_pred,
                    translation_pred=translation_pred,
                )

        # TODO(wkentaro)
        # Keep only the per-class add/add_s entries and strip their last
        # path component from the key.
        observation_new = {}
        for k, v in observation.items():
            if re.match(r"main/add_or_add_s/[0-9]+/.+", k):
                k_new = "/".join(k.split("/")[:-1])
                observation_new[k_new] = v
        observation = observation_new

        print(f"[{index:08d}] {observation}")

        # ---------------------------------------------------------------------

        K = frame["intrinsic_matrix"]
        height, width = frame["rgb"].shape[:2]
        fovy = trimesh.scene.Camera(resolution=(width, height),
                                    focal=(K[0, 0], K[1, 1])).fov[1]

        batch_size = len(inputs["class_id"])
        class_ids = cuda.to_cpu(inputs["class_id"])
        quaternion_pred = cuda.to_cpu(quaternion_pred.array)
        translation_pred = cuda.to_cpu(translation_pred.array)
        quaternion_true = cuda.to_cpu(inputs["quaternion_true"])
        translation_true = cuda.to_cpu(inputs["translation_true"])

        Ts_pred = []
        Ts_true = []
        for i in range(batch_size):
            # T_cad2cam
            T_pred = tf.quaternion_matrix(quaternion_pred[i])
            T_pred[:3, 3] = translation_pred[i]
            T_true = tf.quaternion_matrix(quaternion_true[i])
            T_true[:3, 3] = translation_true[i]
            Ts_pred.append(T_pred)
            Ts_true.append(T_true)

        Ts = dict(true=Ts_true, pred=Ts_pred)

        vizs = []
        depth_viz = depth2rgb(frame["depth"])
        for which in ["true", "pred"]:
            pybullet.connect(pybullet.DIRECT)
            try:
                for i, T in enumerate(Ts[which]):
                    cad_file = morefusion.datasets.YCBVideoModels().get_cad_file(
                        class_id=class_ids[i])
                    morefusion.extra.pybullet.add_model(
                        cad_file,
                        position=tf.translation_from_matrix(T),
                        # reorder the quaternion with [1, 2, 3, 0] for pybullet
                        orientation=tf.quaternion_from_matrix(T)[[1, 2, 3, 0]],
                    )
                (
                    rgb_rend,
                    depth_rend,
                    segm_rend,
                ) = morefusion.extra.pybullet.render_camera(
                    np.eye(4), fovy, height, width)
            finally:
                # Release the physics client even when rendering fails.
                pybullet.disconnect()

            segm_rend = imgviz.label2rgb(segm_rend + 1,
                                         img=frame["rgb"],
                                         alpha=0.7)
            depth_rend = depth2rgb(depth_rend)
            rgb_input = imgviz.tile(cuda.to_cpu(inputs["rgb"]),
                                    border=(255, 255, 255))
            viz = imgviz.tile(
                [
                    frame["rgb"],
                    depth_viz,
                    rgb_input,
                    segm_rend,
                    rgb_rend,
                    depth_rend,
                ],
                (1, 6),
                border=(255, 255, 255),
            )
            viz = imgviz.resize(viz, width=1800)

            if which == "pred":
                text = []
                for class_id in np.unique(class_ids):
                    add = observation[f"main/add_or_add_s/{class_id:04d}"]
                    text.append(f"[{which}] [{class_id:04d}]: "
                                f"add/add_s={add * 100:.1f}cm")
                text = "\n".join(text)
            else:
                text = f"[{which}]"
            viz = imgviz.draw.text_in_rectangle(
                viz,
                loc="lt",
                text=text,
                size=20,
                background=(0, 255, 0),
                color=(0, 0, 0),
            )
            if which == "true":
                viz = imgviz.draw.text_in_rectangle(
                    viz,
                    loc="rt",
                    text="singleview_3d",
                    size=20,
                    background=(255, 0, 0),
                    color=(0, 0, 0),
                )
            vizs.append(viz)
        viz = imgviz.tile(vizs, (2, 1), border=(255, 255, 255))

        if args.save:
            out_file = path.Path(args.model).parent / f"video/{index:08d}.jpg"
            out_file.parent.makedirs_p()
            imgviz.io.imsave(out_file, viz)

        yield viz