depth_im *= scene_camera[im_id]['depth_scale'] # to [mm] dist_im = misc.depth_im_to_dist_im(depth_im, K) ''' for gt_id, gt in enumerate(scene_gt[im_id]): # Render the depth image. depth_gt = ren.render_object(gt['obj_id'], gt['cam_R_m2c'], gt['cam_t_m2c'], fx, fy, cx, cy)['depth'] # Convert depth image to distance image. dist_gt = misc.depth_im_to_dist_im(depth_gt, K) # Mask of the full object silhouette. mask = dist_gt > 0 ''' # Mask of the visible part of the object silhouette. mask_visib = visibility.estimate_visib_mask_gt( dist_im, dist_gt, p['delta'], visib_mode='bop19') ''' # Save the calculated masks. mask_path = dp_split['mask_tpath'].format(scene_id=scene_id, im_id=im_id, gt_id=gt_id) inout.save_im(mask_path, 255 * mask.astype(np.uint8)) ''' mask_visib_path = dp_split['mask_visib_tpath'].format( scene_id=scene_id, im_id=im_id, gt_id=gt_id) inout.save_im(mask_visib_path, 255 * mask_visib.astype(np.uint8)) '''
# Bring the rendered depth to the same units as the other images in the
# dataset. The division is done in place, so `depth` stays the same array.
depth /= float(dp_camera['depth_scale'])

# Resize with the OpenCV function, which was used for rendering of the
# training images provided for the SIXD Challenge 2017.
# (Alternative that is not used here:
#  rgb = scipy.misc.imresize(rgb, par['cam']['im_size'][::-1], 'bicubic'))
target_im_size = dp_camera['im_size']
rgb = cv2.resize(rgb, target_im_size, interpolation=cv2.INTER_AREA)

# Both output path templates are filled with the same fields.
path_fields = {'out_path': out_path, 'obj_id': obj_id, 'im_id': im_id}

# Save the rendered images.
out_rgb_path = out_rgb_tpath.format(**path_fields)
inout.save_im(out_rgb_path, rgb)

out_depth_path = out_depth_tpath.format(**path_fields)
inout.save_depth(out_depth_path, depth)

# The 2D bounding box of the object model at the ground truth pose is
# currently not computed:
# ys, xs = np.nonzero(depth > 0)
# obj_bb = misc.calc_2d_bbox(xs, ys, dp_camera['im_size'])

# Record the camera parameters and the view level of the current image.
scene_camera[im_id] = dict(
    cam_K=dp_camera['K'],
    depth_scale=dp_camera['depth_scale'],
    view_level=int(views_level[view_id]),
)
# For every object: load its model into the renderer, obtain its symmetry
# transformations and save one RGB rendering per (transformation, view) pair.
for obj_id in dp_model['obj_ids']:

    # Load object model.
    misc.log('Loading 3D model of object {}...'.format(obj_id))
    model_path = dp_model['model_tpath'].format(obj_id=obj_id)
    ren.add_object(obj_id, model_path)

    poses = misc.get_symmetry_transformations(
        models_info[obj_id], p['max_sym_disc_step'])

    for pose_id, pose in enumerate(poses):
        for view_id, view in enumerate(p['views']):

            # Chain the two rigid transformations: the transformation from
            # `poses` is applied first, the view transformation second.
            view_R = view['R']
            R = view_R.dot(pose['R'])
            t = view_R.dot(pose['t']) + view['t']

            rendering = ren.render_object(obj_id, R, t, fx, fy, cx, cy)
            vis_rgb = rendering['rgb']

            # Path to the output RGB visualization.
            path_fields = dict(
                vis_path=p['vis_path'],
                dataset=p['dataset'],
                obj_id=obj_id,
                view_id=view_id,
                pose_id=pose_id,
            )
            vis_rgb_path = p['vis_rgb_tpath'].format(**path_fields)

            # Make sure the output folder exists before saving.
            out_dir = os.path.dirname(vis_rgb_path)
            misc.ensure_dir(out_dir)
            inout.save_im(vis_rgb_path, vis_rgb)

misc.log('Done.')
}) # Visualization of the visibility mask. if p['vis_visibility_masks']: depth_im_vis = visualization.depth_for_vis(depth, 0.2, 1.0) depth_im_vis = np.dstack([depth_im_vis] * 3) visib_gt_vis = visib_gt.astype(np.float) zero_ch = np.zeros(visib_gt_vis.shape) visib_gt_vis = np.dstack([zero_ch, visib_gt_vis, zero_ch]) vis = 0.5 * depth_im_vis + 0.5 * visib_gt_vis vis[vis > 1] = 1 vis_path = p['vis_mask_visib_tpath'].format( delta=p['delta'], dataset=p['dataset'], split=p['dataset_split'], scene_id=scene_id, im_id=im_id, gt_id=gt_id) misc.ensure_dir(os.path.dirname(vis_path)) inout.save_im(vis_path, vis) # Save the info for the current scene. scene_gt_info_path = dp_split['scene_gt_info_tpath'].format( scene_id=scene_id) misc.ensure_dir(os.path.dirname(scene_gt_info_path)) inout.save_json(scene_gt_info_path, scene_gt_info)
def vis_object_poses(
        poses, K, renderer, rgb=None, depth=None, vis_rgb_path=None,
        vis_depth_diff_path=None, vis_rgb_resolve_visib=False):
    """Visualizes 3D object models in specified poses in a single image.

    Two visualizations are created:
    1. An RGB visualization (if vis_rgb_path is not None).
    2. A Depth-difference visualization (if vis_depth_diff_path is not None).

    :param poses: List of dictionaries, each with info about one pose:
      - 'obj_id': Object ID.
      - 'R': 3x3 ndarray with a rotation matrix.
      - 't': 3x1 ndarray with a translation vector.
      - 'text_info': Info to write at the object (see write_text_on_image).
        This key is optional.
    :param K: 3x3 ndarray with an intrinsic camera matrix.
    :param renderer: Instance of the Renderer class (see renderer.py).
    :param rgb: ndarray with the RGB input image.
    :param depth: ndarray with the depth input image.
    :param vis_rgb_path: Path to the output RGB visualization.
    :param vis_depth_diff_path: Path to the output depth-difference
      visualization.
    :param vis_rgb_resolve_visib: Whether to resolve visibility of the objects
      (i.e. only the closest object is visualized at each pixel).
    :raises ValueError: If an image required by a requested visualization is
      missing, or if both visualizations are requested and the RGB and depth
      images differ in size.
    """
    fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]

    # Indicators of visualization types.
    vis_rgb = vis_rgb_path is not None
    vis_depth_diff = vis_depth_diff_path is not None

    # A combined depth rendering is needed for the depth-difference image and
    # for resolving which object is the closest one at each pixel.
    need_ren_depth = vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib)

    if vis_rgb and rgb is None:
        raise ValueError(
            'RGB visualization triggered but RGB image not provided.')

    if need_ren_depth and depth is None:
        raise ValueError(
            'Depth visualization triggered but D image not provided.')

    # Prepare images for rendering.
    im_size = None
    ren_rgb = None
    ren_rgb_info = None
    ren_depth = None

    if vis_rgb:
        im_size = (rgb.shape[1], rgb.shape[0])
        ren_rgb = np.zeros(rgb.shape, np.uint8)
        ren_rgb_info = np.zeros(rgb.shape, np.uint8)

    if vis_depth_diff:
        depth_size = (depth.shape[1], depth.shape[0])
        if im_size and im_size != depth_size:
            raise ValueError('The RGB and D images must have the same size.')
        im_size = depth_size

    if need_ren_depth:
        ren_depth = np.zeros((im_size[1], im_size[0]), np.float32)

    # Render the pose estimates one by one.
    for pose in poses:

        # Rendering.
        ren_out = renderer.render_object(
            pose['obj_id'], pose['R'], pose['t'], fx, fy, cx, cy)

        m_rgb = ren_out['rgb'] if vis_rgb else None

        m_mask = None
        if need_ren_depth:
            m_depth = ren_out['depth']

            # Get mask of the surface parts that are closer than the
            # surfaces rendered before (a zero depth means "nothing yet").
            visible_mask = np.logical_or(ren_depth == 0, m_depth < ren_depth)
            m_mask = np.logical_and(m_depth != 0, visible_mask)

            ren_depth[m_mask] = m_depth[m_mask].astype(ren_depth.dtype)

        if not vis_rgb:
            continue

        # Combine the RGB renderings.
        if vis_rgb_resolve_visib:
            ren_rgb[m_mask] = m_rgb[m_mask].astype(ren_rgb.dtype)
        else:
            # Additive blending, saturated at 255 to avoid uint8 wrap-around.
            ren_rgb_f = ren_rgb.astype(np.float32) + m_rgb.astype(np.float32)
            ren_rgb_f[ren_rgb_f > 255] = 255
            ren_rgb = ren_rgb_f.astype(np.uint8)

        # Draw 2D bounding box and write text info.
        obj_mask = np.sum(m_rgb > 0, axis=2)
        ys, xs = obj_mask.nonzero()
        if len(ys):
            bbox_color = (0.3, 0.3, 0.3)
            text_color = (1.0, 1.0, 1.0)
            text_size = 11

            bbox = misc.calc_2d_bbox(xs, ys, im_size)
            ren_rgb_info = draw_rect(ren_rgb_info, bbox, bbox_color)

            if 'text_info' in pose:
                text_loc = (bbox[0] + 2, bbox[1])
                ren_rgb_info = write_text_on_image(
                    ren_rgb_info, pose['text_info'], text_loc,
                    color=text_color, size=text_size)

    # Blend and save the RGB visualization.
    if vis_rgb:
        # A bare file name has no directory component that could be created.
        rgb_dir = os.path.dirname(vis_rgb_path)
        if rgb_dir:
            misc.ensure_dir(rgb_dir)

        vis_im_rgb = (
            0.5 * rgb.astype(np.float32) +
            0.5 * ren_rgb.astype(np.float32) +
            1.0 * ren_rgb_info.astype(np.float32))
        vis_im_rgb[vis_im_rgb > 255] = 255
        inout.save_im(
            vis_rgb_path, vis_im_rgb.astype(np.uint8), jpg_quality=95)

    # Save the image of depth differences.
    if vis_depth_diff:
        diff_dir = os.path.dirname(vis_depth_diff_path)
        if diff_dir:
            misc.ensure_dir(diff_dir)

        # Calculate the depth difference at pixels where both depth maps are
        # valid. The difference is zeroed everywhere else.
        valid_mask = (depth > 0) * (ren_depth > 0)
        depth_diff = valid_mask * (ren_depth.astype(np.float32) - depth)

        # Valid pixels whose (signed) difference is below this threshold are
        # highlighted in the first channel of the visualization.
        delta = 15
        below_delta = valid_mask * (depth_diff < delta)
        below_delta_vis = (255 * below_delta).astype(np.uint8)

        depth_diff_vis = 255 * depth_for_vis(depth_diff - depth_diff.min())

        depth_diff_vis = np.dstack(
            [below_delta_vis, depth_diff_vis, depth_diff_vis]).astype(np.uint8)

        depth_diff_vis[np.logical_not(valid_mask)] = 0

        depth_info = _depth_diff_stats(depth_diff[valid_mask])
        depth_diff_vis = write_text_on_image(depth_diff_vis, depth_info)
        inout.save_im(vis_depth_diff_path, depth_diff_vis)


def _depth_diff_stats(depth_diff_valid):
    """Builds the min/max/mean text entries for the depth-difference image.

    :param depth_diff_valid: 1D ndarray with the depth differences at the valid
      pixels. May be empty.
    :return: List of dictionaries with keys 'name', 'fmt' and 'val', in the
      form passed to write_text_on_image. All values are NaN for an empty
      input, because NumPy reductions such as np.min raise ValueError on
      zero-size arrays.
    """
    if depth_diff_valid.size:
        values = (
            np.min(depth_diff_valid),
            np.max(depth_diff_valid),
            np.mean(depth_diff_valid),
        )
    else:
        values = (float('nan'),) * 3

    names = ('min diff', 'max diff', 'mean diff')
    return [
        {'name': name, 'fmt': ':.3f', 'val': val}
        for name, val in zip(names, values)
    ]