def cou_bb_proj(R_est, t_est, R_gt, t_gt, K, renderer, obj_id):
  """Complement over Union of projected 2D bounding boxes.

  :param R_est: 3x3 ndarray with the estimated rotation matrix.
  :param t_est: 3x1 ndarray with the estimated translation vector.
  :param R_gt: 3x3 ndarray with the ground-truth rotation matrix.
  :param t_gt: 3x1 ndarray with the ground-truth translation vector.
  :param K: 3x3 ndarray with an intrinsic camera matrix.
  :param renderer: Instance of the Renderer class (see renderer.py).
  :param obj_id: Object identifier.
  :return: The calculated error.
  """
  # Render depth images of the model at the estimated and the ground-truth pose.
  fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]
  depth_est = renderer.render_object(
    obj_id, R_est, t_est, fx, fy, cx, cy)['depth']
  depth_gt = renderer.render_object(
    obj_id, R_gt, t_gt, fx, fy, cx, cy)['depth']

  # Masks of the model rendered at the estimated and the ground-truth pose,
  # and the 2D bounding boxes of these masks.
  mask_est = depth_est > 0
  mask_gt = depth_gt > 0

  ys_est, xs_est = mask_est.nonzero()
  bb_est = misc.calc_2d_bbox(xs_est, ys_est, im_size=None, clip=False)

  ys_gt, xs_gt = mask_gt.nonzero()
  bb_gt = misc.calc_2d_bbox(xs_gt, ys_gt, im_size=None, clip=False)

  e = 1.0 - misc.iou(bb_est, bb_gt)
  return e
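
# For illustration, a minimal sketch of the IoU computation that misc.iou is
# expected to perform for the (x, y, w, h) boxes produced by misc.calc_2d_bbox.
# The name iou_xywh and the exact box convention are assumptions made here;
# the authoritative implementation lives in misc.py.
def iou_xywh(bb_a, bb_b):
  """Intersection over Union of two axis-aligned (x, y, w, h) boxes."""
  # Corners of the intersection rectangle.
  x1 = max(bb_a[0], bb_b[0])
  y1 = max(bb_a[1], bb_b[1])
  x2 = min(bb_a[0] + bb_a[2], bb_b[0] + bb_b[2])
  y2 = min(bb_a[1] + bb_a[3], bb_b[1] + bb_b[3])
  if x2 <= x1 or y2 <= y1:
    return 0.0  # The boxes do not overlap.
  inter = float((x2 - x1) * (y2 - y1))
  union = bb_a[2] * bb_a[3] + bb_b[2] * bb_b[3] - inter
  return inter / union

# Example: iou_xywh([0, 0, 10, 10], [5, 5, 10, 10]) == 25 / 175 ~= 0.143.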
px_count_visib = visib_gt.sum()

# Visible surface fraction.
if px_count_all > 0:
  visib_fract = px_count_visib / float(px_count_all)
else:
  visib_fract = 0.0

# Bounding box of the whole object silhouette
# (including the truncated part).
bbox = [-1, -1, -1, -1]
if px_count_visib > 0:
  ys, xs = obj_mask_gt_large.nonzero()
  ys -= ren_cy_offset
  xs -= ren_cx_offset
  bbox = misc.calc_2d_bbox(xs, ys, im_size)

# Bounding box of the visible surface part.
bbox_visib = [-1, -1, -1, -1]
if px_count_visib > 0:
  ys, xs = visib_gt.nonzero()
  bbox_visib = misc.calc_2d_bbox(xs, ys, im_size)

# Store the calculated info.
scene_gt_info[im_id].append({
  'px_count_all': int(px_count_all),
  'px_count_valid': int(px_count_valid),
  'px_count_visib': int(px_count_visib),
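
# A sketch of what the calc_2d_bbox helper used above plausibly computes: the
# tight (x, y, width, height) box around the given pixel coordinates,
# optionally clipped to the image extent. The signature mirrors the calls
# above, but the body is an assumption, not the implementation from misc.py.
import numpy as np

def calc_2d_bbox_sketch(xs, ys, im_size=None, clip=False):
  """Returns [x, y, w, h] of the tight box around pixels (xs[i], ys[i])."""
  x_min, x_max = int(xs.min()), int(xs.max())
  y_min, y_max = int(ys.min()), int(ys.max())
  if clip:
    # im_size is (width, height); keep the box inside the image.
    x_min, y_min = max(x_min, 0), max(y_min, 0)
    x_max = min(x_max, im_size[0] - 1)
    y_max = min(y_max, im_size[1] - 1)
  return [x_min, y_min, x_max - x_min + 1, y_max - y_min + 1]

# Example: calc_2d_bbox_sketch(np.array([3, 10]), np.array([5, 8])) == [3, 5, 8, 4].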
def vis_object_poses(
      poses, K, renderer, rgb=None, depth=None, vis_rgb_path=None,
      vis_depth_diff_path=None, vis_rgb_resolve_visib=False):
  """Visualizes 3D object models in specified poses in a single image.

  Two visualizations are created:
  1. An RGB visualization (if vis_rgb_path is not None).
  2. A depth-difference visualization (if vis_depth_diff_path is not None).

  :param poses: List of dictionaries, each with info about one pose:
    - 'obj_id': Object ID.
    - 'R': 3x3 ndarray with a rotation matrix.
    - 't': 3x1 ndarray with a translation vector.
    - 'text_info': Info to write at the object (see write_text_on_image).
  :param K: 3x3 ndarray with an intrinsic camera matrix.
  :param renderer: Instance of the Renderer class (see renderer.py).
  :param rgb: ndarray with the RGB input image.
  :param depth: ndarray with the depth input image.
  :param vis_rgb_path: Path to the output RGB visualization.
  :param vis_depth_diff_path: Path to the output depth-difference visualization.
  :param vis_rgb_resolve_visib: Whether to resolve visibility of the objects
    (i.e. only the closest object is visualized at each pixel).
  """
  fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]

  # Indicators of visualization types.
  vis_rgb = vis_rgb_path is not None
  vis_depth_diff = vis_depth_diff_path is not None

  if vis_rgb and rgb is None:
    raise ValueError('RGB visualization triggered but RGB image not provided.')

  if (vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib)) and depth is None:
    raise ValueError('Depth visualization triggered but D image not provided.')

  # Prepare images for rendering.
  im_size = None
  ren_rgb = None
  ren_rgb_info = None
  ren_depth = None

  if vis_rgb:
    im_size = (rgb.shape[1], rgb.shape[0])
    ren_rgb = np.zeros(rgb.shape, np.uint8)
    ren_rgb_info = np.zeros(rgb.shape, np.uint8)

  if vis_depth_diff:
    if im_size and im_size != (depth.shape[1], depth.shape[0]):
      raise ValueError('The RGB and D images must have the same size.')
    else:
      im_size = (depth.shape[1], depth.shape[0])

  if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib):
    ren_depth = np.zeros((im_size[1], im_size[0]), np.float32)

  # Render the pose estimates one by one.
  for pose in poses:

    # Rendering.
    ren_out = renderer.render_object(
      pose['obj_id'], pose['R'], pose['t'], fx, fy, cx, cy)

    m_rgb = None
    if vis_rgb:
      m_rgb = ren_out['rgb']

    m_mask = None
    if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib):
      m_depth = ren_out['depth']

      # Get mask of the surface parts that are closer than the
      # surfaces rendered before.
      visible_mask = np.logical_or(ren_depth == 0, m_depth < ren_depth)
      m_mask = np.logical_and(m_depth != 0, visible_mask)

      ren_depth[m_mask] = m_depth[m_mask].astype(ren_depth.dtype)

    # Combine the RGB renderings.
    if vis_rgb:
      if vis_rgb_resolve_visib:
        ren_rgb[m_mask] = m_rgb[m_mask].astype(ren_rgb.dtype)
      else:
        ren_rgb_f = ren_rgb.astype(np.float32) + m_rgb.astype(np.float32)
        ren_rgb_f[ren_rgb_f > 255] = 255
        ren_rgb = ren_rgb_f.astype(np.uint8)

      # Draw 2D bounding box and write text info.
      obj_mask = np.sum(m_rgb > 0, axis=2)
      ys, xs = obj_mask.nonzero()
      if len(ys):
        # bbox_color = model_color
        # text_color = model_color
        bbox_color = (0.3, 0.3, 0.3)
        text_color = (1.0, 1.0, 1.0)
        text_size = 11

        bbox = misc.calc_2d_bbox(xs, ys, im_size)
        im_size = (obj_mask.shape[1], obj_mask.shape[0])
        ren_rgb_info = draw_rect(ren_rgb_info, bbox, bbox_color)

        if 'text_info' in pose:
          text_loc = (bbox[0] + 2, bbox[1])
          ren_rgb_info = write_text_on_image(
            ren_rgb_info, pose['text_info'], text_loc, color=text_color,
            size=text_size)

  # Blend and save the RGB visualization.
  if vis_rgb:
    vis_im_rgb = 0.5 * rgb.astype(np.float32) + \
                 0.5 * ren_rgb.astype(np.float32) + \
                 1.0 * ren_rgb_info.astype(np.float32)
    vis_im_rgb[vis_im_rgb > 255] = 255
    misc.ensure_dir(os.path.dirname(vis_rgb_path))
    inout.save_im(vis_rgb_path, vis_im_rgb.astype(np.uint8), jpg_quality=95)

  # Save the image of depth differences.
  if vis_depth_diff:
    # Calculate the depth difference at pixels where both depth maps
    # are valid.
    valid_mask = (depth > 0) * (ren_depth > 0)
    depth_diff = valid_mask * (depth - ren_depth.astype(np.float32))

    f, ax = plt.subplots(1, 1)
    cax = ax.matshow(depth_diff)
    ax.axis('off')
    ax.set_title('captured - GT depth [mm]')
    f.colorbar(cax, fraction=0.03, pad=0.01)
    f.tight_layout(pad=0)

    # Make sure the output directory exists (regardless of whether the RGB
    # visualization was saved, as the two paths may differ).
    misc.ensure_dir(os.path.dirname(vis_depth_diff_path))
    plt.savefig(vis_depth_diff_path, bbox_inches='tight', pad_inches=0)
    plt.close()
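
# A hypothetical usage sketch for vis_object_poses. The renderer setup follows
# the bop_toolkit_lib conventions (renderer.create_renderer/add_object), but
# every path, the image size, the intrinsics and the object ID below are
# placeholders, not values taken from this repository.
if __name__ == '__main__':
  import numpy as np
  from bop_toolkit_lib import inout, renderer

  width, height = 640, 480
  ren = renderer.create_renderer(width, height, renderer_type='vispy')
  ren.add_object(1, 'path/to/models/obj_000001.ply')  # Placeholder path.

  rgb = inout.load_im('path/to/rgb.png')         # Placeholder path.
  depth = inout.load_depth('path/to/depth.png')  # Placeholder path.
  K = np.array([[572.4, 0.0, 325.3],
                [0.0, 573.6, 242.0],
                [0.0, 0.0, 1.0]])  # Placeholder intrinsics.

  poses = [{
    'obj_id': 1,
    'R': np.eye(3),
    't': np.array([[0.0], [0.0], [1000.0]]),  # 1 m in front of the camera.
  }]
  vis_object_poses(
    poses, K, ren, rgb=rgb, depth=depth,
    vis_rgb_path='vis/rgb.jpg', vis_depth_diff_path='vis/depth_diff.jpg',
    vis_rgb_resolve_visib=True)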
  if vis_rgb:
    misc.ensure_dir(os.path.dirname(vis_rgb_path))

    vis_im_rgb = 0.5 * rgb.astype(np.float32) + \
                 0.5 * ren_rgb.astype(np.float32) + \
                 1.0 * ren_rgb_info.astype(np.float32)
    vis_im_rgb[vis_im_rgb > 255] = 255
    inout.save_im(vis_rgb_path, vis_im_rgb.astype(np.uint8), jpg_quality=95)

  # Save the image of depth differences.
  if vis_depth_diff:
    misc.ensure_dir(os.path.dirname(vis_depth_diff_path))

    # Calculate the depth difference at pixels where both depth maps are valid.
    valid_mask = (depth > 0) * (ren_depth > 0)
    depth_diff = valid_mask * (ren_depth.astype(np.float32) - depth)

    # Get a mask of pixels where the rendered depth is at most the tolerance
    # delta behind the captured depth (the same tolerance is used in VSD).
    delta = 15
    below_delta = valid_mask * (depth_diff < delta)
    below_delta_vis = (255 * below_delta).astype(np.uint8)

    depth_diff_vis = 255 * depth_for_vis(depth_diff - depth_diff.min())

    # Pixels where the rendered depth is more than the tolerance delta behind
    # the captured depth will be cyan.
    depth_diff_vis = np.dstack(
      [below_delta_vis, depth_diff_vis, depth_diff_vis]).astype(np.uint8)

    depth_diff_vis[np.logical_not(valid_mask)] = 0
    depth_diff_valid = depth_diff[valid_mask]
    depth_info = [
      {'name': 'min diff', 'fmt': ':.3f', 'val': np.min(depth_diff_valid)},
      {'name': 'max diff', 'fmt': ':.3f', 'val': np.max(depth_diff_valid)},
      {'name': 'mean diff', 'fmt': ':.3f', 'val': np.mean(depth_diff_valid)},
    ]
    depth_diff_vis = write_text_on_image(depth_diff_vis, depth_info)
    inout.save_im(vis_depth_diff_path, depth_diff_vis)
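
# depth_for_vis is used above but defined elsewhere in this module. For
# reference, a minimal sketch of such a helper under the assumption that it
# rescales the values of a map to [0, 1] so they can be mapped to pixel
# intensities; the name depth_for_vis_sketch and the exact scaling are
# assumptions, not the actual implementation.
import numpy as np

def depth_for_vis_sketch(depth):
  """Rescales a depth (or depth-difference) map linearly to [0, 1]."""
  depth_n = depth.astype(np.float64)
  depth_n -= depth_n.min()
  max_val = depth_n.max()
  if max_val > 0:
    depth_n /= max_val  # Avoid division by zero for constant maps.
  return depth_n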