def save_errors(_error_sign, _scene_errs):
  # Save the calculated errors to a JSON file.
  errors_path = p['out_errors_tpath'].format(
    result_name=result_name, error_sign=_error_sign, scene_id=scene_id)
  misc.ensure_dir(os.path.dirname(errors_path))
  misc.log('Saving errors to: {}'.format(errors_path))
  inout.save_json(errors_path, _scene_errs)
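# Illustrative call of save_errors (a sketch: save_errors is a closure over
# p, result_name and scene_id from the enclosing evaluation script, and the
# error-list layout below is an assumption about the serialized format, not
# a documented schema).
_scene_errs_example = [{
  'im_id': 0,        # Image ID.
  'obj_id': 1,       # Object ID.
  'est_id': 0,       # Index of the pose estimate.
  'score': 0.95,     # Confidence of the estimate.
  'errors': {0: [12.3]},  # Error value(s) per GT annotation ID.
}]
save_errors('error_sign_example', _scene_errs_example)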
def fragmentation_fps_test():
  output_dir = 'fragmentation_test_output'
  misc.ensure_dir(output_dir)

  datasets = ['hb', 'ycbv', 'tless', 'lmo', 'icbin', 'itodd', 'tudl']
  for dataset in datasets:

    model_type = None
    if dataset == 'tless':
      model_type = 'reconst'
    elif dataset == 'itodd':
      model_type = 'dense'

    dp_model = dataset_params.get_model_params(
      config.BOP_PATH, dataset, model_type)

    for obj_id in dp_model['obj_ids']:
      print('Fragmenting object {} from dataset {}...'.format(obj_id, dataset))

      model_fpath = dp_model['model_tpath'].format(obj_id=obj_id)
      model = inout.load_ply(model_fpath)

      # Fragmentation by the furthest point sampling.
      frag_centers, vertex_frag_ids = fragment.fragmentation_fps(
        model['pts'], num_frags=256)

      # Fragment colors.
      frag_colors = frag_centers - frag_centers.min()
      frag_colors = (255.0 * frag_colors / frag_colors.max()).astype(np.uint8)

      # Color the model points by the fragment colors.
      pts_colors = np.zeros((model['pts'].shape[0], 3), np.uint8)
      for frag_id in range(len(frag_centers)):
        pts_colors[vertex_frag_ids == frag_id] = frag_colors[frag_id]

      inout.save_ply(
        os.path.join(
          output_dir, '{}_obj_{:02d}_fragments.ply'.format(dataset, obj_id)),
        {'pts': model['pts'], 'faces': model['faces'], 'colors': pts_colors})
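# A minimal sketch of furthest point sampling (FPS), for readers unfamiliar
# with what fragment.fragmentation_fps computes. This is an illustrative
# assumption, not the toolkit's actual implementation: it greedily picks the
# point furthest from the already selected centers and assigns each vertex to
# its nearest center.
import numpy as np

def fragmentation_fps_sketch(pts, num_frags):
  # Start from the point closest to the centroid (an arbitrary choice).
  center_ids = [int(np.linalg.norm(pts - pts.mean(axis=0), axis=1).argmin())]

  # Distance from each point to the nearest selected center.
  dists = np.linalg.norm(pts - pts[center_ids[0]], axis=1)
  for _ in range(num_frags - 1):
    # Greedily add the point that is furthest from all selected centers.
    new_id = int(dists.argmax())
    center_ids.append(new_id)
    dists = np.minimum(dists, np.linalg.norm(pts - pts[new_id], axis=1))

  frag_centers = pts[center_ids]

  # Assign every vertex to its nearest fragment center.
  vertex_frag_ids = np.linalg.norm(
    pts[:, None, :] - frag_centers[None, :, :], axis=2).argmin(axis=1)
  return frag_centers, vertex_frag_ids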
def end(self, session):
  global_step = training_util.global_step(session, self.global_step)

  # Save the confusion matrix to a text file.
  df = pd.DataFrame(self.cm)
  cm_table = tabulate(df, headers='keys', tablefmt='psql')
  cm_path = os.path.join(self.log_dir, 'cm_{}.txt'.format(global_step))
  misc.ensure_dir(os.path.dirname(cm_path))
  with open(cm_path, 'w') as f:
    f.write(cm_table)

  # Calculate mIoU of object segmentation.
  bg_iou = 1.0
  fg_ious = []
  for cls in range(self.num_cls):
    intersection = self.cm[cls, cls]
    union = np.sum(self.cm[cls, :]) + np.sum(self.cm[:, cls]) - intersection
    if union > 0:
      iou = intersection / float(union)
      if cls == 0:
        bg_iou = iou
      else:
        fg_ious.append(iou)

  if len(fg_ious):
    miou_fg = np.mean(fg_ious)
    miou_all = np.mean(fg_ious + [bg_iou])
  else:
    miou_fg = 0.0
    miou_all = 0.0

  # mIoU calculated over foreground and background classes.
  self.add_scalar_summary('eval/obj_cls_miou_all', miou_all, global_step)

  # mIoU calculated only over foreground classes.
  self.add_scalar_summary('eval/obj_cls_miou_fg', miou_fg, global_step)

  self.summary_writer.flush()
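# A small self-contained check of the IoU formula used above: for class c,
# IoU = cm[c, c] / (row_sum(c) + col_sum(c) - cm[c, c]). The 3x3 matrix below
# is made-up data, purely for illustration.
import numpy as np

cm = np.array([[50, 2, 3],   # Class 0 (background).
               [4, 30, 1],   # Class 1.
               [2, 2, 20]])  # Class 2.
for cls in range(3):
  inter = cm[cls, cls]
  union = cm[cls, :].sum() + cm[:, cls].sum() - inter
  print('class {}: IoU = {:.3f}'.format(cls, inter / float(union)))
# Expected: class 0 -> 50/61, class 1 -> 30/39, class 2 -> 20/28.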
      })

      # Visualization of the visibility mask.
      if p['vis_visibility_masks']:
        depth_im_vis = visualization.depth_for_vis(depth, 0.2, 1.0)
        depth_im_vis = np.dstack([depth_im_vis] * 3)

        # np.float is deprecated in NumPy; use the explicit float64 dtype.
        visib_gt_vis = visib_gt.astype(np.float64)
        zero_ch = np.zeros(visib_gt_vis.shape)
        visib_gt_vis = np.dstack([zero_ch, visib_gt_vis, zero_ch])

        vis = 0.5 * depth_im_vis + 0.5 * visib_gt_vis
        vis[vis > 1] = 1

        vis_path = p['vis_mask_visib_tpath'].format(
          delta=p['delta'], dataset=p['dataset'], split=p['dataset_split'],
          scene_id=scene_id, im_id=im_id, gt_id=gt_id)
        misc.ensure_dir(os.path.dirname(vis_path))
        inout.save_im(vis_path, vis)

  # Save the info for the current scene.
  scene_gt_info_path = dp_split['scene_gt_info_tpath'].format(
    scene_id=scene_id)
  misc.ensure_dir(os.path.dirname(scene_gt_info_path))
  inout.save_json(scene_gt_info_path, scene_gt_info)
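# A hedged sketch of what visualization.depth_for_vis(depth, valid_start,
# valid_end) might do (an assumption, not the toolkit's actual code): linearly
# map the valid depth range to [valid_start, valid_end] for display, keeping
# invalid (zero) pixels at zero.
import numpy as np

def depth_for_vis_sketch(depth, valid_start=0.2, valid_end=1.0):
  mask = depth > 0
  depth_n = np.zeros_like(depth, dtype=np.float64)
  if mask.any():
    d_min, d_max = depth[mask].min(), depth[mask].max()
    if d_max > d_min:
      depth_n[mask] = valid_start + (valid_end - valid_start) * \
          (depth[mask] - d_min) / (d_max - d_min)
    else:
      depth_n[mask] = valid_start
  return depth_n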
for scene_id in dp_split['scene_ids']:

  # Load scene GT.
  scene_gt_path = dp_split['scene_gt_tpath'].format(scene_id=scene_id)
  scene_gt = inout.load_scene_gt(scene_gt_path)

  # Load scene camera.
  scene_camera_path = dp_split['scene_camera_tpath'].format(scene_id=scene_id)
  scene_camera = inout.load_scene_camera(scene_camera_path)

  # Create folders for the output masks (if they do not exist yet).
  mask_dir_path = os.path.dirname(
    dp_split['mask_tpath'].format(scene_id=scene_id, im_id=0, gt_id=0))
  misc.ensure_dir(mask_dir_path)

  mask_visib_dir_path = os.path.dirname(
    dp_split['mask_visib_tpath'].format(scene_id=scene_id, im_id=0, gt_id=0))
  misc.ensure_dir(mask_visib_dir_path)

  # Initialize a renderer.
  misc.log('Initializing renderer...')
  width, height = dp_split['im_size']
  ren = renderer.create_renderer(
    width, height, renderer_type=p['renderer_type'], mode='depth')

  # Add object models.
  for obj_id in dp_model['obj_ids']:
    model_path = dp_model['model_tpath'].format(obj_id=obj_id)
    ren.add_object(obj_id, model_path)
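# For context, a minimal sketch (an assumption, not the toolkit's exact code)
# of how the full and visible masks can then be derived per image: the full
# mask marks pixels where the object's rendered depth is valid, and the
# visible mask additionally requires the rendered surface to be at most a
# tolerance `delta` behind the captured depth.
import numpy as np

def masks_from_depth_sketch(depth_ren, depth_captured, delta=15.0):
  mask_full = depth_ren > 0
  # Visible where the rendered depth is valid and not occluded by the scene,
  # i.e. not further than delta behind the captured depth.
  mask_visib = mask_full & (depth_captured > 0) & \
      (depth_ren < depth_captured + delta)
  return mask_full.astype(np.uint8) * 255, mask_visib.astype(np.uint8) * 255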
def vis_object_poses(
      poses, K, renderer, rgb=None, depth=None, vis_rgb_path=None,
      vis_depth_diff_path=None, vis_rgb_resolve_visib=False):
  """Visualizes 3D object models in specified poses in a single image.

  Two visualizations are created:
  1. An RGB visualization (if vis_rgb_path is not None).
  2. A depth-difference visualization (if vis_depth_diff_path is not None).

  :param poses: List of dictionaries, each with info about one pose:
    - 'obj_id': Object ID.
    - 'R': 3x3 ndarray with a rotation matrix.
    - 't': 3x1 ndarray with a translation vector.
    - 'text_info': Info to write at the object (see write_text_on_image).
  :param K: 3x3 ndarray with an intrinsic camera matrix.
  :param renderer: Instance of the Renderer class (see renderer.py).
  :param rgb: ndarray with the RGB input image.
  :param depth: ndarray with the depth input image.
  :param vis_rgb_path: Path to the output RGB visualization.
  :param vis_depth_diff_path: Path to the output depth-difference visualization.
  :param vis_rgb_resolve_visib: Whether to resolve visibility of the objects
    (i.e. only the closest object is visualized at each pixel).
  """
  fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]

  # Indicators of visualization types.
  vis_rgb = vis_rgb_path is not None
  vis_depth_diff = vis_depth_diff_path is not None

  if vis_rgb and rgb is None:
    raise ValueError('RGB visualization triggered but RGB image not provided.')

  if (vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib)) and depth is None:
    raise ValueError('Depth visualization triggered but D image not provided.')

  # Prepare images for rendering.
  im_size = None
  ren_rgb = None
  ren_rgb_info = None
  ren_depth = None

  if vis_rgb:
    im_size = (rgb.shape[1], rgb.shape[0])
    ren_rgb = np.zeros(rgb.shape, np.uint8)
    ren_rgb_info = np.zeros(rgb.shape, np.uint8)

  if vis_depth_diff:
    if im_size and im_size != (depth.shape[1], depth.shape[0]):
      raise ValueError('The RGB and D images must have the same size.')
    else:
      im_size = (depth.shape[1], depth.shape[0])

  if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib):
    ren_depth = np.zeros((im_size[1], im_size[0]), np.float32)

  # Render the pose estimates one by one.
  for pose in poses:

    # Rendering.
    ren_out = renderer.render_object(
      pose['obj_id'], pose['R'], pose['t'], fx, fy, cx, cy)

    m_rgb = None
    if vis_rgb:
      m_rgb = ren_out['rgb']

    m_mask = None
    if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib):
      m_depth = ren_out['depth']

      # Get mask of the surface parts that are closer than the surfaces
      # rendered before.
      visible_mask = np.logical_or(ren_depth == 0, m_depth < ren_depth)
      m_mask = np.logical_and(m_depth != 0, visible_mask)

      ren_depth[m_mask] = m_depth[m_mask].astype(ren_depth.dtype)

    # Combine the RGB renderings.
    if vis_rgb:
      if vis_rgb_resolve_visib:
        ren_rgb[m_mask] = m_rgb[m_mask].astype(ren_rgb.dtype)
      else:
        ren_rgb_f = ren_rgb.astype(np.float32) + m_rgb.astype(np.float32)
        ren_rgb_f[ren_rgb_f > 255] = 255
        ren_rgb = ren_rgb_f.astype(np.uint8)

      # Draw a 2D bounding box and write text info.
      obj_mask = np.sum(m_rgb > 0, axis=2)
      ys, xs = obj_mask.nonzero()
      if len(ys):
        bbox_color = (0.3, 0.3, 0.3)
        text_color = (1.0, 1.0, 1.0)
        text_size = 11

        # Update im_size before it is used to calculate the bounding box.
        im_size = (obj_mask.shape[1], obj_mask.shape[0])
        bbox = misc.calc_2d_bbox(xs, ys, im_size)
        ren_rgb_info = draw_rect(ren_rgb_info, bbox, bbox_color)

        if 'text_info' in pose:
          text_loc = (bbox[0] + 2, bbox[1])
          ren_rgb_info = write_text_on_image(
            ren_rgb_info, pose['text_info'], text_loc, color=text_color,
            size=text_size)

  # Blend and save the RGB visualization.
  if vis_rgb:
    misc.ensure_dir(os.path.dirname(vis_rgb_path))

    vis_im_rgb = 0.5 * rgb.astype(np.float32) + \
                 0.5 * ren_rgb.astype(np.float32) + \
                 1.0 * ren_rgb_info.astype(np.float32)
    vis_im_rgb[vis_im_rgb > 255] = 255
    inout.save_im(vis_rgb_path, vis_im_rgb.astype(np.uint8), jpg_quality=95)

  # Save the image of depth differences.
  if vis_depth_diff:
    misc.ensure_dir(os.path.dirname(vis_depth_diff_path))

    # Calculate the depth difference at pixels where both depth maps are valid.
    valid_mask = (depth > 0) * (ren_depth > 0)
    depth_diff = valid_mask * (ren_depth.astype(np.float32) - depth)

    # Get mask of pixels where the rendered depth is at most by the tolerance
    # delta behind the captured depth (this tolerance is used in VSD).
    delta = 15
    below_delta = valid_mask * (depth_diff < delta)
    below_delta_vis = (255 * below_delta).astype(np.uint8)

    depth_diff_vis = 255 * depth_for_vis(depth_diff - depth_diff.min())

    # Pixels where the rendered depth is more than the tolerance delta behind
    # the captured depth will be cyan.
    depth_diff_vis = np.dstack(
      [below_delta_vis, depth_diff_vis, depth_diff_vis]).astype(np.uint8)

    depth_diff_vis[np.logical_not(valid_mask)] = 0
    depth_diff_valid = depth_diff[valid_mask]
    depth_info = [
      {'name': 'min diff', 'fmt': ':.3f', 'val': np.min(depth_diff_valid)},
      {'name': 'max diff', 'fmt': ':.3f', 'val': np.max(depth_diff_valid)},
      {'name': 'mean diff', 'fmt': ':.3f', 'val': np.mean(depth_diff_valid)},
    ]
    depth_diff_vis = write_text_on_image(depth_diff_vis, depth_info)
    inout.save_im(vis_depth_diff_path, depth_diff_vis)
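# Example usage of vis_object_poses (illustrative: `rgb` and `depth` are
# assumed to be loaded with inout.load_im / inout.load_depth, the renderer
# type 'vispy' is assumed to be available, and all IDs, intrinsics and paths
# below are made up for the example).
K = np.array([[572.4, 0.0, 325.3],
              [0.0, 573.6, 242.0],
              [0.0, 0.0, 1.0]])
ren = renderer.create_renderer(
  640, 480, renderer_type='vispy', mode='rgb+depth')
ren.add_object(1, dp_model['model_tpath'].format(obj_id=1))

poses = [{
  'obj_id': 1,
  'R': np.eye(3),
  't': np.array([[0.0], [0.0], [500.0]]),  # 0.5 m in front of the camera.
  'text_info': [{'name': 'obj', 'val': 1, 'fmt': ''}],
}]
vis_object_poses(
  poses, K, ren, rgb=rgb, depth=depth,
  vis_rgb_path='vis/example_rgb.jpg',
  vis_depth_diff_path='vis/example_depth_diff.jpg',
  vis_rgb_resolve_visib=True)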
for obj_id in dp_model['obj_ids']:

  # Load the object model.
  misc.log('Loading 3D model of object {}...'.format(obj_id))
  model_path = dp_model['model_tpath'].format(obj_id=obj_id)
  ren.add_object(obj_id, model_path)

  poses = misc.get_symmetry_transformations(
    models_info[obj_id], p['max_sym_disc_step'])

  for pose_id, pose in enumerate(poses):
    for view_id, view in enumerate(p['views']):

      R = view['R'].dot(pose['R'])
      t = view['R'].dot(pose['t']) + view['t']
      vis_rgb = ren.render_object(obj_id, R, t, fx, fy, cx, cy)['rgb']

      # Path to the output RGB visualization.
      vis_rgb_path = p['vis_rgb_tpath'].format(
        vis_path=p['vis_path'], dataset=p['dataset'], obj_id=obj_id,
        view_id=view_id, pose_id=pose_id)
      misc.ensure_dir(os.path.dirname(vis_rgb_path))
      inout.save_im(vis_rgb_path, vis_rgb)

misc.log('Done.')
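# For intuition, a hedged sketch of what the symmetry poses look like for a
# continuous rotational symmetry around the z-axis. The toolkit's
# misc.get_symmetry_transformations derives the number of steps from
# max_sym_disc_step and the object size; here the step count is simply given.
import numpy as np

def z_symmetry_poses_sketch(num_steps):
  poses = []
  for i in range(num_steps):
    a = 2.0 * np.pi * i / num_steps
    # Rotation by angle a around the z-axis; identity translation.
    R = np.array([[np.cos(a), -np.sin(a), 0.0],
                  [np.sin(a),  np.cos(a), 0.0],
                  [0.0,        0.0,       1.0]])
    poses.append({'R': R, 't': np.zeros((3, 1))})
  return poses

# E.g. 36 poses, one per 10 degrees:
# poses = z_symmetry_poses_sketch(36)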
out_scene_camera_tpath = os.path.join(
  '{out_path}', 'scene_camera', '{obj_id:06d}_scene_camera.json')
out_scene_gt_tpath = os.path.join(
  '{out_path}', 'scene_gt', '{obj_id:06d}_scene_gt.json')
out_views_vis_tpath = os.path.join(
  '{out_path}', 'views_radius', '{obj_id:06d}_views_radius={radius}.ply')

# Load colors.
colors_path = os.path.join(os.path.dirname(inout.__file__), 'colors.json')
colors = inout.load_json(colors_path)
# Output path templates.
out_rgb_tpath = os.path.join(
  '{out_path}', '{obj_id:06d}', 'rgb', '{im_id:06d}.png')
out_depth_tpath = os.path.join(
  '{out_path}', '{obj_id:06d}', 'depth', '{im_id:06d}.png')
out_scene_camera_tpath = os.path.join(
  '{out_path}', '{obj_id:06d}', 'scene_camera.json')
out_scene_gt_tpath = os.path.join(
  '{out_path}', '{obj_id:06d}', 'scene_gt.json')
out_views_vis_tpath = os.path.join(
  '{out_path}', '{obj_id:06d}', 'views_radius={radius}.ply')

################################################################################

out_path = out_tpath.format(dataset=dataset)
misc.ensure_dir(out_path)

# Load dataset parameters.
dp_split_test = dataset_params.get_split_params(datasets_path, dataset, 'test')
dp_model = dataset_params.get_model_params(datasets_path, dataset, model_type)
dp_camera = dataset_params.get_camera_params(datasets_path, dataset, cam_type)

if not obj_ids:
  obj_ids = dp_model['obj_ids']

# Image size and K for the RGB image (potentially with SSAA).
im_size_rgb = [int(round(x * float(ssaa_fact))) for x in dp_camera['im_size']]
K_rgb = dp_camera['K'] * ssaa_fact

# Intrinsic parameters for RGB rendering.
fx_rgb, fy_rgb, cx_rgb, cy_rgb = \
  K_rgb[0, 0], K_rgb[1, 1], K_rgb[0, 2], K_rgb[1, 2]
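# The SSAA factor above implements super-sampling anti-aliasing: the image is
# rendered at ssaa_fact times the target resolution and then downscaled. A
# hedged sketch of the downscaling step (an assumption; cv2.resize with area
# interpolation is one reasonable choice, not necessarily what this script
# actually uses):
import cv2

def downscale_ssaa_sketch(im_rgb_ssaa, target_size):
  # target_size is (width, height) of the original camera resolution.
  return cv2.resize(im_rgb_ssaa, target_size, interpolation=cv2.INTER_AREA)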
    vis_depth_diff_path = None
    if p['vis_depth_diff']:
      vis_depth_diff_path = p['vis_depth_diff_tpath'].format(
        vis_path=p['vis_path'], dataset=p['dataset'],
        split=p['dataset_split'], scene_id=scene_id, im_id=im_id)

    # Visualization.
    # visualization.vis_object_poses(
    #   poses=gt_poses, K=K, renderer=ren, rgb=rgb, depth=depth,
    #   vis_rgb_path=vis_rgb_path, vis_depth_diff_path=vis_depth_diff_path,
    #   vis_rgb_resolve_visib=p['vis_rgb_resolve_visib'])

    # Per-object scores: [0] num_rendered, [1] visible_of_rendered,
    # [2] valid_of_rendered, [3] valid_of_visible.
    obj_vis_scores = visualization.eval_object_hand_poses(
      dp_split, scene_id, im_id, gt_poses, K, ren, depth)
    if scene_id not in obj_vis_dict:
      obj_vis_dict[scene_id] = {}
    obj_vis_dict[scene_id][im_id] = obj_vis_scores

# Save the object visibility scores.
misc.ensure_dir(config.output_path)
obj_vis_scores_filename = os.path.join(
  config.output_path, 'obj_vis_scores.json')
with open(obj_vis_scores_filename, 'w') as outfile:
  json.dump(obj_vis_dict, outfile)

misc.log('Done.')
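# Reading the saved scores back (illustrative): JSON turns the integer scene
# and image IDs into strings, so they need to be converted back on load.
with open(obj_vis_scores_filename, 'r') as f:
  loaded = json.load(f)
obj_vis_dict_loaded = {
  int(scene_id): {int(im_id): scores for im_id, scores in im_scores.items()}
  for scene_id, im_scores in loaded.items()}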
  'meshlab_script_path': os.path.join(
    os.path.dirname(os.path.realpath(__file__)), 'meshlab_scripts',
    r'remesh_for_eval_cell=0.25.mlx'),
}

################################################################################

# Load dataset parameters.
dp_model_in = dataset_params.get_model_params(
  p['datasets_path'], p['dataset'], p['model_in_type'])
dp_model_out = dataset_params.get_model_params(
  p['datasets_path'], p['dataset'], p['model_out_type'])

# Attributes to save for the output models.
attrs_to_save = []

# Process models of all objects in the selected dataset.
for obj_id in dp_model_in['obj_ids']:
  misc.log('\n\n\nProcessing model of object {}...\n'.format(obj_id))

  model_in_path = dp_model_in['model_tpath'].format(obj_id=obj_id)
  model_out_path = dp_model_out['model_tpath'].format(obj_id=obj_id)

  misc.ensure_dir(os.path.dirname(model_out_path))

  misc.run_meshlab_script(p['meshlab_server_path'], p['meshlab_script_path'],
                          model_in_path, model_out_path, attrs_to_save)

misc.log('Done.')
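# A hedged sketch of what misc.run_meshlab_script might do (an assumption,
# not the toolkit's actual implementation): invoke meshlabserver from the
# command line with an input mesh, an output mesh and a filter script.
import subprocess

def run_meshlab_script_sketch(meshlab_server_path, meshlab_script_path,
                              model_in_path, model_out_path, attrs_to_save):
  cmd = [meshlab_server_path,
         '-i', model_in_path,
         '-o', model_out_path,
         '-s', meshlab_script_path]
  if attrs_to_save:
    # E.g. ['vc', 'vn'] to keep vertex colors and normals in the output.
    cmd += ['-m'] + list(attrs_to_save)
  subprocess.run(cmd, check=True)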
def vis_object_poses_uv(
      poses, K, renderer, rgb=None, depth=None, vis_rgb_path=None,
      vis_depth_diff_path=None, vis_rgb_resolve_visib=False, vis_uv_path=None,
      vis_mask_path=None):
  """Visualizes 3D object models in specified poses in a single image.

  The following visualizations can be created:
  1. An RGB visualization (if vis_rgb_path is not None).
  2. A depth-difference visualization (if vis_depth_diff_path is not None).
  3. A UV visualization and an object ID mask (if vis_uv_path is not None).

  :param poses: List of dictionaries, each with info about one pose:
    - 'obj_id': Object ID.
    - 'R': 3x3 ndarray with a rotation matrix.
    - 't': 3x1 ndarray with a translation vector.
    - 'text_info': Info to write at the object (see write_text_on_image).
  :param K: 3x3 ndarray with an intrinsic camera matrix.
  :param renderer: Instance of the Renderer class (see renderer.py).
  :param rgb: ndarray with the RGB input image.
  :param depth: ndarray with the depth input image.
  :param vis_rgb_path: Path to the output RGB visualization.
  :param vis_depth_diff_path: Path to the output depth-difference visualization.
  :param vis_rgb_resolve_visib: Whether to resolve visibility of the objects
    (i.e. only the closest object is visualized at each pixel).
  :param vis_uv_path: Path to the output UV visualization.
  :param vis_mask_path: Path to the output object ID mask.
  """
  fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]

  # Indicators of visualization types.
  vis_rgb = vis_rgb_path is not None
  vis_depth_diff = vis_depth_diff_path is not None
  vis_uv = vis_uv_path is not None

  # Check that the required input images were provided.
  if vis_rgb and rgb is None:
    raise ValueError('RGB visualization triggered but RGB image not provided.')

  if (vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib)) and depth is None:
    raise ValueError('Depth visualization triggered but D image not provided.')

  # Prepare images for rendering.
  im_size = None
  ren_rgb = None
  ren_rgb_info = None
  ren_depth = None
  ren_mask = None

  if vis_rgb:
    im_size = (rgb.shape[1], rgb.shape[0])
    ren_rgb = np.zeros(rgb.shape, np.uint8)
    ren_rgb_info = np.zeros(rgb.shape, np.uint8)

  # For the masks.
  if vis_uv:
    im_size = (rgb.shape[1], rgb.shape[0])
    ren_mask = np.zeros(rgb.shape, np.uint8)

  if vis_depth_diff:
    if im_size and im_size != (depth.shape[1], depth.shape[0]):
      raise ValueError('The RGB and D images must have the same size.')
    else:
      im_size = (depth.shape[1], depth.shape[0])

  if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib):
    ren_depth = np.zeros((im_size[1], im_size[0]), np.float32)

  # Render the pose estimates one by one.
  for pose in poses:

    # Rendering (the models are assumed to be textured with UV colors).
    ren_out = renderer.render_object(
      pose['obj_id'], pose['R'], pose['t'], fx, fy, cx, cy)

    m_rgb = None
    if vis_rgb:
      m_rgb = ren_out['rgb']

    m_mask_rgb = None
    if vis_uv:
      # Create a binary mask of the rendered object.
      m_mask_rgb = np.sum(m_rgb > 0, axis=2) >= 1
      m_mask_rgb = np.stack([m_mask_rgb] * 3, axis=2)

      # Erode the mask to remove the 'black' border (borderType and
      # borderValue are passed as keywords; passing them positionally would
      # assign them to other parameters of cv2.erode).
      kernel = np.ones((5, 5), np.uint8)
      m_mask_rgb = cv2.erode(
        m_mask_rgb.astype(np.uint8), kernel,
        borderType=cv2.BORDER_CONSTANT, borderValue=0).astype(np.bool_)

      # Apply the eroded mask to the rendering.
      m_rgb = m_rgb * m_mask_rgb

      # Create a mask with the object ID as the pixel value.
      m_mask_rgb = (m_mask_rgb * pose['obj_id']).astype(np.uint8)

    m_mask = None
    if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib):
      m_depth = ren_out['depth']

      # Get mask of the surface parts that are closer than the surfaces
      # rendered before.
      visible_mask = np.logical_or(ren_depth == 0, m_depth < ren_depth)
      m_mask = np.logical_and(m_depth != 0, visible_mask)

      ren_depth[m_mask] = m_depth[m_mask].astype(ren_depth.dtype)

    # Combine the RGB renderings.
    if vis_rgb:
      if vis_rgb_resolve_visib:
        ren_rgb[m_mask] = m_rgb[m_mask].astype(ren_rgb.dtype)
      else:
        ren_rgb_f = ren_rgb.astype(np.float32) + m_rgb.astype(np.float32)
        ren_rgb_f[ren_rgb_f > 255] = 255
        ren_rgb = ren_rgb_f.astype(np.uint8)

    # Combine the object ID masks: write the ID only at pixels that are not
    # covered by a previously rendered object yet.
    if vis_uv:
      m_mask_idx = (ren_mask == 0) & (m_mask_rgb > 0)
      ren_mask[m_mask_idx] = m_mask_rgb[m_mask_idx]

  # Save the RGB visualization (only the background image is saved here; the
  # blending with the renderings is intentionally disabled).
  if vis_rgb:
    misc.ensure_dir(os.path.dirname(vis_rgb_path))
    inout.save_im(vis_rgb_path, rgb)

  # Save the UV renderings and the object ID masks.
  if vis_uv:
    misc.ensure_dir(os.path.dirname(vis_uv_path))
    ren_uv = ren_rgb.astype(np.uint8)
    inout.save_im(vis_uv_path, ren_uv)

    misc.ensure_dir(os.path.dirname(vis_mask_path))
    ren_mask = ren_mask.astype(np.uint8)
    inout.save_im(vis_mask_path, ren_mask)

  # Save the image of depth differences.
  if vis_depth_diff:
    misc.ensure_dir(os.path.dirname(vis_depth_diff_path))

    # Calculate the depth difference at pixels where both depth maps are valid.
    valid_mask = (depth > 0) * (ren_depth > 0)
    depth_diff = valid_mask * (ren_depth.astype(np.float32) - depth)

    # Get mask of pixels where the rendered depth is at most by the tolerance
    # delta behind the captured depth (this tolerance is used in VSD).
    delta = 15
    below_delta = valid_mask * (depth_diff < delta)
    below_delta_vis = (255 * below_delta).astype(np.uint8)

    depth_diff_vis = 255 * depth_for_vis(depth_diff - depth_diff.min())

    # Pixels where the rendered depth is more than the tolerance delta behind
    # the captured depth will be cyan.
    depth_diff_vis = np.dstack(
      [below_delta_vis, depth_diff_vis, depth_diff_vis]).astype(np.uint8)

    depth_diff_vis[np.logical_not(valid_mask)] = 0
    depth_diff_valid = depth_diff[valid_mask]
    depth_info = [
      {'name': 'min diff', 'fmt': ':.3f', 'val': np.min(depth_diff_valid)},
      {'name': 'max diff', 'fmt': ':.3f', 'val': np.max(depth_diff_valid)},
      {'name': 'mean diff', 'fmt': ':.3f', 'val': np.mean(depth_diff_valid)},
    ]
    depth_diff_vis = write_text_on_image(depth_diff_vis, depth_info)
    inout.save_im(vis_depth_diff_path, depth_diff_vis)
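# Example usage of vis_object_poses_uv (illustrative: K, ren, rgb and depth
# are assumed to be set up as in the vis_object_poses example above, the
# renderer is assumed to hold UV-colored models, and all paths are made up).
poses = [
  {'obj_id': 1, 'R': np.eye(3), 't': np.array([[0.0], [0.0], [500.0]])},
  {'obj_id': 2, 'R': np.eye(3), 't': np.array([[50.0], [0.0], [600.0]])},
]
vis_object_poses_uv(
  poses, K, ren, rgb=rgb, depth=depth,
  vis_rgb_path='vis/000000_rgb.png',    # Background image.
  vis_uv_path='vis/000000_uv.png',      # Combined UV rendering.
  vis_mask_path='vis/000000_mask.png',  # Per-pixel object IDs.
  vis_depth_diff_path=None)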