Example No. 1
def save_errors(_error_sign, _scene_errs):
    # Save the calculated errors to a JSON file.
    errors_path = p['out_errors_tpath'].format(result_name=result_name,
                                               error_sign=_error_sign,
                                               scene_id=scene_id)
    misc.ensure_dir(os.path.dirname(errors_path))
    misc.log('Saving errors to: {}'.format(errors_path))
    inout.save_json(errors_path, _scene_errs)
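This snippet relies on p (a config dict), result_name and scene_id from the enclosing script scope. A minimal sketch of the assumed template mechanism follows; the template string below is hypothetical, not the script's actual value:

import os

# Hypothetical template; the real string comes from the config dict p.
p = {'out_errors_tpath': os.path.join(
    '{result_name}', 'errors', '{error_sign}', 'errors_{scene_id:06d}.json')}

errors_path = p['out_errors_tpath'].format(
    result_name='my-method_lm-test', error_sign='vsd', scene_id=1)
# On POSIX: 'my-method_lm-test/errors/vsd/errors_000001.json'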
Example No. 2
def fragmentation_fps_test():

    output_dir = 'fragmentation_test_output'
    misc.ensure_dir(output_dir)

    datasets = ['hb', 'ycbv', 'tless', 'lmo', 'icbin', 'itodd', 'tudl']

    for dataset in datasets:

        model_type = None
        if dataset == 'tless':
            model_type = 'reconst'
        elif dataset == 'itodd':
            model_type = 'dense'

        dp_model = dataset_params.get_model_params(config.BOP_PATH, dataset,
                                                   model_type)

        for obj_id in dp_model['obj_ids']:
            print('Fragmenting object {} from dataset {}...'.format(
                obj_id, dataset))

            model_fpath = dp_model['model_tpath'].format(obj_id=obj_id)
            model = inout.load_ply(model_fpath)

            # Fragmentation by the furthest point sampling.
            frag_centers, vertex_frag_ids = \
              fragment.fragmentation_fps(model['pts'], num_frags=256)

            # Fragment colors.
            frag_colors = frag_centers - frag_centers.min()
            frag_colors = (255.0 * frag_colors / frag_colors.max()).astype(
                np.uint8)

            # Color the model points by the fragment colors.
            pts_colors = np.zeros((model['pts'].shape[0], 3), np.uint8)
            for frag_id in range(len(frag_centers)):
                pts_colors[vertex_frag_ids == frag_id] = frag_colors[frag_id]

            inout.save_ply(
                os.path.join(
                    output_dir,
                    '{}_obj_{:02d}_fragments.ply'.format(dataset, obj_id)), {
                        'pts': model['pts'],
                        'faces': model['faces'],
                        'colors': pts_colors
                    })
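The fragment.fragmentation_fps call above is project-specific. Below is a minimal NumPy sketch of furthest point sampling with nearest-center assignment, an illustration of the technique rather than the actual implementation:

import numpy as np

def fps_fragmentation_sketch(pts, num_frags):
    """Greedy furthest point sampling over an (N, 3) point array.

    Returns (num_frags, 3) fragment centers and an (N,) array that assigns
    each point to its nearest center.
    """
    center_ids = [0]  # Start from an arbitrary point.
    dists = np.linalg.norm(pts - pts[0], axis=1)
    for _ in range(num_frags - 1):
        center_ids.append(int(np.argmax(dists)))  # Furthest point so far.
        new_dists = np.linalg.norm(pts - pts[center_ids[-1]], axis=1)
        dists = np.minimum(dists, new_dists)
    centers = pts[center_ids]
    # Assign each point to the nearest center (fine for small models;
    # compute in chunks for very large ones).
    d2 = ((pts[:, None, :] - centers[None, :, :]) ** 2).sum(axis=2)
    return centers, np.argmin(d2, axis=1)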
Example No. 3
    def end(self, session):
        global_step = training_util.global_step(session, self.global_step)

        # Save the confusion matrix to a text file.
        df = pd.DataFrame(self.cm)
        cm_table = tabulate(df, headers='keys', tablefmt='psql')
        cm_path = os.path.join(self.log_dir, 'cm_{}.txt'.format(global_step))
        misc.ensure_dir(os.path.dirname(cm_path))
        with open(cm_path, 'w') as f:
            f.write(cm_table)

        # Calculate mIoU of object segmentation.
        bg_iou = 1.0
        fg_ious = []
        for cls in range(self.num_cls):
            intersection = self.cm[cls, cls]
            union = np.sum(self.cm[cls, :]) + np.sum(
                self.cm[:, cls]) - intersection
            if union > 0:
                iou = intersection / float(union)
                if cls == 0:
                    bg_iou = iou
                else:
                    fg_ious.append(iou)

        if len(fg_ious):
            miou_fg = np.mean(fg_ious)
            miou_all = np.mean(fg_ious + [bg_iou])
        else:
            miou_fg = 0.0
            miou_all = 0.0

        # mIoU calculated over foreground and background classes.
        self.add_scalar_summary('eval/obj_cls_miou_all', miou_all, global_step)

        # mIoU calculated only over foreground classes.
        self.add_scalar_summary('eval/obj_cls_miou_fg', miou_fg, global_step)

        self.summary_writer.flush()
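The per-class IoU loop above can also be written in vectorized form. A small sketch with a hypothetical helper, using the same convention (intersection on the diagonal, union from row and column sums):

import numpy as np

def ious_from_cm(cm):
    """Per-class IoU from a square confusion matrix."""
    cm = cm.astype(np.float64)
    inter = np.diag(cm)
    union = cm.sum(axis=0) + cm.sum(axis=1) - inter
    # Classes absent from both GT and prediction get IoU = NaN.
    return np.where(union > 0, inter / np.maximum(union, 1e-12), np.nan)

# Usage, with class 0 treated as background as in the example:
# ious = ious_from_cm(self.cm); miou_fg = np.nanmean(ious[1:])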
Example No. 4
            })

            # Visualization of the visibility mask.
            if p['vis_visibility_masks']:

                depth_im_vis = visualization.depth_for_vis(depth, 0.2, 1.0)
                depth_im_vis = np.dstack([depth_im_vis] * 3)

                visib_gt_vis = visib_gt.astype(np.float64)
                zero_ch = np.zeros(visib_gt_vis.shape)
                visib_gt_vis = np.dstack([zero_ch, visib_gt_vis, zero_ch])

                vis = 0.5 * depth_im_vis + 0.5 * visib_gt_vis
                vis[vis > 1] = 1

                vis_path = p['vis_mask_visib_tpath'].format(
                    delta=p['delta'],
                    dataset=p['dataset'],
                    split=p['dataset_split'],
                    scene_id=scene_id,
                    im_id=im_id,
                    gt_id=gt_id)
                misc.ensure_dir(os.path.dirname(vis_path))
                inout.save_im(vis_path, vis)

    # Save the info for the current scene.
    scene_gt_info_path = dp_split['scene_gt_info_tpath'].format(
        scene_id=scene_id)
    misc.ensure_dir(os.path.dirname(scene_gt_info_path))
    inout.save_json(scene_gt_info_path, scene_gt_info)
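The overlay above shows the visibility mask in the green channel on top of a grayscale depth rendering. The same idea in isolation, as a small sketch with hypothetical inputs:

import numpy as np

def overlay_mask_green(gray_im, mask, alpha=0.5):
    """Blend a binary mask (green channel) into a grayscale image in [0, 1]."""
    base = np.dstack([gray_im] * 3)
    zero_ch = np.zeros(mask.shape, dtype=np.float64)
    overlay = np.dstack([zero_ch, mask.astype(np.float64), zero_ch])
    return np.clip(alpha * base + (1.0 - alpha) * overlay, 0.0, 1.0)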
Example No. 5
for scene_id in dp_split['scene_ids']:

    # Load scene GT.
    scene_gt_path = dp_split['scene_gt_tpath'].format(scene_id=scene_id)
    scene_gt = inout.load_scene_gt(scene_gt_path)

    # Load scene camera.
    scene_camera_path = dp_split['scene_camera_tpath'].format(
        scene_id=scene_id)
    scene_camera = inout.load_scene_camera(scene_camera_path)

    # Create folders for the output masks (if they do not exist yet).
    mask_dir_path = os.path.dirname(dp_split['mask_tpath'].format(
        scene_id=scene_id, im_id=0, gt_id=0))
    misc.ensure_dir(mask_dir_path)

    mask_visib_dir_path = os.path.dirname(dp_split['mask_visib_tpath'].format(
        scene_id=scene_id, im_id=0, gt_id=0))
    misc.ensure_dir(mask_visib_dir_path)

    # Initialize a renderer.
    misc.log('Initializing renderer...')
    width, height = dp_split['im_size']
    ren = renderer.create_renderer(width,
                                   height,
                                   renderer_type=p['renderer_type'],
                                   mode='depth')

    # Add object models.
    for obj_id in dp_model['obj_ids']:
        ren.add_object(obj_id, dp_model['model_tpath'].format(obj_id=obj_id))
Example No. 6
def vis_object_poses(
      poses, K, renderer, rgb=None, depth=None, vis_rgb_path=None,
      vis_depth_diff_path=None, vis_rgb_resolve_visib=False):
  """Visualizes 3D object models in specified poses in a single image.

  Two visualizations are created:
  1. An RGB visualization (if vis_rgb_path is not None).
  2. A depth-difference visualization (if vis_depth_diff_path is not None).

  :param poses: List of dictionaries, each with info about one pose:
    - 'obj_id': Object ID.
    - 'R': 3x3 ndarray with a rotation matrix.
    - 't': 3x1 ndarray with a translation vector.
    - 'text_info': Info to write at the object (see write_text_on_image).
  :param K: 3x3 ndarray with an intrinsic camera matrix.
  :param renderer: Instance of the Renderer class (see renderer.py).
  :param rgb: ndarray with the RGB input image.
  :param depth: ndarray with the depth input image.
  :param vis_rgb_path: Path to the output RGB visualization.
  :param vis_depth_diff_path: Path to the output depth-difference visualization.
  :param vis_rgb_resolve_visib: Whether to resolve visibility of the objects
    (i.e. only the closest object is visualized at each pixel).
  """
  fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]

  # Indicators of visualization types.
  vis_rgb = vis_rgb_path is not None
  vis_depth_diff = vis_depth_diff_path is not None

  if vis_rgb and rgb is None:
    raise ValueError('RGB visualization triggered but RGB image not provided.')

  if (vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib)) and depth is None:
    raise ValueError('Depth visualization triggered but D image not provided.')

  # Prepare images for rendering.
  im_size = None
  ren_rgb = None
  ren_rgb_info = None
  ren_depth = None

  if vis_rgb:
    im_size = (rgb.shape[1], rgb.shape[0])
    ren_rgb = np.zeros(rgb.shape, np.uint8)
    ren_rgb_info = np.zeros(rgb.shape, np.uint8)

  if vis_depth_diff:
    if im_size and im_size != (depth.shape[1], depth.shape[0]):
      raise ValueError('The RGB and D images must have the same size.')
    else:
      im_size = (depth.shape[1], depth.shape[0])

  if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib):
    ren_depth = np.zeros((im_size[1], im_size[0]), np.float32)

  # Render the pose estimates one by one.
  for pose in poses:

    # Rendering.
    ren_out = renderer.render_object(
      pose['obj_id'], pose['R'], pose['t'], fx, fy, cx, cy)

    m_rgb = None
    if vis_rgb:
      m_rgb = ren_out['rgb']

    m_mask = None
    if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib):
      m_depth = ren_out['depth']

      # Get mask of the surface parts that are closer than the
      # surfaces rendered before.
      visible_mask = np.logical_or(ren_depth == 0, m_depth < ren_depth)
      m_mask = np.logical_and(m_depth != 0, visible_mask)

      ren_depth[m_mask] = m_depth[m_mask].astype(ren_depth.dtype)

    # Combine the RGB renderings.
    if vis_rgb:
      if vis_rgb_resolve_visib:
        ren_rgb[m_mask] = m_rgb[m_mask].astype(ren_rgb.dtype)
      else:
        ren_rgb_f = ren_rgb.astype(np.float32) + m_rgb.astype(np.float32)
        ren_rgb_f[ren_rgb_f > 255] = 255
        ren_rgb = ren_rgb_f.astype(np.uint8)

      # Draw 2D bounding box and write text info.
      obj_mask = np.sum(m_rgb > 0, axis=2)
      ys, xs = obj_mask.nonzero()
      if len(ys):
        # bbox_color = model_color
        # text_color = model_color
        bbox_color = (0.3, 0.3, 0.3)
        text_color = (1.0, 1.0, 1.0)
        text_size = 11

        bbox = misc.calc_2d_bbox(xs, ys, im_size)
        im_size = (obj_mask.shape[1], obj_mask.shape[0])
        ren_rgb_info = draw_rect(ren_rgb_info, bbox, bbox_color)

        if 'text_info' in pose:
          text_loc = (bbox[0] + 2, bbox[1])
          ren_rgb_info = write_text_on_image(
            ren_rgb_info, pose['text_info'], text_loc, color=text_color,
            size=text_size)

  # Blend and save the RGB visualization.
  if vis_rgb:
    vis_im_rgb = 0.5 * rgb.astype(np.float32) + \
                 0.5 * ren_rgb.astype(np.float32) + \
                 1.0 * ren_rgb_info.astype(np.float32)
    vis_im_rgb[vis_im_rgb > 255] = 255
    misc.ensure_dir(os.path.dirname(vis_rgb_path))
    inout.save_im(vis_rgb_path, vis_im_rgb.astype(np.uint8), jpg_quality=95)

  # Save the image of depth differences.
  if vis_depth_diff:
    # Calculate the depth difference at pixels where both depth maps
    # are valid.
    valid_mask = (depth > 0) * (ren_depth > 0)
    depth_diff = valid_mask * (depth - ren_depth.astype(np.float32))

    f, ax = plt.subplots(1, 1)
    cax = ax.matshow(depth_diff)
    ax.axis('off')
    ax.set_title('captured - GT depth [mm]')
    f.colorbar(cax, fraction=0.03, pad=0.01)
    f.tight_layout(pad=0)

    misc.ensure_dir(os.path.dirname(vis_depth_diff_path))
    plt.savefig(vis_depth_diff_path, bbox_inches='tight', pad_inches=0)
    plt.close()
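The visibility resolution inside the rendering loop is a software z-buffer: each new rendering overwrites only the pixels where it is closer than everything rendered before. Distilled into a standalone sketch:

import numpy as np

def zbuffer_merge(acc_rgb, acc_depth, m_rgb, m_depth):
    """Merge one rendering into the accumulators, keeping the closest surface.

    A depth value of 0 means 'no surface' in both depth maps.
    """
    closer = np.logical_or(acc_depth == 0, m_depth < acc_depth)
    m_mask = np.logical_and(m_depth != 0, closer)
    acc_rgb[m_mask] = m_rgb[m_mask].astype(acc_rgb.dtype)
    acc_depth[m_mask] = m_depth[m_mask].astype(acc_depth.dtype)
    return acc_rgb, acc_depth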
Example No. 7
def vis_object_poses(
      poses, K, renderer, rgb=None, depth=None, vis_rgb_path=None,
      vis_depth_diff_path=None, vis_rgb_resolve_visib=False):
  """Visualizes 3D object models in specified poses in a single image.

  Two visualizations are created:
  1. An RGB visualization (if vis_rgb_path is not None).
  2. A depth-difference visualization (if vis_depth_diff_path is not None).

  :param poses: List of dictionaries, each with info about one pose:
    - 'obj_id': Object ID.
    - 'R': 3x3 ndarray with a rotation matrix.
    - 't': 3x1 ndarray with a translation vector.
    - 'text_info': Info to write at the object (see write_text_on_image).
  :param K: 3x3 ndarray with an intrinsic camera matrix.
  :param renderer: Instance of the Renderer class (see renderer.py).
  :param rgb: ndarray with the RGB input image.
  :param depth: ndarray with the depth input image.
  :param vis_rgb_path: Path to the output RGB visualization.
  :param vis_depth_diff_path: Path to the output depth-difference visualization.
  :param vis_rgb_resolve_visib: Whether to resolve visibility of the objects
    (i.e. only the closest object is visualized at each pixel).
  """
  fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]

  # Indicators of visualization types.
  vis_rgb = vis_rgb_path is not None
  vis_depth_diff = vis_depth_diff_path is not None

  if vis_rgb and rgb is None:
    raise ValueError('RGB visualization triggered but RGB image not provided.')

  if (vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib)) and depth is None:
    raise ValueError('Depth visualization triggered but D image not provided.')

  # Prepare images for rendering.
  im_size = None
  ren_rgb = None
  ren_rgb_info = None
  ren_depth = None

  if vis_rgb:
    im_size = (rgb.shape[1], rgb.shape[0])
    ren_rgb = np.zeros(rgb.shape, np.uint8)
    ren_rgb_info = np.zeros(rgb.shape, np.uint8)

  if vis_depth_diff:
    if im_size and im_size != (depth.shape[1], depth.shape[0]):
      raise ValueError('The RGB and D images must have the same size.')
    else:
      im_size = (depth.shape[1], depth.shape[0])

  if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib):
    ren_depth = np.zeros((im_size[1], im_size[0]), np.float32)

  # Render the pose estimates one by one.
  for pose in poses:

    # Rendering.
    ren_out = renderer.render_object(
      pose['obj_id'], pose['R'], pose['t'], fx, fy, cx, cy)

    m_rgb = None
    if vis_rgb:
      m_rgb = ren_out['rgb']

    m_mask = None
    if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib):
      m_depth = ren_out['depth']

      # Get mask of the surface parts that are closer than the
      # surfaces rendered before.
      visible_mask = np.logical_or(ren_depth == 0, m_depth < ren_depth)
      m_mask = np.logical_and(m_depth != 0, visible_mask)

      ren_depth[m_mask] = m_depth[m_mask].astype(ren_depth.dtype)

    # Combine the RGB renderings.
    if vis_rgb:
      if vis_rgb_resolve_visib:
        ren_rgb[m_mask] = m_rgb[m_mask].astype(ren_rgb.dtype)
      else:
        ren_rgb_f = ren_rgb.astype(np.float32) + m_rgb.astype(np.float32)
        ren_rgb_f[ren_rgb_f > 255] = 255
        ren_rgb = ren_rgb_f.astype(np.uint8)

      # Draw 2D bounding box and write text info.
      obj_mask = np.sum(m_rgb > 0, axis=2)
      ys, xs = obj_mask.nonzero()
      if len(ys):
        # bbox_color = model_color
        # text_color = model_color
        bbox_color = (0.3, 0.3, 0.3)
        text_color = (1.0, 1.0, 1.0)
        text_size = 11

        bbox = misc.calc_2d_bbox(xs, ys, im_size)
        im_size = (obj_mask.shape[1], obj_mask.shape[0])
        ren_rgb_info = draw_rect(ren_rgb_info, bbox, bbox_color)

        if 'text_info' in pose:
          text_loc = (bbox[0] + 2, bbox[1])
          ren_rgb_info = write_text_on_image(
            ren_rgb_info, pose['text_info'], text_loc, color=text_color,
            size=text_size)

  # Blend and save the RGB visualization.
  if vis_rgb:
    misc.ensure_dir(os.path.dirname(vis_rgb_path))

    vis_im_rgb = 0.5 * rgb.astype(np.float32) + \
                 0.5 * ren_rgb.astype(np.float32) + \
                 1.0 * ren_rgb_info.astype(np.float32)
    vis_im_rgb[vis_im_rgb > 255] = 255
    inout.save_im(vis_rgb_path, vis_im_rgb.astype(np.uint8), jpg_quality=95)

  # Save the image of depth differences.
  if vis_depth_diff:
    misc.ensure_dir(os.path.dirname(vis_depth_diff_path))

    # Calculate the depth difference at pixels where both depth maps are valid.
    valid_mask = (depth > 0) * (ren_depth > 0)
    depth_diff = valid_mask * (ren_depth.astype(np.float32) - depth)

    # Get mask of pixels where the rendered depth is at most by the tolerance
    # delta behind the captured depth (this tolerance is used in VSD).
    delta = 15
    below_delta = valid_mask * (depth_diff < delta)
    below_delta_vis = (255 * below_delta).astype(np.uint8)

    depth_diff_vis = 255 * depth_for_vis(depth_diff - depth_diff.min())

    # Pixels where the rendered depth is more than the tolerance delta behind
    # the captured depth will be cyan.
    depth_diff_vis = np.dstack(
      [below_delta_vis, depth_diff_vis, depth_diff_vis]).astype(np.uint8)

    depth_diff_vis[np.logical_not(valid_mask)] = 0
    depth_diff_valid = depth_diff[valid_mask]
    depth_info = [
      {'name': 'min diff', 'fmt': ':.3f', 'val': np.min(depth_diff_valid)},
      {'name': 'max diff', 'fmt': ':.3f', 'val': np.max(depth_diff_valid)},
      {'name': 'mean diff', 'fmt': ':.3f', 'val': np.mean(depth_diff_valid)},
    ]
    depth_diff_vis = write_text_on_image(depth_diff_vis, depth_info)
    inout.save_im(vis_depth_diff_path, depth_diff_vis)
Example No. 8
for obj_id in dp_model['obj_ids']:

    # Load object model.
    misc.log('Loading 3D model of object {}...'.format(obj_id))
    model_path = dp_model['model_tpath'].format(obj_id=obj_id)
    ren.add_object(obj_id, model_path)

    poses = misc.get_symmetry_transformations(models_info[obj_id],
                                              p['max_sym_disc_step'])

    for pose_id, pose in enumerate(poses):

        for view_id, view in enumerate(p['views']):

            R = view['R'].dot(pose['R'])
            t = view['R'].dot(pose['t']) + view['t']

            vis_rgb = ren.render_object(obj_id, R, t, fx, fy, cx, cy)['rgb']

            # Path to the output RGB visualization.
            vis_rgb_path = p['vis_rgb_tpath'].format(vis_path=p['vis_path'],
                                                     dataset=p['dataset'],
                                                     obj_id=obj_id,
                                                     view_id=view_id,
                                                     pose_id=pose_id)
            misc.ensure_dir(os.path.dirname(vis_rgb_path))
            inout.save_im(vis_rgb_path, vis_rgb)

misc.log('Done.')
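The composition in the inner loop follows from applying the symmetry first and the viewpoint second: X_cam = view_R * (sym_R * X + sym_t) + view_t, hence R = view_R * sym_R and t = view_R * sym_t + view_t. A quick numeric check with hypothetical values:

import numpy as np

sym_R = np.array([[0., -1., 0.], [1., 0., 0.], [0., 0., 1.]])  # 90 deg about z.
sym_t = np.zeros((3, 1))
view_R = np.eye(3)
view_t = np.array([[0.], [0.], [500.]])

X = np.array([[10.], [20.], [30.]])
R = view_R.dot(sym_R)
t = view_R.dot(sym_t) + view_t
assert np.allclose(view_R.dot(sym_R.dot(X) + sym_t) + view_t, R.dot(X) + t)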
Example No. 9
out_scene_camera_tpath =\
  os.path.join('{out_path}', 'scene_camera', '{obj_id:06d}_scene_camera.json')
out_scene_gt_tpath =\
  os.path.join('{out_path}', 'scene_gt', '{obj_id:06d}_scene_gt.json')
out_views_vis_tpath =\
  os.path.join('{out_path}', 'views_radius', '{obj_id:06d}_views_radius={radius}.ply')

# Load colors.
colors_path = os.path.join(
  os.path.dirname(inout.__file__), 'colors.json')
colors = inout.load_json(colors_path)
################################################################################


out_path = out_tpath.format(dataset=dataset)
misc.ensure_dir(out_path)

# Load dataset parameters.
dp_split_test = dataset_params.get_split_params(datasets_path, dataset, 'test')
dp_model = dataset_params.get_model_params(datasets_path, dataset, model_type)
dp_camera = dataset_params.get_camera_params(datasets_path, dataset, cam_type)

if not obj_ids:
  obj_ids = dp_model['obj_ids']

# Image size and K for the RGB image (potentially with SSAA).
im_size_rgb = [int(round(x * float(ssaa_fact))) for x in dp_camera['im_size']]
K_rgb = dp_camera['K'] * ssaa_fact

# Intrinsic parameters for RGB rendering.
fx_rgb, fy_rgb, cx_rgb, cy_rgb =\
  K_rgb[0, 0], K_rgb[1, 1], K_rgb[0, 2], K_rgb[1, 2]
Example No. 10
# Output path templates.
out_rgb_tpath =\
  os.path.join('{out_path}', '{obj_id:06d}', 'rgb', '{im_id:06d}.png')
out_depth_tpath =\
  os.path.join('{out_path}', '{obj_id:06d}', 'depth', '{im_id:06d}.png')
out_scene_camera_tpath =\
  os.path.join('{out_path}', '{obj_id:06d}', 'scene_camera.json')
out_scene_gt_tpath =\
  os.path.join('{out_path}', '{obj_id:06d}', 'scene_gt.json')
out_views_vis_tpath =\
  os.path.join('{out_path}', '{obj_id:06d}', 'views_radius={radius}.ply')
################################################################################

out_path = out_tpath.format(dataset=dataset)
misc.ensure_dir(out_path)

# Load dataset parameters.
dp_split_test = dataset_params.get_split_params(datasets_path, dataset, 'test')
dp_model = dataset_params.get_model_params(datasets_path, dataset, model_type)
dp_camera = dataset_params.get_camera_params(datasets_path, dataset, cam_type)

if not obj_ids:
    obj_ids = dp_model['obj_ids']

# Image size and K for the RGB image (potentially with SSAA).
im_size_rgb = [int(round(x * float(ssaa_fact))) for x in dp_camera['im_size']]
K_rgb = dp_camera['K'] * ssaa_fact

# Intrinsic parameters for RGB rendering.
fx_rgb, fy_rgb, cx_rgb, cy_rgb =\
  K_rgb[0, 0], K_rgb[1, 1], K_rgb[0, 2], K_rgb[1, 2]
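Supersampling anti-aliasing (SSAA) renders at ssaa_fact times the target resolution and downscales afterwards, so fx, fy, cx, cy must be scaled by the same factor as the image size. Note that K * ssaa_fact also scales the bottom row of K, which is harmless here because only those four entries are read. A quick check with hypothetical intrinsics:

import numpy as np

K = np.array([[572.4, 0.0, 325.3],
              [0.0, 573.6, 242.0],
              [0.0, 0.0, 1.0]])
ssaa_fact = 4
K_rgb = K * ssaa_fact
fx_rgb, fy_rgb, cx_rgb, cy_rgb =\
  K_rgb[0, 0], K_rgb[1, 1], K_rgb[0, 2], K_rgb[1, 2]
assert fx_rgb == 4 * 572.4 and cx_rgb == 4 * 325.3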
Example No. 11
        vis_depth_diff_path = None
        if p['vis_depth_diff']:
            vis_depth_diff_path = p['vis_depth_diff_tpath'].format(
                vis_path=p['vis_path'],
                dataset=p['dataset'],
                split=p['dataset_split'],
                scene_id=scene_id,
                im_id=im_id)

        # Visualization.
        #visualization.vis_object_poses(
        #  poses=gt_poses, K=K, renderer=ren, rgb=rgb, depth=depth,
        #  vis_rgb_path=vis_rgb_path, vis_depth_diff_path=vis_depth_diff_path,
        #  vis_rgb_resolve_visib=p['vis_rgb_resolve_visib'])

        obj_vis_scores = visualization.eval_object_hand_poses(
            dp_split, scene_id, im_id, gt_poses, K, ren, depth)
        if scene_id not in obj_vis_dict:
            obj_vis_dict[scene_id] = {}
        obj_vis_dict[scene_id][im_id] = obj_vis_scores

# Save the object visibility scores.
misc.ensure_dir(config.output_path)
obj_vis_scores_filename = os.path.join(config.output_path,
                                       'obj_vis_scores.json')
with open(obj_vis_scores_filename, 'w') as outfile:
    json.dump(obj_vis_dict, outfile)
# [0] num_rendered, [1] visible_of_rendered, [2] valid_of_rendered, [3] valid_of_visible

misc.log('Done.')
Example No. 12
    'meshlab_script_path':
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 'meshlab_scripts', r'remesh_for_eval_cell=0.25.mlx'),
}
################################################################################

# Load dataset parameters.
dp_model_in = dataset_params.get_model_params(p['datasets_path'], p['dataset'],
                                              p['model_in_type'])

dp_model_out = dataset_params.get_model_params(p['datasets_path'],
                                               p['dataset'],
                                               p['model_out_type'])

# Attributes to save for the output models.
attrs_to_save = []

# Process models of all objects in the selected dataset.
for obj_id in dp_model_in['obj_ids']:
    misc.log('\n\n\nProcessing model of object {}...\n'.format(obj_id))

    model_in_path = dp_model_in['model_tpath'].format(obj_id=obj_id)
    model_out_path = dp_model_out['model_tpath'].format(obj_id=obj_id)

    misc.ensure_dir(os.path.dirname(model_out_path))

    misc.run_meshlab_script(p['meshlab_server_path'], p['meshlab_script_path'],
                            model_in_path, model_out_path, attrs_to_save)

misc.log('Done.')
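misc.run_meshlab_script wraps a MeshLab server invocation. A rough sketch of what such a wrapper can look like (an assumption, not the toolkit's actual code; attrs_to_save is ignored here):

import subprocess

def run_meshlab_script_sketch(server_path, script_path, in_path, out_path):
    """Apply a MeshLab (.mlx) filter script to a model via meshlabserver."""
    subprocess.check_call(
        [server_path, '-i', in_path, '-o', out_path, '-s', script_path])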
Example No. 13
def vis_object_poses_uv(poses,
                        K,
                        renderer,
                        rgb=None,
                        depth=None,
                        vis_rgb_path=None,
                        vis_depth_diff_path=None,
                        vis_rgb_resolve_visib=False,
                        vis_uv_path=None,
                        vis_mask_path=None):
    """Visualizes 3D object models in specified poses in a single image.

  Several visualizations can be created:
  1. An RGB visualization (if vis_rgb_path is not None).
  2. A depth-difference visualization (if vis_depth_diff_path is not None).
  3. A UV rendering and an object mask (if vis_uv_path/vis_mask_path are set).

  :param poses: List of dictionaries, each with info about one pose:
    - 'obj_id': Object ID.
    - 'R': 3x3 ndarray with a rotation matrix.
    - 't': 3x1 ndarray with a translation vector.
    - 'text_info': Info to write at the object (see write_text_on_image).
  :param K: 3x3 ndarray with an intrinsic camera matrix.
  :param renderer: Instance of the Renderer class (see renderer.py).
  :param rgb: ndarray with the RGB input image.
  :param depth: ndarray with the depth input image.
  :param vis_rgb_path: Path to the output RGB visualization.
  :param vis_depth_diff_path: Path to the output depth-difference visualization.
  :param vis_rgb_resolve_visib: Whether to resolve visibility of the objects
    (i.e. only the closest object is visualized at each pixel).
  :param vis_uv_path: Path to the output UV rendering.
  :param vis_mask_path: Path to the output object-ID mask.
  """
    fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]

    # Indicators of visualization types.
    vis_rgb = vis_rgb_path is not None
    vis_depth_diff = vis_depth_diff_path is not None
    vis_uv = vis_uv_path is not None

    # Check that the required input images were provided.
    if vis_rgb and rgb is None:
        raise ValueError(
            'RGB visualization triggered but RGB image not provided.')

    if (vis_depth_diff or
        (vis_rgb and vis_rgb_resolve_visib)) and depth is None:
        raise ValueError(
            'Depth visualization triggered but D image not provided.')

    # Prepare images for rendering.
    im_size = None
    ren_rgb = None
    ren_rgb_info = None
    ren_depth = None

    if vis_rgb:
        im_size = (rgb.shape[1], rgb.shape[0])
        ren_rgb = np.zeros(rgb.shape, np.uint8)
        ren_rgb_info = np.zeros(rgb.shape, np.uint8)

    # for the masks
    if vis_uv:
        im_size = (rgb.shape[1], rgb.shape[0])
        ren_mask = np.zeros(rgb.shape, np.uint8)

    if vis_depth_diff:
        if im_size and im_size != (depth.shape[1], depth.shape[0]):
            raise ValueError('The RGB and D images must have the same size.')
        else:
            im_size = (depth.shape[1], depth.shape[0])

    if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib):
        ren_depth = np.zeros((im_size[1], im_size[0]), np.float32)

    # Render the pose estimates one by one.
    for gt_id, pose in enumerate(poses):

        # Rendering.
        ren_out = renderer.render_object(pose['obj_id'], pose['R'], pose['t'],
                                         fx, fy, cx, cy)

        # currently in uv colors
        m_rgb = None
        if vis_rgb:
            m_rgb = ren_out['rgb']

        m_mask_rgb = None
        if vis_uv:
            # create mask in object color
            m_mask_rgb = np.sum(m_rgb > 0, axis=2) >= 1
            m_mask_rgb = np.stack([m_mask_rgb] * 3, axis=2)
            # Erode the mask to remove the dark border of the rendering.
            kernel = np.ones((5, 5), np.uint8)
            m_mask_rgb = cv2.erode(m_mask_rgb.astype(np.uint8),
                                   kernel,
                                   borderType=cv2.BORDER_CONSTANT,
                                   borderValue=0).astype(np.bool_)

            # apply eroded mask to renderings
            m_rgb = m_rgb * m_mask_rgb

            # create mask with obj id
            m_mask_rgb = (m_mask_rgb * pose['obj_id']).astype('uint8')
            # mask_color = tuple(colors[(obj_id - 1) % len(colors)])

        m_mask = None
        if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib):
            m_depth = ren_out['depth']

            # Get mask of the surface parts that are closer than the
            # surfaces rendered before.
            visible_mask = np.logical_or(ren_depth == 0, m_depth < ren_depth)
            m_mask = np.logical_and(m_depth != 0, visible_mask)

            ren_depth[m_mask] = m_depth[m_mask].astype(ren_depth.dtype)

        # # Save uv models solely before starting comination steps
        # if vis_uv:
        #   misc.ensure_dir(os.path.dirname(vis_uv_path[gt_id]))
        #   ren_uv = np.zeros(rgb.shape, np.uint8)
        #   ren_uv_f = ren_uv.astype(np.float32) + m_rgb.astype(np.float32) # black background + current model rendered
        #   ren_uv_f[ren_uv_f > 255] = 255
        #   ren_uv = ren_uv_f.astype(np.uint8)
        #   inout.save_im(vis_uv_path[gt_id], ren_uv, jpg_quality=95)

        # Combine the RGB renderings.
        if vis_rgb:
            if vis_rgb_resolve_visib:
                ren_rgb[m_mask] = m_rgb[m_mask].astype(ren_rgb.dtype)
            else:
                ren_rgb_f = ren_rgb.astype(np.float32) + m_rgb.astype(
                    np.float32)
                ren_rgb_f[ren_rgb_f > 255] = 255
                ren_rgb = ren_rgb_f.astype(np.uint8)

                if vis_uv:
                    # Write the object ID into mask pixels that are not yet
                    # claimed by a previously rendered object.
                    m_mask_idx = (ren_mask == 0) & (m_mask_rgb > 0)
                    ren_mask[m_mask_idx] = m_mask_rgb[m_mask_idx]
            # # Draw 2D bounding box and write text info.
            # obj_mask = np.sum(m_rgb > 0, axis=2)
            # ys, xs = obj_mask.nonzero()
            # if len(ys):
            #   # bbox_color = model_color
            #   # text_color = model_color
            #   bbox_color = (0.3, 0.3, 0.3)
            #   text_color = (1.0, 1.0, 1.0)
            #   text_size = 11

            #   bbox = misc.calc_2d_bbox(xs, ys, im_size)
            #   im_size = (obj_mask.shape[1], obj_mask.shape[0])
            #   ren_rgb_info = draw_rect(ren_rgb_info, bbox, bbox_color)

            #   if 'text_info' in pose:
            #     text_loc = (bbox[0] + 2, bbox[1])
            #     ren_rgb_info = write_text_on_image(
            #       ren_rgb_info, pose['text_info'], text_loc, color=text_color,
            #       size=text_size)

    # Blend and save the RGB visualization.
    if vis_rgb:
        misc.ensure_dir(os.path.dirname(vis_rgb_path))

        # vis_im_rgb = 0.5 * rgb.astype(np.float32) + \
        #              0.5 * ren_rgb.astype(np.float32) + \
        #              1.0 * ren_rgb_info.astype(np.float32)
        # vis_im_rgb[vis_im_rgb > 255] = 255
        # inout.save_im(vis_rgb_path, vis_im_rgb.astype(np.uint8), jpg_quality=95)
        inout.save_im(vis_rgb_path, rgb)  # only background

        # Save uv models and masks
        if vis_uv:
            misc.ensure_dir(os.path.dirname(vis_uv_path))
            ren_uv = ren_rgb.astype(np.uint8)
            inout.save_im(vis_uv_path, ren_uv)

            misc.ensure_dir(os.path.dirname(vis_mask_path))
            ren_mask = ren_mask.astype(np.uint8)
            inout.save_im(vis_mask_path, ren_mask)

    # Save the image of depth differences.
    if vis_depth_diff:
        misc.ensure_dir(os.path.dirname(vis_depth_diff_path))

        # Calculate the depth difference at pixels where both depth maps are valid.
        valid_mask = (depth > 0) * (ren_depth > 0)
        depth_diff = valid_mask * (ren_depth.astype(np.float32) - depth)

        delta = 15
        below_delta = valid_mask * (depth_diff < delta)
        below_delta_vis = (255 * below_delta).astype(np.uint8)

        depth_diff_vis = 255 * depth_for_vis(depth_diff - depth_diff.min())
        depth_diff_vis = np.dstack(
            [below_delta_vis, depth_diff_vis, depth_diff_vis]).astype(np.uint8)
        depth_diff_vis[np.logical_not(valid_mask)] = 0
        depth_diff_valid = depth_diff[valid_mask]
        depth_info = [
            {
                'name': 'min diff',
                'fmt': ':.3f',
                'val': np.min(depth_diff_valid)
            },
            {
                'name': 'max diff',
                'fmt': ':.3f',
                'val': np.max(depth_diff_valid)
            },
            {
                'name': 'mean diff',
                'fmt': ':.3f',
                'val': np.mean(depth_diff_valid)
            },
        ]
        depth_diff_vis = write_text_on_image(depth_diff_vis, depth_info)
        inout.save_im(vis_depth_diff_path, depth_diff_vis)
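The erosion step above shrinks the rendered mask by a few pixels so that the dark anti-aliased border of the rendering is excluded. In isolation, as a sketch:

import numpy as np
import cv2

mask = np.zeros((64, 64), np.uint8)
mask[16:48, 16:48] = 1                # A 32x32 square mask.
kernel = np.ones((5, 5), np.uint8)
eroded = cv2.erode(mask, kernel, borderType=cv2.BORDER_CONSTANT, borderValue=0)
assert eroded.sum() == 28 * 28        # Shrunk by 2 px on each side.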