Code example #1
File: calc_gt_masks.py  Project: wangg12/bop_toolkit
        dist_im = misc.depth_im_to_dist_im(depth_im, K)

        for gt_id, gt in enumerate(scene_gt[im_id]):

            # Render the depth image.
            depth_gt = ren.render_object(gt['obj_id'], gt['cam_R_m2c'],
                                         gt['cam_t_m2c'], fx, fy, cx,
                                         cy)['depth']

            # Convert depth image to distance image.
            dist_gt = misc.depth_im_to_dist_im(depth_gt, K)

            # Mask of the full object silhouette.
            mask = dist_gt > 0

            # Mask of the visible part of the object silhouette.
            mask_visib = visibility.estimate_visib_mask_gt(dist_im,
                                                           dist_gt,
                                                           p['delta'],
                                                           visib_mode='bop19')

            # Save the calculated masks.
            mask_path = dp_split['mask_tpath'].format(scene_id=scene_id,
                                                      im_id=im_id,
                                                      gt_id=gt_id)
            inout.save_im(mask_path, 255 * mask.astype(np.uint8))

            mask_visib_path = dp_split['mask_visib_tpath'].format(
                scene_id=scene_id, im_id=im_id, gt_id=gt_id)
            inout.save_im(mask_visib_path, 255 * mask_visib.astype(np.uint8))
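The helper misc.depth_im_to_dist_im used above is not shown in this snippet. A distance image stores, for each pixel, the Euclidean distance from the camera center to the observed 3D point rather than only its Z coordinate; the following is a minimal sketch of such a conversion written as an assumption about the helper, not its verbatim implementation:

import numpy as np

def depth_im_to_dist_im(depth_im, K):
    """Converts a depth image (Z values) to a distance image (ray lengths).

    depth_im: [H, W] array with Z coordinates in the camera frame (0 = no data).
    K: 3x3 intrinsic camera matrix.
    """
    h, w = depth_im.shape
    us, vs = np.meshgrid(np.arange(w), np.arange(h))

    # Back-project every pixel to a 3D point (X, Y, Z) in the camera frame.
    xs = (us - K[0, 2]) * depth_im / K[0, 0]
    ys = (vs - K[1, 2]) * depth_im / K[1, 1]

    # Euclidean distance from the camera center; stays zero where depth is 0.
    return np.sqrt(xs ** 2 + ys ** 2 + depth_im.astype(np.float64) ** 2)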
Code example #2
            })

            # Visualization of the visibility mask.
            if p['vis_visibility_masks']:

                depth_im_vis = visualization.depth_for_vis(depth, 0.2, 1.0)
                depth_im_vis = np.dstack([depth_im_vis] * 3)

                visib_gt_vis = visib_gt.astype(np.float64)
                zero_ch = np.zeros(visib_gt_vis.shape)
                visib_gt_vis = np.dstack([zero_ch, visib_gt_vis, zero_ch])

                vis = 0.5 * depth_im_vis + 0.5 * visib_gt_vis
                vis[vis > 1] = 1

                vis_path = p['vis_mask_visib_tpath'].format(
                    delta=p['delta'],
                    dataset=p['dataset'],
                    split=p['dataset_split'],
                    scene_id=scene_id,
                    im_id=im_id,
                    gt_id=gt_id)
                misc.ensure_dir(os.path.dirname(vis_path))
                inout.save_im(vis_path, vis)

    # Save the info for the current scene.
    scene_gt_info_path = dp_split['scene_gt_info_tpath'].format(
        scene_id=scene_id)
    misc.ensure_dir(os.path.dirname(scene_gt_info_path))
    inout.save_json(scene_gt_info_path, scene_gt_info)
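For context, visibility.estimate_visib_mask_gt (used in example #1, and whose result visib_gt is overlaid in green here) marks a GT surface pixel as visible when the rendered GT distance is not occluded by more than the tolerance delta; in the 'bop19' mode, pixels with missing captured depth also count as visible. A sketch of that test, written as an assumption rather than the toolkit's verbatim code:

import numpy as np

def estimate_visib_mask_gt(d_scene, d_gt, delta, visib_mode='bop19'):
    """Estimates the mask of the visible part of the GT object surface.

    d_scene: distance image of the captured scene.
    d_gt: distance image of the rendered GT object.
    delta: tolerance on how far the GT surface may lie behind the scene surface.
    """
    d_diff = d_gt.astype(np.float32) - d_scene.astype(np.float32)
    if visib_mode == 'bop19':
        # Pixels with missing captured depth are treated as visible.
        return np.logical_and(np.logical_or(d_diff <= delta, d_scene == 0),
                              d_gt > 0)
    # 'bop18': only pixels with valid captured depth can be visible.
    return np.logical_and(np.logical_and(d_diff <= delta, d_scene > 0),
                          d_gt > 0)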
Code example #3
File: vis.py  Project: zebrajack/epos
def visualize_pred_frag(frag_confs,
                        frag_coords,
                        output_size,
                        model_store,
                        vis_prefix,
                        vis_dir,
                        vis_ext='png'):
    """Visualizes predicted fragment fields.

  Args:
    frag_confs: Predicted fragment confidences of shape [output_h, output_w,
      num_objs, num_frags].
    frag_coords: Predicted 3D fragment coordinates of shape [field_h, field_w,
      num_fg_cls, num_bins, 3].
    output_size: Size of the fragment fields.
    model_store: Store of 3D object models.
    vis_prefix: Name prefix of the visualizations.
    vis_dir: Where to save the visualizations.
    vis_ext: Extension of the visualizations ('jpg', 'png', etc.).
  """
    num_objs = frag_confs.shape[2]
    tiles_centers = []
    tiles_coords = []
    tiles_reconst = []
    for obj_id in range(1, num_objs + 1):

        # Fragment confidences of shape [field_h, field_w, num_frags].
        conf_obj = frag_confs[:, :, obj_id - 1, :]
        field_shape = (conf_obj.shape[0], conf_obj.shape[1], 3)

        # Indices of fragments with the highest confidence.
        top_inds = np.argmax(conf_obj, axis=2)
        top_inds_f = top_inds.flatten()

        # Fragment centers.
        top_centers = np.reshape(model_store.frag_centers[obj_id][top_inds_f],
                                 field_shape)

        # Fragment coordinates of shape [field_h * field_w, num_frags, 3].
        num_frags = frag_coords.shape[3]
        coords_obj = frag_coords[:, :, obj_id - 1, :, :].reshape(
            (-1, num_frags, 3))

        # Top fragment coordinates of shape [field_h * field_w, 3].
        top_coords_rel = coords_obj[np.arange(top_inds.size), top_inds_f]
        top_scales = model_store.frag_sizes[obj_id][top_inds_f]
        top_coords = top_coords_rel * top_scales.reshape((-1, 1))

        # Reshape to [field_h, field_w, 3].
        top_coords = top_coords.reshape(field_shape)

        # Reconstruction of shape [field_h, field_w, 3].
        top_reconst = top_centers + top_coords

        txt_list = [{'name': 'cls', 'val': obj_id, 'fmt': ':d'}]
        tiles_centers.append(
            visualization.write_text_on_image(colorize_xyz(top_centers),
                                              txt_list,
                                              size=10,
                                              color=(1.0, 1.0, 1.0)))
        tiles_coords.append(
            visualization.write_text_on_image(colorize_xyz(top_coords),
                                              txt_list,
                                              size=10,
                                              color=(1.0, 1.0, 1.0)))
        tiles_reconst.append(
            visualization.write_text_on_image(colorize_xyz(top_reconst),
                                              txt_list,
                                              size=10,
                                              color=(1.0, 1.0, 1.0)))

    # Assemble and save the visualization grids.
    fname = '{}_pred_frag_centers.{}'.format(vis_prefix, vis_ext)
    grid = build_grid(tiles_centers, output_size)
    inout.save_im(os.path.join(vis_dir, fname), grid)

    fname = '{}_pred_frag_coords.{}'.format(vis_prefix, vis_ext)
    grid = build_grid(tiles_coords, output_size)
    inout.save_im(os.path.join(vis_dir, fname), grid)

    fname = '{}_pred_frag_reconst.{}'.format(vis_prefix, vis_ext)
    grid = build_grid(tiles_reconst, output_size)
    inout.save_im(os.path.join(vis_dir, fname), grid)
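colorize_xyz and build_grid are helpers from the same EPOS vis.py module and are not included in this snippet. A plausible stand-in for colorize_xyz, which maps a field of 3D coordinates to an RGB image by simple min-max normalization (an illustrative assumption, not the project's implementation):

import numpy as np

def colorize_xyz(xyz):
    """Maps an [H, W, 3] array of 3D coordinates to RGB values in [0, 1]."""
    xyz = xyz.astype(np.float64)
    xyz -= xyz.min()
    max_val = xyz.max()
    if max_val > 0:
        xyz /= max_val
    return xyz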
Code example #4
File: visualization.py  Project: wangg12/bop_toolkit
def vis_object_poses(
      poses, K, renderer, rgb=None, depth=None, vis_rgb_path=None,
      vis_depth_diff_path=None, vis_rgb_resolve_visib=False):
  """Visualizes 3D object models in specified poses in a single image.

  Two visualizations are created:
  1. An RGB visualization (if vis_rgb_path is not None).
  2. A Depth-difference visualization (if vis_depth_diff_path is not None).

  :param poses: List of dictionaries, each with info about one pose:
    - 'obj_id': Object ID.
    - 'R': 3x3 ndarray with a rotation matrix.
    - 't': 3x1 ndarray with a translation vector.
    - 'text_info': Info to write at the object (see write_text_on_image).
  :param K: 3x3 ndarray with an intrinsic camera matrix.
  :param renderer: Instance of the Renderer class (see renderer.py).
  :param rgb: ndarray with the RGB input image.
  :param depth: ndarray with the depth input image.
  :param vis_rgb_path: Path to the output RGB visualization.
  :param vis_depth_diff_path: Path to the output depth-difference visualization.
  :param vis_rgb_resolve_visib: Whether to resolve visibility of the objects
    (i.e. only the closest object is visualized at each pixel).
  """
  fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]

  # Indicators of visualization types.
  vis_rgb = vis_rgb_path is not None
  vis_depth_diff = vis_depth_diff_path is not None

  if vis_rgb and rgb is None:
    raise ValueError('RGB visualization triggered but RGB image not provided.')

  if (vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib)) and depth is None:
    raise ValueError('Depth visualization triggered but D image not provided.')

  # Prepare images for rendering.
  im_size = None
  ren_rgb = None
  ren_rgb_info = None
  ren_depth = None

  if vis_rgb:
    im_size = (rgb.shape[1], rgb.shape[0])
    ren_rgb = np.zeros(rgb.shape, np.uint8)
    ren_rgb_info = np.zeros(rgb.shape, np.uint8)

  if vis_depth_diff:
    if im_size and im_size != (depth.shape[1], depth.shape[0]):
        raise ValueError('The RGB and D images must have the same size.')
    else:
      im_size = (depth.shape[1], depth.shape[0])

  if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib):
    ren_depth = np.zeros((im_size[1], im_size[0]), np.float32)

  # Render the pose estimates one by one.
  for pose in poses:

    # Rendering.
    ren_out = renderer.render_object(
      pose['obj_id'], pose['R'], pose['t'], fx, fy, cx, cy)

    m_rgb = None
    if vis_rgb:
      m_rgb = ren_out['rgb']

    m_mask = None
    if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib):
      m_depth = ren_out['depth']

      # Get mask of the surface parts that are closer than the
      # surfaces rendered before.
      visible_mask = np.logical_or(ren_depth == 0, m_depth < ren_depth)
      m_mask = np.logical_and(m_depth != 0, visible_mask)

      ren_depth[m_mask] = m_depth[m_mask].astype(ren_depth.dtype)

    # Combine the RGB renderings.
    if vis_rgb:
      if vis_rgb_resolve_visib:
        ren_rgb[m_mask] = m_rgb[m_mask].astype(ren_rgb.dtype)
      else:
        ren_rgb_f = ren_rgb.astype(np.float32) + m_rgb.astype(np.float32)
        ren_rgb_f[ren_rgb_f > 255] = 255
        ren_rgb = ren_rgb_f.astype(np.uint8)

      # Draw 2D bounding box and write text info.
      obj_mask = np.sum(m_rgb > 0, axis=2)
      ys, xs = obj_mask.nonzero()
      if len(ys):
        # bbox_color = model_color
        # text_color = model_color
        bbox_color = (0.3, 0.3, 0.3)
        text_color = (1.0, 1.0, 1.0)
        text_size = 11

        bbox = misc.calc_2d_bbox(xs, ys, im_size)
        im_size = (obj_mask.shape[1], obj_mask.shape[0])
        ren_rgb_info = draw_rect(ren_rgb_info, bbox, bbox_color)

        if 'text_info' in pose:
          text_loc = (bbox[0] + 2, bbox[1])
          ren_rgb_info = write_text_on_image(
            ren_rgb_info, pose['text_info'], text_loc, color=text_color,
            size=text_size)

  # Blend and save the RGB visualization.
  if vis_rgb:
    vis_im_rgb = 0.5 * rgb.astype(np.float32) + \
                 0.5 * ren_rgb.astype(np.float32) + \
                 1.0 * ren_rgb_info.astype(np.float32)
    vis_im_rgb[vis_im_rgb > 255] = 255
    misc.ensure_dir(os.path.dirname(vis_rgb_path))
    inout.save_im(vis_rgb_path, vis_im_rgb.astype(np.uint8), jpg_quality=95)

  # Save the image of depth differences.
  if vis_depth_diff:
    # Calculate the depth difference at pixels where both depth maps
    # are valid.
    valid_mask = (depth > 0) * (ren_depth > 0)
    depth_diff = valid_mask * (depth - ren_depth.astype(np.float32))

    f, ax = plt.subplots(1, 1)
    cax = ax.matshow(depth_diff)
    ax.axis('off')
    ax.set_title('captured - GT depth [mm]')
    f.colorbar(cax, fraction=0.03, pad=0.01)
    f.tight_layout(pad=0)

    misc.ensure_dir(os.path.dirname(vis_depth_diff_path))
    plt.savefig(vis_depth_diff_path, bbox_inches='tight', pad_inches=0)
    plt.close()
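A minimal call of vis_object_poses might look as follows; the pose values, output paths, and the already-loaded K, renderer, rgb and depth are illustrative placeholders:

import numpy as np

poses = [{
    'obj_id': 1,
    'R': np.eye(3),                           # hypothetical rotation
    't': np.array([[0.0], [0.0], [500.0]]),   # hypothetical translation [mm]
    'text_info': [{'name': 'obj', 'val': 1, 'fmt': ':d'}],
}]
vis_object_poses(
    poses, K, renderer, rgb=rgb, depth=depth,
    vis_rgb_path='vis/000000_rgb.jpg',              # hypothetical output paths
    vis_depth_diff_path='vis/000000_depth_diff.jpg',
    vis_rgb_resolve_visib=True)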
Code example #5
def vis_object_poses(
      poses, K, renderer, rgb=None, depth=None, vis_rgb_path=None,
      vis_depth_diff_path=None, vis_rgb_resolve_visib=False):
  """Visualizes 3D object models in specified poses in a single image.

  Two visualizations are created:
  1. An RGB visualization (if vis_rgb_path is not None).
  2. A Depth-difference visualization (if vis_depth_diff_path is not None).

  :param poses: List of dictionaries, each with info about one pose:
    - 'obj_id': Object ID.
    - 'R': 3x3 ndarray with a rotation matrix.
    - 't': 3x1 ndarray with a translation vector.
    - 'text_info': Info to write at the object (see write_text_on_image).
  :param K: 3x3 ndarray with an intrinsic camera matrix.
  :param renderer: Instance of the Renderer class (see renderer.py).
  :param rgb: ndarray with the RGB input image.
  :param depth: ndarray with the depth input image.
  :param vis_rgb_path: Path to the output RGB visualization.
  :param vis_depth_diff_path: Path to the output depth-difference visualization.
  :param vis_rgb_resolve_visib: Whether to resolve visibility of the objects
    (i.e. only the closest object is visualized at each pixel).
  """
  fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]

  # Indicators of visualization types.
  vis_rgb = vis_rgb_path is not None
  vis_depth_diff = vis_depth_diff_path is not None

  if vis_rgb and rgb is None:
    raise ValueError('RGB visualization triggered but RGB image not provided.')

  if (vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib)) and depth is None:
    raise ValueError('Depth visualization triggered but D image not provided.')

  # Prepare images for rendering.
  im_size = None
  ren_rgb = None
  ren_rgb_info = None
  ren_depth = None

  if vis_rgb:
    im_size = (rgb.shape[1], rgb.shape[0])
    ren_rgb = np.zeros(rgb.shape, np.uint8)
    ren_rgb_info = np.zeros(rgb.shape, np.uint8)

  if vis_depth_diff:
    if im_size and im_size != (depth.shape[1], depth.shape[0]):
        raise ValueError('The RGB and D images must have the same size.')
    else:
      im_size = (depth.shape[1], depth.shape[0])

  if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib):
    ren_depth = np.zeros((im_size[1], im_size[0]), np.float32)

  # Render the pose estimates one by one.
  for pose in poses:

    # Rendering.
    ren_out = renderer.render_object(
      pose['obj_id'], pose['R'], pose['t'], fx, fy, cx, cy)

    m_rgb = None
    if vis_rgb:
      m_rgb = ren_out['rgb']

    m_mask = None
    if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib):
      m_depth = ren_out['depth']

      # Get mask of the surface parts that are closer than the
      # surfaces rendered before.
      visible_mask = np.logical_or(ren_depth == 0, m_depth < ren_depth)
      m_mask = np.logical_and(m_depth != 0, visible_mask)

      ren_depth[m_mask] = m_depth[m_mask].astype(ren_depth.dtype)

    # Combine the RGB renderings.
    if vis_rgb:
      if vis_rgb_resolve_visib:
        ren_rgb[m_mask] = m_rgb[m_mask].astype(ren_rgb.dtype)
      else:
        ren_rgb_f = ren_rgb.astype(np.float32) + m_rgb.astype(np.float32)
        ren_rgb_f[ren_rgb_f > 255] = 255
        ren_rgb = ren_rgb_f.astype(np.uint8)

      # Draw 2D bounding box and write text info.
      obj_mask = np.sum(m_rgb > 0, axis=2)
      ys, xs = obj_mask.nonzero()
      if len(ys):
        # bbox_color = model_color
        # text_color = model_color
        bbox_color = (0.3, 0.3, 0.3)
        text_color = (1.0, 1.0, 1.0)
        text_size = 11

        bbox = misc.calc_2d_bbox(xs, ys, im_size)
        im_size = (obj_mask.shape[1], obj_mask.shape[0])
        ren_rgb_info = draw_rect(ren_rgb_info, bbox, bbox_color)

        if 'text_info' in pose:
          text_loc = (bbox[0] + 2, bbox[1])
          ren_rgb_info = write_text_on_image(
            ren_rgb_info, pose['text_info'], text_loc, color=text_color,
            size=text_size)

  # Blend and save the RGB visualization.
  if vis_rgb:
    misc.ensure_dir(os.path.dirname(vis_rgb_path))

    vis_im_rgb = 0.5 * rgb.astype(np.float32) + \
                 0.5 * ren_rgb.astype(np.float32) + \
                 1.0 * ren_rgb_info.astype(np.float32)
    vis_im_rgb[vis_im_rgb > 255] = 255
    inout.save_im(vis_rgb_path, vis_im_rgb.astype(np.uint8), jpg_quality=95)

  # Save the image of depth differences.
  if vis_depth_diff:
    misc.ensure_dir(os.path.dirname(vis_depth_diff_path))

    # Calculate the depth difference at pixels where both depth maps are valid.
    valid_mask = (depth > 0) * (ren_depth > 0)
    depth_diff = valid_mask * (ren_depth.astype(np.float32) - depth)

    # Get mask of pixels where the rendered depth is at most the tolerance
    # delta behind the captured depth (this tolerance is used in VSD).
    delta = 15
    below_delta = valid_mask * (depth_diff < delta)
    below_delta_vis = (255 * below_delta).astype(np.uint8)

    depth_diff_vis = 255 * depth_for_vis(depth_diff - depth_diff.min())

    # Pixels where the rendered depth is more than the tolerance delta behind
    # the captured depth will be shown in cyan.
    depth_diff_vis = np.dstack(
      [below_delta_vis, depth_diff_vis, depth_diff_vis]).astype(np.uint8)

    depth_diff_vis[np.logical_not(valid_mask)] = 0
    depth_diff_valid = depth_diff[valid_mask]
    depth_info = [
      {'name': 'min diff', 'fmt': ':.3f', 'val': np.min(depth_diff_valid)},
      {'name': 'max diff', 'fmt': ':.3f', 'val': np.max(depth_diff_valid)},
      {'name': 'mean diff', 'fmt': ':.3f', 'val': np.mean(depth_diff_valid)},
    ]
    depth_diff_vis = write_text_on_image(depth_diff_vis, depth_info)
    inout.save_im(vis_depth_diff_path, depth_diff_vis)
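depth_for_vis (also used in example #2) is not shown above. The calls suggest it rescales the valid values of a depth map into a display range; a sketch under that assumption:

import numpy as np

def depth_for_vis(depth, valid_start=0.2, valid_end=1.0):
    """Rescales positive depth values into [valid_start, valid_end] for display."""
    depth_n = depth.astype(np.float64)
    mask = depth_n > 0
    if np.any(mask):
        depth_n[mask] -= depth_n[mask].min()
        max_val = depth_n[mask].max()
        if max_val > 0:
            depth_n[mask] *= (valid_end - valid_start) / max_val
        depth_n[mask] += valid_start
    return depth_n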
Code example #6
File: vis.py  Project: zebrajack/epos
def visualize_gt_frag(gt_obj_ids, gt_obj_masks, gt_frag_labels,
                      gt_frag_weights, gt_frag_coords, output_size,
                      model_store, vis_prefix, vis_dir):
    """Visualizes GT fragment fields.

  Args:
    gt_obj_ids: GT object ID's.
    gt_obj_masks: GT object instance masks.
    gt_frag_labels: GT fragment labels.
    gt_frag_weights: GT fragment weights.
    gt_frag_coords: GT fragment coordinates.
    output_size: Size of the output fields.
    model_store: Store of 3D object models.
    vis_dir: Where to save the visualizations.
    vis_prefix: Name prefix of the visualizations.
  """
    # Consider the first (i.e. the closest) fragment.
    frag_ind = 0

    centers_vis = np.zeros((output_size[1], output_size[0], 3))
    for gt_id, obj_id in enumerate(gt_obj_ids):
        obj_mask = gt_obj_masks[gt_id]
        obj_frag_labels = gt_frag_labels[obj_mask][:, frag_ind]
        centers_vis[obj_mask] = model_store.frag_centers[obj_id][
            obj_frag_labels]

    weights_vis = gt_frag_weights[:, :, frag_ind]
    weights_vis /= weights_vis.max()

    coords_vis = np.zeros((output_size[1], output_size[0], 3))
    for gt_id, obj_id in enumerate(gt_obj_ids):

        obj_mask = gt_obj_masks[gt_id]
        obj_frag_labels = gt_frag_labels[obj_mask][:, frag_ind]
        obj_frag_coords = gt_frag_coords[obj_mask][:, frag_ind, :]

        # Scale by fragment sizes.
        frag_scales = model_store.frag_sizes[obj_id][obj_frag_labels]
        obj_frag_coords *= np.expand_dims(frag_scales, 1)

        coords_vis[obj_mask] = obj_frag_coords

    # Reconstruct the XYZ object coordinates.
    xyz_vis = centers_vis + coords_vis

    # Normalize the visualizations.
    centers_vis = centers_vis - centers_vis.min()
    centers_vis /= centers_vis.max()

    coords_vis = coords_vis - coords_vis.min()
    coords_vis /= coords_vis.max()

    xyz_vis = xyz_vis - xyz_vis.min()
    xyz_vis /= xyz_vis.max()

    # Save the visualizations.
    inout.save_im(
        os.path.join(vis_dir, '{}_gt_frag_labels.png'.format(vis_prefix)),
        (255.0 * centers_vis).astype(np.uint8))

    inout.save_im(
        os.path.join(vis_dir, '{}_gt_frag_coords.png'.format(vis_prefix)),
        (255.0 * coords_vis).astype(np.uint8))

    inout.save_im(
        os.path.join(vis_dir, '{}_gt_frag_reconst.png'.format(vis_prefix)),
        (255.0 * xyz_vis).astype(np.uint8))

    inout.save_im(
        os.path.join(vis_dir, '{}_gt_frag_weights.png'.format(vis_prefix)),
        (255.0 * weights_vis).astype(np.uint8))
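The code above only assumes that model_store exposes, per object ID, an array frag_centers[obj_id] of shape [num_frags, 3] and an array frag_sizes[obj_id] of shape [num_frags]. A minimal stand-in with that interface, useful for trying these visualizations outside EPOS (everything apart from those two attribute names is hypothetical):

import numpy as np

class SimpleModelStore(object):
    """Minimal stand-in for the EPOS model store used by the functions above."""

    def __init__(self):
        # Maps obj_id -> [num_frags, 3] array of fragment centers.
        self.frag_centers = {}
        # Maps obj_id -> [num_frags] array of fragment sizes (scales).
        self.frag_sizes = {}

    def add_object(self, obj_id, centers, sizes):
        self.frag_centers[obj_id] = np.asarray(centers, dtype=np.float64)
        self.frag_sizes[obj_id] = np.asarray(sizes, dtype=np.float64)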
Code example #7
      # mask_color = tuple(colors[(obj_id - 1) % len(colors)])

      # find bbox top left and bottom right and cut images
      rs, cs = obj_mask[:,:,0].nonzero() # row and column coordinates
      if len(rs):
        bb_min = [rs.min(), cs.min()]
        bb_max = [rs.max(), cs.max()]
      rgb      =      rgb[bb_min[0]:bb_max[0]+1, bb_min[1]:bb_max[1]+1, :]
      uv       =       uv[bb_min[0]:bb_max[0]+1, bb_min[1]:bb_max[1]+1, :]
      obj_mask = obj_mask[bb_min[0]:bb_max[0]+1, bb_min[1]:bb_max[1]+1, :]
      # depth tbd...

      # Save the rendered images.
      out_rgb_path = out_rgb_tpath.format(
        out_path=out_path, obj_id=obj_id, im_id=im_id)
      inout.save_im(out_rgb_path, rgb)

      # out_depth_path = out_depth_tpath.format(
      #   out_path=out_path, obj_id=obj_id, im_id=im_id)
      # inout.save_depth(out_depth_path, depth)

      out_uv_path = out_uv_tpath.format(
        out_path=out_path, obj_id=obj_id, im_id=im_id)
      inout.save_im(out_uv_path, uv)

      out_mask_path = out_mask_tpath.format(
        out_path=out_path, obj_id=obj_id, im_id=im_id)
      inout.save_im(out_mask_path, obj_mask)

      # Get 2D bounding box of the object model at the ground truth pose.
      # ys, xs = np.nonzero(depth > 0)
Code example #8
for obj_id in dp_model['obj_ids']:

    # Load object model.
    misc.log('Loading 3D model of object {}...'.format(obj_id))
    model_path = dp_model['model_tpath'].format(obj_id=obj_id)
    ren.add_object(obj_id, model_path)

    poses = misc.get_symmetry_transformations(models_info[obj_id],
                                              p['max_sym_disc_step'])

    for pose_id, pose in enumerate(poses):

        for view_id, view in enumerate(p['views']):

            R = view['R'].dot(pose['R'])
            t = view['R'].dot(pose['t']) + view['t']

            vis_rgb = ren.render_object(obj_id, R, t, fx, fy, cx, cy)['rgb']

            # Path to the output RGB visualization.
            vis_rgb_path = p['vis_rgb_tpath'].format(vis_path=p['vis_path'],
                                                     dataset=p['dataset'],
                                                     obj_id=obj_id,
                                                     view_id=view_id,
                                                     pose_id=pose_id)
            misc.ensure_dir(os.path.dirname(vis_rgb_path))
            inout.save_im(vis_rgb_path, vis_rgb)

misc.log('Done.')
Code example #9
            img = inout.load_im(rgb_fn)
            mask = inout.load_im(mask_files[img_id]) > 0
            vu_valid = np.where(mask)
            bbox = np.array([
                np.min(vu_valid[0]),
                np.min(vu_valid[1]),
                np.max(vu_valid[0]),
                np.max(vu_valid[1])
            ])
            crop_img = np.zeros((bbox[2] - bbox[0], bbox[3] - bbox[1], 3),
                                np.uint8)
            img = img[bbox[0]:bbox[2], bbox[1]:bbox[3]]
            crop_img[mask[bbox[0]:bbox[2],
                          bbox[1]:bbox[3]]] = img[mask[bbox[0]:bbox[2],
                                                       bbox[1]:bbox[3]]]
            inout.save_im(crop_fn, crop_img)
            inout.save_im(
                cropmask_fn,
                mask[bbox[0]:bbox[2], bbox[1]:bbox[3]].astype(np.uint8) * 255)
        crop_fns.append(crop_fn)
        crop_masks.append(cropmask_fn)
        obj_idx = model_map.index(obj_id)

        instance_id = model_maxinst[obj_idx]
        model_idx[obj_idx, instance_id] = overall_idx
        model_maxinst[obj_idx] += 1
        overall_idx += 1
    z_tra_mean = np.mean(z_tras)
    mean_scale = z_tra_mean / mean_depth  # 0.5 / 1
    mean_sigma = 0.5 * mean_scale
Code example #10
            # Convert depth so it is in the same units as other images in the dataset.
            depth /= float(dp_camera['depth_scale'])

            # The OpenCV function was used for rendering of the training images
            # provided for the SIXD Challenge 2017.
            rgb = cv2.resize(rgb,
                             dp_camera['im_size'],
                             interpolation=cv2.INTER_AREA)
            # rgb = scipy.misc.imresize(rgb, par['cam']['im_size'][::-1], 'bicubic')

            # Save the rendered images.
            out_rgb_path = out_rgb_tpath.format(out_path=out_path,
                                                obj_id=obj_id,
                                                im_id=im_id)
            inout.save_im(out_rgb_path, rgb)
            out_depth_path = out_depth_tpath.format(out_path=out_path,
                                                    obj_id=obj_id,
                                                    im_id=im_id)
            inout.save_depth(out_depth_path, depth)

            # Get 2D bounding box of the object model at the ground truth pose.
            # ys, xs = np.nonzero(depth > 0)
            # obj_bb = misc.calc_2d_bbox(xs, ys, dp_camera['im_size'])

            scene_camera[im_id] = {
                'cam_K': dp_camera['K'].flatten().tolist(),
                'depth_scale': dp_camera['depth_scale'],
                'view_level': int(views_level[view_id])
            }
Code example #11
    for im_id in range(len(ref_gt)):
        rgb_fn = os.path.join(target_dir + "/rgb", "{:06d}.png".format(im_id))
        depth_fn = os.path.join(target_dir + "/depth",
                                "{:06d}.png".format(im_id))
        mask_fn = os.path.join(target_dir + "/mask",
                               "{:06d}.png".format(im_id))

        rot = ref_gt[im_id][0]['cam_R_m2c']
        tra = ref_gt[im_id][0]['cam_t_m2c'] / 1000

        tf = np.eye(4)
        tf[:3, :3] = rot
        tf[:3, 3] = tra[:, 0]

        ren.clear()
        ren.draw_model(obj_model, tf)
        img_r, depth = ren.finish()
        img_r = img_r[:, :, ::-1]
        mask = depth > 0
        inout.save_im(rgb_fn, (img_r * 255).astype(np.uint8))
        inout.save_im(mask_fn, mask.astype(np.uint8) * 255)

        new_gt[im_id][0]['obj_bb'] = [0, 0, 0, 0]
        new_gt[im_id][0]['obj_id'] = int(model_ids[i])
        new_camera[im_id]['cam_K'] = np.array(camK)
        new_camera[im_id]['depth_scale'] = float(1)
        # inout.save_depth(depth_fn, depth*65535)  # we don't need depth for training (used only for ICP/inference)

    inout.save_scene_gt(scene_gt, new_gt)
    inout.save_scene_camera(scene_camera, new_camera)
Code example #12
def vis_object_poses_uv(poses,
                        K,
                        renderer,
                        rgb=None,
                        depth=None,
                        vis_rgb_path=None,
                        vis_depth_diff_path=None,
                        vis_rgb_resolve_visib=False,
                        vis_uv_path=None,
                        vis_mask_path=None):
    """Visualizes 3D object models in specified poses in a single image.

  Two visualizations are created:
  1. An RGB visualization (if vis_rgb_path is not None).
  2. A Depth-difference visualization (if vis_depth_diff_path is not None).

  :param poses: List of dictionaries, each with info about one pose:
    - 'obj_id': Object ID.
    - 'R': 3x3 ndarray with a rotation matrix.
    - 't': 3x1 ndarray with a translation vector.
    - 'text_info': Info to write at the object (see write_text_on_image).
  :param K: 3x3 ndarray with an intrinsic camera matrix.
  :param renderer: Instance of the Renderer class (see renderer.py).
  :param rgb: ndarray with the RGB input image.
  :param depth: ndarray with the depth input image.
  :param vis_rgb_path: Path to the output RGB visualization.
  :param vis_depth_diff_path: Path to the output depth-difference visualization.
  :param vis_rgb_resolve_visib: Whether to resolve visibility of the objects
    (i.e. only the closest object is visualized at each pixel).
  :param vis_uv_path: Path to the output rendering in UV colors.
  :param vis_mask_path: Path to the output mask with object IDs.
  """
    fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]

    # Indicators of visualization types.
    vis_rgb = vis_rgb_path is not None
    vis_depth_diff = vis_depth_diff_path is not None
    vis_uv = vis_uv_path is not None

    # Check that the required input images were provided.
    if vis_rgb and rgb is None:
        raise ValueError(
            'RGB visualization triggered but RGB image not provided.')

    if (vis_depth_diff or
        (vis_rgb and vis_rgb_resolve_visib)) and depth is None:
        raise ValueError(
            'Depth visualization triggered but D image not provided.')

    # Prepare images for rendering.
    im_size = None
    ren_rgb = None
    ren_rgb_info = None
    ren_depth = None

    if vis_rgb:
        im_size = (rgb.shape[1], rgb.shape[0])
        ren_rgb = np.zeros(rgb.shape, np.uint8)
        ren_rgb_info = np.zeros(rgb.shape, np.uint8)

    # for the masks
    if vis_uv:
        im_size = (rgb.shape[1], rgb.shape[0])
        ren_mask = np.zeros(rgb.shape, np.uint8)

    if vis_depth_diff:
        if im_size and im_size != (depth.shape[1], depth.shape[0]):
            raise ValueError('The RGB and D images must have the same size.')
        else:
            im_size = (depth.shape[1], depth.shape[0])

    if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib):
        ren_depth = np.zeros((im_size[1], im_size[0]), np.float32)

    # Render the pose estimates one by one.
    for gt_id, pose in enumerate(poses):

        # Rendering.
        ren_out = renderer.render_object(pose['obj_id'], pose['R'], pose['t'],
                                         fx, fy, cx, cy)

        # currently in uv colors
        m_rgb = None
        if vis_rgb:
            m_rgb = ren_out['rgb']

        m_mask_rgb = None
        if vis_uv:
            # create mask in object color
            m_mask_rgb = np.sum(m_rgb > 0, axis=2) >= 1
            m_mask_rgb = np.stack([m_mask_rgb] * 3, axis=2)
            # erode mask to remove 'black' border
            kernel = np.ones((5, 5), np.uint8)
            m_mask_rgb = cv2.erode(m_mask_rgb.astype(np.uint8),
                                   kernel,
                                   borderType=cv2.BORDER_CONSTANT,
                                   borderValue=0).astype(np.bool_)

            # apply eroded mask to renderings
            m_rgb = m_rgb * m_mask_rgb

            # create mask with obj id
            m_mask_rgb = (m_mask_rgb * pose['obj_id']).astype('uint8')
            # mask_color = tuple(colors[(obj_id - 1) % len(colors)])

        m_mask = None
        if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib):
            m_depth = ren_out['depth']

            # Get mask of the surface parts that are closer than the
            # surfaces rendered before.
            visible_mask = np.logical_or(ren_depth == 0, m_depth < ren_depth)
            m_mask = np.logical_and(m_depth != 0, visible_mask)

            ren_depth[m_mask] = m_depth[m_mask].astype(ren_depth.dtype)

        # # Save UV models individually before starting the combination steps
        # if vis_uv:
        #   misc.ensure_dir(os.path.dirname(vis_uv_path[gt_id]))
        #   ren_uv = np.zeros(rgb.shape, np.uint8)
        #   ren_uv_f = ren_uv.astype(np.float32) + m_rgb.astype(np.float32) # black background + current model rendered
        #   ren_uv_f[ren_uv_f > 255] = 255
        #   ren_uv = ren_uv_f.astype(np.uint8)
        #   inout.save_im(vis_uv_path[gt_id], ren_uv, jpg_quality=95)

        # Combine the RGB renderings.
        if vis_rgb:
            if vis_rgb_resolve_visib:
                ren_rgb[m_mask] = m_rgb[m_mask].astype(ren_rgb.dtype)
            else:
                ren_rgb_f = ren_rgb.astype(np.float32) + m_rgb.astype(
                    np.float32)
                ren_rgb_f[ren_rgb_f > 255] = 255
                ren_rgb = ren_rgb_f.astype(np.uint8)

                # Update the object ID mask at pixels not yet covered by a
                # previously rendered object.
                if vis_uv:
                    m_mask_idx = (ren_mask == 0) & (m_mask_rgb > 0)
                    ren_mask[m_mask_idx] = m_mask_rgb[m_mask_idx]
            # # Draw 2D bounding box and write text info.
            # obj_mask = np.sum(m_rgb > 0, axis=2)
            # ys, xs = obj_mask.nonzero()
            # if len(ys):
            #   # bbox_color = model_color
            #   # text_color = model_color
            #   bbox_color = (0.3, 0.3, 0.3)
            #   text_color = (1.0, 1.0, 1.0)
            #   text_size = 11

            #   bbox = misc.calc_2d_bbox(xs, ys, im_size)
            #   im_size = (obj_mask.shape[1], obj_mask.shape[0])
            #   ren_rgb_info = draw_rect(ren_rgb_info, bbox, bbox_color)

            #   if 'text_info' in pose:
            #     text_loc = (bbox[0] + 2, bbox[1])
            #     ren_rgb_info = write_text_on_image(
            #       ren_rgb_info, pose['text_info'], text_loc, color=text_color,
            #       size=text_size)

    # Blend and save the RGB visualization.
    if vis_rgb:
        misc.ensure_dir(os.path.dirname(vis_rgb_path))

        # vis_im_rgb = 0.5 * rgb.astype(np.float32) + \
        #              0.5 * ren_rgb.astype(np.float32) + \
        #              1.0 * ren_rgb_info.astype(np.float32)
        # vis_im_rgb[vis_im_rgb > 255] = 255
        # inout.save_im(vis_rgb_path, vis_im_rgb.astype(np.uint8), jpg_quality=95)
        inout.save_im(vis_rgb_path, rgb)  # only background

        # Save uv models and masks
        if vis_uv:
            misc.ensure_dir(os.path.dirname(vis_uv_path))
            ren_uv = ren_rgb.astype(np.uint8)
            inout.save_im(vis_uv_path, ren_uv)

            misc.ensure_dir(os.path.dirname(vis_mask_path))
            ren_mask = ren_mask.astype(np.uint8)
            inout.save_im(vis_mask_path, ren_mask)

    # Save the image of depth differences.
    if vis_depth_diff:
        misc.ensure_dir(os.path.dirname(vis_depth_diff_path))

        # Calculate the depth difference at pixels where both depth maps are valid.
        valid_mask = (depth > 0) * (ren_depth > 0)
        depth_diff = valid_mask * (ren_depth.astype(np.float32) - depth)

        delta = 15
        below_delta = valid_mask * (depth_diff < delta)
        below_delta_vis = (255 * below_delta).astype(np.uint8)

        depth_diff_vis = 255 * depth_for_vis(depth_diff - depth_diff.min())
        depth_diff_vis = np.dstack(
            [below_delta_vis, depth_diff_vis, depth_diff_vis]).astype(np.uint8)
        depth_diff_vis[np.logical_not(valid_mask)] = 0
        depth_diff_valid = depth_diff[valid_mask]
        depth_info = [
            {
                'name': 'min diff',
                'fmt': ':.3f',
                'val': np.min(depth_diff_valid)
            },
            {
                'name': 'max diff',
                'fmt': ':.3f',
                'val': np.max(depth_diff_valid)
            },
            {
                'name': 'mean diff',
                'fmt': ':.3f',
                'val': np.mean(depth_diff_valid)
            },
        ]
        depth_diff_vis = write_text_on_image(depth_diff_vis, depth_info)
        inout.save_im(vis_depth_diff_path, depth_diff_vis)
Code example #13
File: infer.py  Project: zebrajack/epos
def visualize(
      samples, predictions, pred_poses, im_ind, crop_size, output_scale,
      model_store, renderer, vis_dir):
  """Visualizes estimates from one image.

  Args:
    samples: Dictionary with input data.
    predictions: Dictionary with predictions.
    pred_poses: Predicted poses.
    im_ind: Image index.
    crop_size: Image crop size (width, height).
    output_scale: Scale of the model output w.r.t. the input (output / input).
    model_store: Store for 3D object models of class ObjectModelStore.
    renderer: Renderer of class bop_renderer.Renderer().
    vis_dir: Directory where the visualizations will be saved.
  """
  tf.logging.info('Visualization for: {}'.format(
    samples[common.IMAGE_PATH][0].decode('utf8')))

  # Size of a visualization grid tile.
  tile_size = (300, 225)

  # Extension of the saved visualizations ('jpg', 'png', etc.).
  vis_ext = 'jpg'

  # Font settings.
  font_size = 10
  font_color = (0.8, 0.8, 0.8)

  # Intrinsics.
  K = samples[common.K][0]
  output_K = K * output_scale
  output_K[2, 2] = 1.0

  # Tiles for the grid visualization.
  tiles = []

  # Size of the output fields.
  output_size =\
    int(output_scale * crop_size[0]), int(output_scale * crop_size[1])

  # Prefix of the visualization names.
  vis_prefix = '{:06d}'.format(im_ind)

  # Input RGB image.
  rgb = np.squeeze(samples[common.IMAGE][0])
  vis_rgb = visualization.write_text_on_image(
    misc.resize_image_py(rgb, tile_size).astype(np.uint8),
    [{'name': '', 'val': 'input', 'fmt': ':s'}],
    size=font_size, color=font_color)
  tiles.append(vis_rgb)

  # Visualize the ground-truth poses.
  if FLAGS.vis_gt_poses:

    gt_poses = []
    for gt_id, obj_id in enumerate(samples[common.GT_OBJ_IDS][0]):
      q = samples[common.GT_OBJ_QUATS][0][gt_id]
      R = transform.quaternion_matrix(q)[:3, :3]
      t = samples[common.GT_OBJ_TRANS][0][gt_id].reshape((3, 1))
      gt_poses.append({'obj_id': obj_id, 'R': R, 't': t})

    vis_gt_poses = vis.visualize_object_poses(rgb, K, gt_poses, renderer)
    vis_gt_poses = visualization.write_text_on_image(
      misc.resize_image_py(vis_gt_poses, tile_size),
      [{'name': '', 'val': 'gt poses', 'fmt': ':s'}],
      size=font_size, color=font_color)
    tiles.append(vis_gt_poses)

  # Visualize the estimated poses.
  if FLAGS.vis_pred_poses:
    vis_pred_poses = vis.visualize_object_poses(rgb, K, pred_poses, renderer)
    vis_pred_poses = visualization.write_text_on_image(
      misc.resize_image_py(vis_pred_poses, tile_size),
      [{'name': '', 'val': 'pred poses', 'fmt': ':s'}],
      size=font_size, color=font_color)
    tiles.append(vis_pred_poses)

  # Ground-truth object labels.
  if FLAGS.vis_gt_obj_labels and common.GT_OBJ_LABEL in samples:
    obj_labels = np.squeeze(samples[common.GT_OBJ_LABEL][0])
    obj_labels = obj_labels[:crop_size[1], :crop_size[0]]
    obj_labels = vis.colorize_label_map(obj_labels)
    obj_labels = visualization.write_text_on_image(
      misc.resize_image_py(obj_labels.astype(np.uint8), tile_size),
      [{'name': '', 'val': 'gt obj labels', 'fmt': ':s'}],
      size=font_size, color=font_color)
    tiles.append(obj_labels)

  # Predicted object labels.
  if FLAGS.vis_pred_obj_labels:
    obj_labels = np.squeeze(predictions[common.PRED_OBJ_LABEL][0])
    obj_labels = obj_labels[:crop_size[1], :crop_size[0]]
    obj_labels = vis.colorize_label_map(obj_labels)
    obj_labels = visualization.write_text_on_image(
      misc.resize_image_py(obj_labels.astype(np.uint8), tile_size),
      [{'name': '', 'val': 'predicted obj labels', 'fmt': ':s'}],
      size=font_size, color=font_color)
    tiles.append(obj_labels)

  # Predicted object confidences.
  if FLAGS.vis_pred_obj_confs:
    num_obj_labels = predictions[common.PRED_OBJ_CONF].shape[-1]
    for obj_label in range(num_obj_labels):
      obj_confs = misc.resize_image_py(np.array(
        predictions[common.PRED_OBJ_CONF][0, :, :, obj_label]), tile_size)
      obj_confs = (255.0 * obj_confs).astype(np.uint8)
      obj_confs = np.dstack([obj_confs, obj_confs, obj_confs])  # To RGB.
      obj_confs = visualization.write_text_on_image(
        obj_confs, [{'name': 'cls', 'val': obj_label, 'fmt': ':d'}],
        size=font_size, color=font_color)
      tiles.append(obj_confs)

  # Visualization of ground-truth fragment fields.
  if FLAGS.vis_gt_frag_fields and common.GT_OBJ_IDS in samples:
    vis.visualize_gt_frag(
      gt_obj_ids=samples[common.GT_OBJ_IDS][0],
      gt_obj_masks=samples[common.GT_OBJ_MASKS][0],
      gt_frag_labels=samples[common.GT_FRAG_LABEL][0],
      gt_frag_weights=samples[common.GT_FRAG_WEIGHT][0],
      gt_frag_coords=samples[common.GT_FRAG_LOC][0],
      output_size=output_size,
      model_store=model_store,
      vis_prefix=vis_prefix,
      vis_dir=vis_dir)

  # Visualization of predicted fragment fields.
  if FLAGS.vis_pred_frag_fields:
    vis.visualize_pred_frag(
      frag_confs=predictions[common.PRED_FRAG_CONF][0],
      frag_coords=predictions[common.PRED_FRAG_LOC][0],
      output_size=output_size,
      model_store=model_store,
      vis_prefix=vis_prefix,
      vis_dir=vis_dir,
      vis_ext=vis_ext)

  # Build and save a visualization grid.
  grid = vis.build_grid(tiles, tile_size)
  grid_vis_path = os.path.join(
    vis_dir, '{}_grid.{}'.format(vis_prefix, vis_ext))
  inout.save_im(grid_vis_path, grid)
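build_grid (used in examples #3 and #13) assembles the collected tiles into one image. A simple sketch that assumes every tile already has the requested tile size and lays the tiles out row by row; the column count and the zero padding of the last row are assumptions, not the EPOS implementation:

import math
import numpy as np

def build_grid(tiles, tile_size, cols=4):
    """Arranges a list of [H, W, 3] tiles into a single grid image."""
    tile_w, tile_h = tile_size
    rows = int(math.ceil(len(tiles) / float(cols)))
    grid = np.zeros((rows * tile_h, cols * tile_w, 3),
                    dtype=np.asarray(tiles[0]).dtype)
    for i, tile in enumerate(tiles):
        r, c = divmod(i, cols)
        tile = np.asarray(tile)[:tile_h, :tile_w]
        grid[r * tile_h:r * tile_h + tile.shape[0],
             c * tile_w:c * tile_w + tile.shape[1]] = tile
    return grid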