# Render depth image of the object model in the ground-truth pose. depth_gt_large = ren.render_object(gt['obj_id'], gt['cam_R_m2c'], gt['cam_t_m2c'], fx, fy, cx + ren_cx_offset, cy + ren_cy_offset)['depth'] depth_gt = depth_gt_large[ren_cy_offset:(ren_cy_offset + im_height), ren_cx_offset:(ren_cx_offset + im_width)] # Convert depth images to distance images. dist_gt = misc.depth_im_to_dist_im(depth_gt, K) dist_im = misc.depth_im_to_dist_im(depth, K) # Estimation of the visibility mask. visib_gt = visibility.estimate_visib_mask_gt(dist_im, dist_gt, p['delta'], visib_mode='bop19') # Mask of the object in the GT pose. obj_mask_gt_large = depth_gt_large > 0 obj_mask_gt = dist_gt > 0 # Number of pixels in the whole object silhouette # (even in the truncated part). px_count_all = np.sum(obj_mask_gt_large) # Number of pixels in the object silhouette with a valid depth measurement # (i.e. with a non-zero value in the depth image). px_count_valid = np.sum(dist_im[obj_mask_gt] > 0) # Number of pixels in the visible part of the object silhouette.
def vsd(R_est, t_est, R_gt, t_gt, depth_test, K, delta, taus,
        normalized_by_diameter, diameter, renderer, obj_id, cost_type='step'):
    """Visible Surface Discrepancy -- by Hodan, Michel et al. (ECCV 2018).

    :param R_est: 3x3 ndarray with the estimated rotation matrix.
    :param t_est: 3x1 ndarray with the estimated translation vector.
    :param R_gt: 3x3 ndarray with the ground-truth rotation matrix.
    :param t_gt: 3x1 ndarray with the ground-truth translation vector.
    :param depth_test: hxw ndarray with the test depth image.
    :param K: 3x3 ndarray with an intrinsic camera matrix.
    :param delta: Tolerance used for estimation of the visibility masks.
    :param taus: A list of misalignment tolerance values.
    :param normalized_by_diameter: Whether to normalize the pixel-wise
        distances by the object diameter.
    :param diameter: Object diameter.
    :param renderer: Instance of the Renderer class (see renderer.py).
    :param obj_id: Object identifier.
    :param cost_type: Type of the pixel-wise matching cost:
        'tlinear' - Used in the original definition of VSD in:
            Hodan et al., On Evaluation of 6D Object Pose Estimation, ECCVW'16
        'step' - Used for SIXD Challenge 2017 onwards.
    :return: List of calculated errors (one for each misalignment tolerance).
        If the union of the two visibility masks is empty, every error is 1.0.
    :raises ValueError: If cost_type is neither 'step' nor 'tlinear'.
    """
    # Validate the cost type before doing any work, so that an unknown value
    # is always reported -- not only when the visibility union is non-empty
    # and at least one tau is given -- and no rendering is wasted on it.
    if cost_type not in ('step', 'tlinear'):
        raise ValueError('Unknown pixel matching cost.')

    # Render depth images of the model in the estimated and the ground-truth
    # pose.
    fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]
    depth_est = renderer.render_object(
        obj_id, R_est, t_est, fx, fy, cx, cy)['depth']
    depth_gt = renderer.render_object(
        obj_id, R_gt, t_gt, fx, fy, cx, cy)['depth']

    # Convert depth images to distance images.
    dist_test = misc.depth_im_to_dist_im_fast(depth_test, K)
    dist_gt = misc.depth_im_to_dist_im_fast(depth_gt, K)
    dist_est = misc.depth_im_to_dist_im_fast(depth_est, K)

    # Visibility mask of the model in the ground-truth pose.
    visib_gt = visibility.estimate_visib_mask_gt(
        dist_test, dist_gt, delta, visib_mode='bop19')

    # Visibility mask of the model in the estimated pose.
    visib_est = visibility.estimate_visib_mask_est(
        dist_test, dist_est, visib_gt, delta, visib_mode='bop19')

    # Intersection and union of the visibility masks.
    visib_inter = np.logical_and(visib_gt, visib_est)
    visib_union = np.logical_or(visib_gt, visib_est)

    visib_union_count = visib_union.sum()
    # Pixels that are in exactly one of the two masks; each of them
    # contributes the maximum cost (1) to the error.
    visib_comp_count = visib_union_count - visib_inter.sum()

    # Nothing is visible in either pose: report the maximum error for every
    # tolerance (this also avoids a division by zero below).
    if visib_union_count == 0:
        return [1.0] * len(taus)

    # Pixel-wise distances (a fresh array, so the in-place division below
    # does not modify any of the distance images).
    dists = np.abs(dist_gt[visib_inter] - dist_est[visib_inter])

    # Normalization of pixel-wise distances by object diameter.
    if normalized_by_diameter:
        dists /= diameter

    # Calculate VSD for each provided value of the misalignment tolerance.
    errors = []
    for tau in taus:
        # Pixel-wise matching cost.
        if cost_type == 'step':
            costs = dists >= tau
        else:
            # 'tlinear': linear in the distance, truncated at 1.
            costs = np.minimum(dists / tau, 1.0)

        e = (np.sum(costs) + visib_comp_count) / float(visib_union_count)
        errors.append(e)

    return errors