def estmatedMask(depth, model, groundTruthP, estimatedP, K, maskT):
    '''
    Compute the visibility mask of the object in the estimated pose.

    The model is rendered in the estimated pose, the test and rendered depth
    images are converted to distance images, and the result is combined with
    the ground truth visibility mask by ``v.estimate_visib_mask_est``.

    :param depth: depth image of the test scene (indexed as rows x columns)
    :param model: object model handed unchanged to ``renderer.render``
    :param groundTruthP: ground truth pose, a dictionary with 'R' and 't'
    :param estimatedP: estimated pose, a dictionary with 'R' and 't'
    :param K: camera matrix passed to the renderer and distance conversion
    :param maskT: tolerance forwarded to both visibility mask estimations
    :return: visibility mask of the model in the estimated pose
    '''
    # The renderer takes the size as (shape[1], shape[0])
    render_size = (depth.shape[1], depth.shape[0])

    # Depth image of the model placed at the estimated pose
    rendered_depth = renderer.render(
        model, render_size, K,
        estimatedP['R'], estimatedP['t'],
        mode='depth')

    # Depth images -> distance images
    scene_dist = misc.depth_im_to_dist_im(depth, K)
    rendered_dist = misc.depth_im_to_dist_im(rendered_depth, K)

    # The estimated-pose mask is defined relative to the ground truth mask
    gt_visible = groundTruthMask(depth, model, groundTruthP, K, maskT)

    return v.estimate_visib_mask_est(
        scene_dist, rendered_dist, gt_visible, maskT)
def groundTruthMask(depth, model, groundTruthP, K, maskT):
    '''
    Compute the visibility mask of the object in the ground truth pose.

    :param depth: depth image of the test scene (indexed as rows x columns)
    :param model: object model handed unchanged to ``renderer.render``
    :param groundTruthP: ground truth pose, a dictionary with 'R' and 't'
    :param K: camera matrix passed to the renderer and distance conversion
    :param maskT: tolerance forwarded to ``v.estimate_visib_mask_gt``
    :return: visibility mask of the model in the ground truth pose
    '''
    # The renderer takes the size as (shape[1], shape[0])
    render_size = (depth.shape[1], depth.shape[0])

    # Depth image of the model placed at the ground truth pose
    rendered_depth = renderer.render(
        model, render_size, K,
        groundTruthP['R'], groundTruthP['t'],
        mode='depth')

    # Depth images -> distance images
    scene_dist = misc.depth_im_to_dist_im(depth, K)
    rendered_dist = misc.depth_im_to_dist_im(rendered_depth, K)

    return v.estimate_visib_mask_gt(scene_dist, rendered_dist, maskT)
def calcost(estmitendPose, groundTruthPose, model, depthImage, delta, tau, K):
    """
    Pixel-wise matching cost between the estimated and ground truth poses.

    Both poses are rendered to depth, converted to distance images and
    compared only on pixels that are visible in both poses.

    :param estmitendPose: estimated pose, a dictionary with 'R' and 't'
    :param groundTruthPose: ground truth pose, a dictionary with 'R' and 't'
    :param model: object model handed unchanged to ``renderer.render``
    :param depthImage: depth image of the test scene
    :param delta: tolerance forwarded to the visibility mask estimation
    :param tau: distance difference that maps to the maximum cost of 1.0
    :param K: camera matrix
    :return: 1-D array of costs, one per pixel visible in both poses,
        each equal to ``|dist_gt - dist_est| / tau`` saturated at 1.0
    """
    size = (depthImage.shape[1], depthImage.shape[0])
    render_opts = dict(clip_near=100, clip_far=10000, mode='depth')

    # Depth renderings of the model in the estimated / ground truth pose
    est_depth = renderer.render(
        model, size, K, estmitendPose['R'], estmitendPose['t'], **render_opts)
    gt_depth = renderer.render(
        model, size, K, groundTruthPose['R'], groundTruthPose['t'],
        **render_opts)

    # Depth images -> distance images
    to_dist = obj_pose_eval.misc.depth_im_to_dist_im
    scene_dist = to_dist(depthImage, K)
    gt_dist = to_dist(gt_depth, K)
    est_dist = to_dist(est_depth, K)

    # Visibility masks; the estimated one is defined relative to the GT one
    gt_visible = v.estimate_visib_mask_gt(scene_dist, gt_dist, delta)
    est_visible = v.estimate_visib_mask_est(
        scene_dist, est_dist, gt_visible, delta)

    # Pixels visible in both poses
    both_visible = np.logical_and(gt_visible, est_visible)

    # Scale by 1/tau and saturate at 1.0, in place
    costs = np.abs(gt_dist[both_visible] - est_dist[both_visible])
    costs *= (1.0 / tau)
    np.minimum(costs, 1.0, out=costs)
    return costs
def show(type, degree):
    """
    Build the rotating-cup test scene and run the requested statistic on it.

    The cup model is loaded from 'cup.ply' and 361 poses are generated by
    rotating it about the Y axis from 0 to 360 degrees in one-degree steps,
    with a fixed 30 degree tilt about X and a translation of 180 along Z.
    Pose 90 is the ground truth; its rendered depth image, with empty pixels
    (value 0) replaced by 1000, is used as the synthetic test depth image.

    :param type: 'average' calls ``diff``; 'standard_deviation' calls
        ``standard_dev``. Any other value builds the scene and then does
        nothing.
    :param degree: converted with ``int()`` and passed as the first argument
        of the selected routine.
    :return: None
    """
    # Load object model
    model = inout.load_ply('cup.ply')

    # Camera parameters
    K = np.eye(3)
    K[0, 0] = 500.0  # fx
    K[1, 1] = 500.0  # fy
    K[0, 2] = 250.0  # cx
    K[1, 2] = 250.0  # cy
    im_size = (500, 500)

    def d2r(d):
        # Degrees to radians
        return np.pi * float(d) / 180.0

    # These two rotations are the same for every pose, so build them once.
    tilt_x = transform.rotation_matrix(d2r(30), [1, 0, 0])[:3, :3]
    # Flip Y axis (model coordinate system -> OpenCV coordinate system)
    flip_y = transform.rotation_matrix(np.pi, [1, 0, 0])[:3, :3]

    # Calculate the poses of the rotating cup
    poses = []
    for alpha in np.linspace(0, 360, 361):
        spin_y = transform.rotation_matrix(d2r(alpha), [0, 1, 0])[:3, :3]
        # Same multiplication order as before: flip * (tilt * spin)
        R = flip_y.dot(tilt_x.dot(spin_y))
        # A fresh translation per pose so the pose dictionaries share no array
        t = np.array([0.0, 0.0, 180]).reshape((3, 1))
        poses.append({'R': R, 't': t})

    # Set and render the ground truth pose
    pose_gt = poses[90]
    depth_gt = renderer.render(model, im_size, K, pose_gt['R'], pose_gt['t'],
                               100, 2000, mode='depth')

    # Synthesize the test depth image: background pixels get a depth of 1000
    depth_test = np.array(depth_gt)
    depth_test[depth_test == 0] = 1000

    # NOTE(review): the literals 3 and 30 are passed positionally; their
    # meaning is defined by diff/standard_dev, which are not visible here.
    if type == 'average':
        diff(int(degree), pose_gt, poses, model, depth_test, 3, 30, K)
    elif type == 'standard_deviation':
        standard_dev(int(degree), pose_gt, poses, model, depth_test, 3, 30, K)
def show(type, t1, t2):
    """
    Plot one pose error over all poses of the rotating-cup test scene.

    The cup model is loaded from 'cup.ply' and 361 poses are generated by
    rotating it about the Y axis from 0 to 360 degrees in one-degree steps,
    with a fixed 30 degree tilt about X and a translation of 180 along Z.
    Pose 90 is the ground truth; its rendered depth image, with empty pixels
    (value 0) replaced by 1000, is used as the synthetic test depth image.
    The selected error is evaluated for every pose against the ground truth
    and plotted against the pose ID.

    :param type: error to calculate, one of 'cpr', 'wivm', 'zdd'. Any other
        value yields an empty plot.
    :param t1: for 'cpr' the ``mint`` argument; for 'wivm' both the
        visibility tolerance and the ``inn`` weight. Unused for 'zdd'.
    :param t2: for 'cpr' the ``maxt`` argument; for 'wivm' the ``un``
        weight. Unused for 'zdd'.
    :return: None
    """
    # Load object model
    model = inout.load_ply('cup.ply')

    # Camera parameters
    K = np.eye(3)
    K[0, 0] = 500.0  # fx
    K[1, 1] = 500.0  # fy
    K[0, 2] = 250.0  # cx
    K[1, 2] = 250.0  # cy
    im_size = (500, 500)

    def d2r(d):
        # Degrees to radians
        return np.pi * float(d) / 180.0

    # These two rotations are the same for every pose, so build them once.
    tilt_x = transform.rotation_matrix(d2r(30), [1, 0, 0])[:3, :3]
    # Flip Y axis (model coordinate system -> OpenCV coordinate system)
    flip_y = transform.rotation_matrix(np.pi, [1, 0, 0])[:3, :3]

    # Calculate the poses of the rotating cup
    poses = []
    for alpha in np.linspace(0, 360, 361):
        spin_y = transform.rotation_matrix(d2r(alpha), [0, 1, 0])[:3, :3]
        # Same multiplication order as before: flip * (tilt * spin)
        R = flip_y.dot(tilt_x.dot(spin_y))
        # A fresh translation per pose so the pose dictionaries share no array
        t = np.array([0.0, 0.0, 180]).reshape((3, 1))
        poses.append({'R': R, 't': t})

    # Set and render the ground truth pose
    pose_gt = poses[90]
    depth_gt = renderer.render(model, im_size, K, pose_gt['R'], pose_gt['t'],
                               100, 2000, mode='depth')

    # Synthesize the test depth image: background pixels get a depth of 1000
    depth_test = np.array(depth_gt)
    depth_test[depth_test == 0] = 1000

    # Available errors: 'cpr' 'wivm' 'zdd'
    # Errors to be calculated:
    errs_active = [type]

    # Calculate the pose errors for every pose (0 - 360 degrees)
    errs = {err: [] for err in errs_active}
    for pose_id, pose in enumerate(poses):
        # Single pre-formatted argument: prints the same text on Python 2
        # and Python 3.
        print('Processing pose: %s' % (pose_id,))

        if 'cpr' in errs_active:
            errs['cpr'].append(error.cpr(model, pose_gt, pose, t1, t2))

        if 'zdd' in errs_active:
            delta = 3
            errs['zdd'].append(
                error.zdd(pose, pose_gt, model, depth_test, delta, K))

        if 'wivm' in errs_active:
            # NOTE(review): t1 is used both as the visibility tolerance and
            # as the `inn` weight -- confirm this is intended.
            delta = t1
            errs['wivm'].append(
                error.wivm(pose, pose_gt, model, depth_test, delta, K, t1, t2))

    # Draw one graph per calculated error, indexed by pose ID
    for err_name in errs_active:
        plt.figure()
        plt.plot(errs[err_name], c='r', lw=3)
        plt.xlabel('Pose ID')
        plt.ylabel(err_name)
        plt.tick_params(labelsize=16)
        plt.tight_layout()
    plt.show()
from obj_pose_eval import renderer, inout, transform

# Object model to render
model_path = 'cup.ply'
model = inout.load_ply(model_path)

# Camera matrix: fx = fy = 500, principal point (cx, cy) = (250, 250)
K = np.array([[500.0, 0.0, 250.0],
              [0.0, 500.0, 250.0],
              [0.0, 0.0, 1.0]])
im_size = (500, 500)

# Pose: half a turn about the X axis, translated 150 along Z
R = transform.rotation_matrix(np.pi, (1, 0, 0))[:3, :3]
t = np.array([[0, 0, 150]]).T

# Render color and depth in a single call.
# NOTE(review): the original kept commented-out calls with mode='depth' and
# mode='rgb', which suggests the renderer can also return just one image.
rgb, depth = renderer.render(
    model, im_size, K, R, t, 100, 2000, mode='rgb+depth')

# Color image
plt.imshow(rgb)
plt.title('Rendered color image')

# Depth image in its own figure, with a color scale
plt.matshow(depth)
plt.colorbar()
plt.title('Rendered depth image')

plt.show()
t = np.array([0.0, 0.0, 180]).reshape((3, 1)) # Flip Y axis (model coordinate system -> OpenCV coordinate system) R = transform.rotation_matrix(np.pi, [1, 0, 0])[:3, :3].dot(R) poses.append({'R': R, 't': t}) # Set and render the ground truth pose gt_id = 90 # ID of the ground truth pose pose_gt = poses[gt_id] pose_gt_indis_set_ids = range( 55, 126) # IDs of poses indistinguishable from the GT pose pose_gt_indis_set = [poses[i] for i in pose_gt_indis_set_ids] depth_gt = renderer.render(model, im_size, K, pose_gt['R'], pose_gt['t'], 100, 2000, mode='depth') # Synthesize the test depth image depth_test = np.array(depth_gt) depth_test[depth_test == 0] = 1000 # Available errors: 'vsd', 'acpd', 'mcpd', 'add', 'adi', 'te', 're', 'cou' # Errors to be calculated: errs_active = ['vsd'] # Calculate the pose errors errs = {err: [] for err in errs_active} for pose_id, pose in enumerate(poses):
depth_fpath = depth_fpath_mask.format(im_id_str) depth = cv2.imread(depth_fpath, cv2.IMREAD_UNCHANGED).astype(np.float32) # Convert the input depth image to a distance image dist = misc.depth_im_to_dist_im(depth, K) for obj_id, obj_name in enumerate(objs): pose = gt_poses[obj_id][int(im_id)] if pose['R'].size != 0 and pose['t'].size != 0: # Render the object model depth_ren_gt = renderer.render(models[obj_id], im_size, K, pose['R'], pose['t'], 0.1, 2.0, surf_color=(0.0, 1.0, 0.0), mode='depth') depth_ren_gt *= 1000 # Convert the rendered depth map to [mm] # Convert the input depth image to a distance image dist_ren_gt = misc.depth_im_to_dist_im(depth_ren_gt, K) # Estimate the visibility mask delta = 15 # [mm] visib_mask = visibility.estimate_visib_mask( dist, dist_ren_gt, delta) # Get the non-visibility (occlusion) mask
rgb_fpath = rgb_fpath_mask.format(im_id_str) rgb = cv2.imread(rgb_fpath, cv2.IMREAD_COLOR) rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB) depth_fpath = depth_fpath_mask.format(im_id_str) depth = cv2.imread(depth_fpath, cv2.IMREAD_UNCHANGED).astype(np.float32) # Convert the input depth image to a distance image dist = misc.depth_im_to_dist_im(depth, K) for obj_id, obj_name in enumerate(objs): pose = gt_poses[obj_id][int(im_id)] if pose['R'].size != 0 and pose['t'].size != 0: # Render the object model depth_ren_gt = renderer.render( models[obj_id], im_size, K, pose['R'], pose['t'], 0.1, 2.0, surf_color=(0.0, 1.0, 0.0), mode='depth') depth_ren_gt *= 1000 # Convert the rendered depth map to [mm] # Convert the input depth image to a distance image dist_ren_gt = misc.depth_im_to_dist_im(depth_ren_gt, K) # Estimate the visibility mask delta = 15 # [mm] visib_mask = visibility.estimate_visib_mask(dist, dist_ren_gt, delta) # Get the non-visibility (occlusion) mask nonvisib_mask = np.logical_and(~visib_mask, dist_ren_gt > 0) # Difference between the test and the rendered distance image dist_diff = dist_ren_gt.astype(np.float32) - dist.astype(np.float32)
def _wivm_z_score(est_sample, gt_sample):
    """
    Z-style statistic comparing the means of two 1-D distance samples.

    Computes ``(mean_est - mean_gt) / sqrt(var_est + var_gt)`` where each
    variance is the sum of squared deviations divided by the sample size.

    :raises ZeroDivisionError: if either sample is empty or both samples
        have zero spread.
    """
    if len(est_sample) == 0 or len(gt_sample) == 0:
        raise ZeroDivisionError(
            'wivm: empty visibility mask, z score is undefined')

    est_mean = float(est_sample.mean())
    gt_mean = float(gt_sample.mean())

    # Sum of squared deviations, vectorized and accumulated in double
    # precision instead of a per-pixel Python loop.
    est_sq_sum = float(((est_sample.astype(float) - est_mean) ** 2).sum())
    gt_sq_sum = float(((gt_sample.astype(float) - gt_mean) ** 2).sum())

    spread = math.sqrt(est_sq_sum / len(est_sample) +
                       gt_sq_sum / len(gt_sample))
    if spread == 0.0:
        raise ZeroDivisionError(
            'wivm: both samples have zero variance, z score is undefined')
    return (est_mean - gt_mean) / spread


def wivm(estimatePose, groundTruthPose, model, depth, delta, K, inn, un):
    """
    Weight for Inner Visibility Mask

    Two z scores between the estimated and ground truth distance images are
    combined: one over the union of both visibility masks, and one where
    each image is sampled under its own visibility mask.

    :param estimatePose: Estimated pose given by a dictionary:
        {'R': 3x3 rotation matrix, 't': 3x1 translation vector}.
    :param groundTruthPose: The ground truth pose given by a dictionary
    :param model: Object model given by a dictionary where item 'pts'
        is nx3 ndarray with 3D model points.
    :param depth: Depth image of the test scene.
    :param delta: Tolerance used for estimation of the visibility masks
    :param K: camera parameter matrix
    :param inn: weight of the z score taken under each pose's own mask
    :param un: weight of the z score taken under the union of the masks
    :return: the WIVM error, ``|un * z_union| + |inn * z_own|``
    :raises ZeroDivisionError: if a visibility mask is empty or the sampled
        distances have zero spread.
    """
    im_size = (depth.shape[1], depth.shape[0])

    # Render depth images of the model in the estimated and the ground truth
    # pose
    depth_est = renderer.render(model, im_size, K,
                                estimatePose['R'], estimatePose['t'],
                                clip_near=100, clip_far=10000, mode='depth')
    depth_gt = renderer.render(model, im_size, K,
                               groundTruthPose['R'], groundTruthPose['t'],
                               clip_near=100, clip_far=10000, mode='depth')

    # Convert the rendered depth images to distance images. The test image
    # is converted inside the mask helpers, so it is not converted here.
    dist_gt = misc.depth_im_to_dist_im(depth_gt, K)
    dist_est = misc.depth_im_to_dist_im(depth_est, K)

    # Visibility masks of the model in the ground truth / estimated pose
    gt = groundTruthMask(depth, model, groundTruthPose, K, delta)
    est = estmatedMask(depth, model, groundTruthPose, estimatePose, K, delta)

    # Z score on the union of both visibility masks
    union = np.logical_or(gt, est)
    z_union = _wivm_z_score(dist_est[union], dist_gt[union])

    # Z score with each distance image sampled under its own mask
    z_own = _wivm_z_score(dist_est[est], dist_gt[gt])

    return math.fabs(un * z_union) + math.fabs(inn * z_own)
def zdd(estimatePose, groundTruthPose, model, depth, delta, K):
    """
    Z test for Distance Difference

    The estimated and ground truth poses are rendered to distance images,
    each is sampled under its own visibility mask, and the difference of the
    sample means is divided by ``sqrt(var_est + var_gt)``, where each
    variance is the sum of squared deviations divided by the sample size.

    :param estimatePose: Estimated pose given by a dictionary:
        {'R': 3x3 rotation matrix, 't': 3x1 translation vector}.
    :param groundTruthPose: The ground truth pose given by a dictionary
    :param model: Object model given by a dictionary where item 'pts'
        is nx3 ndarray with 3D model points.
    :param depth: Depth image of the test scene.
    :param delta: Tolerance used for estimation of the visibility masks
    :param K: camera parameter matrix
    :return: the zdd error (signed; positive when the estimated mean
        distance is larger than the ground truth one)
    :raises ZeroDivisionError: if a visibility mask is empty or the sampled
        distances have zero spread.
    """
    im_size = (depth.shape[1], depth.shape[0])

    # Render depth images of the model in the estimated and the ground truth
    # pose
    depth_est = renderer.render(model, im_size, K,
                                estimatePose['R'], estimatePose['t'],
                                clip_near=100, clip_far=10000, mode='depth')
    depth_gt = renderer.render(model, im_size, K,
                               groundTruthPose['R'], groundTruthPose['t'],
                               clip_near=100, clip_far=10000, mode='depth')

    # Convert the rendered depth images to distance images. The test image
    # is converted inside the mask helpers, so it is not converted here.
    dist_gt = misc.depth_im_to_dist_im(depth_gt, K)
    dist_est = misc.depth_im_to_dist_im(depth_est, K)

    # Visibility masks of the model in the ground truth / estimated pose
    gt = groundTruthMask(depth, model, groundTruthPose, K, delta)
    est = estmatedMask(depth, model, groundTruthPose, estimatePose, K, delta)

    # Distances sampled under each pose's own visibility mask
    est_sample = dist_est[est]
    gt_sample = dist_gt[gt]
    if len(est_sample) == 0 or len(gt_sample) == 0:
        raise ZeroDivisionError(
            'zdd: empty visibility mask, z score is undefined')

    # Sample means
    est_mean = float(est_sample.mean())
    gt_mean = float(gt_sample.mean())

    # Sum of squared deviations, vectorized and accumulated in double
    # precision instead of a per-pixel Python loop.
    est_sq_sum = float(((est_sample.astype(float) - est_mean) ** 2).sum())
    gt_sq_sum = float(((gt_sample.astype(float) - gt_mean) ** 2).sum())

    spread = math.sqrt(est_sq_sum / len(est_sample) +
                       gt_sq_sum / len(gt_sample))
    if spread == 0.0:
        raise ZeroDivisionError(
            'zdd: both samples have zero variance, z score is undefined')

    # Z score for the two groups of data
    return (est_mean - gt_mean) / spread