def test_weighted_perspective_n_points(self, batch_size=16, num_pts=200):
    """Check that 0/1-weighted EPnP matches EPnP run on the kept points only.

    For both settings of ``skip_quadratic_eq`` the weighted batch solution is
    compared, sample by sample, against a solve that only sees the points
    whose weight is non-zero. Translations are compared with ``rtol=3e-3``
    and rotation quaternions with ``rtol=3e-4``.

    Note: the tensors are created with ``.cuda()``, so this test needs a
    CUDA device.
    """
    # Random 2D observations. The helper returns (x_cam, x_world, R, T);
    # only x_world is needed below.
    y = torch.randn((batch_size, num_pts, 2)).cuda() / 3.0
    _, x_world, _, _ = TestPerspectiveNPoints._generate_epnp_test_from_2d(y)

    # Binary weights that drop roughly half of the points at random ...
    weights = (torch.rand_like(x_world[:, :, 0]) > 0.5).float()
    # ... while always keeping the first 6 points of every sample.
    weights[:, :6] = 1.0

    # Push every zero-weight observation far away, so a solver that ignored
    # the weights would land on a visibly different solution.
    y = y + (1 - weights[:, :, None]) * 100.0

    def last_dim_norm(t):
        return t.norm(dim=-1)

    for skip_quadratic_eq in (True, False):
        # Batched solve using the 0/1 weights.
        weighted = perspective_n_points.efficient_pnp(
            x_world,
            y,
            skip_quadratic_eq=skip_quadratic_eq,
            weights=weights,
        )
        weighted_quat = rotation_conversions.matrix_to_quaternion(weighted.R)
        weighted_T = weighted.T

        for i in range(batch_size):
            keep = weights[i] > 0

            # Reference solve on the retained points only. It always runs
            # with skip_quadratic_eq=False, independent of the outer loop.
            reference = perspective_n_points.efficient_pnp(
                x_world[i, keep][None],
                y[i, keep][None],
                skip_quadratic_eq=False,
            )
            reference_quat = rotation_conversions.matrix_to_quaternion(
                reference.R)

            self.assertNormsClose(
                weighted_T[i],
                reference.T[0],
                rtol=3e-3,
                norm_fn=last_dim_norm,
            )
            self.assertNormsClose(
                weighted_quat[i],
                reference_quat[0],
                rtol=3e-4,
                norm_fn=last_dim_norm,
            )
def compute_pose_epnp(
        intrinsic_matrix: th.Tensor,
        scale: Tuple[float, float, float],
        points_2d: th.Tensor):
    """Estimate a pose with `efficient_pnp` from 2D keypoints and a box scale.

    Args:
        intrinsic_matrix: Tensor that is reshaped to 4x4; its `[0, 1]` and
            `[1, 0]` entries are used to rescale the keypoints.
        scale: Per-axis scale applied to the unit-box template points.
        points_2d: 2D keypoints. The original comments describe them as
            being in the [0, 1] range; the layout is presumably
            (9, batch, 2) so that `transpose(0, 1)` yields batch-first
            data -- TODO confirm against the caller. The input is not
            modified (a clone is edited).

    Returns:
        `(solution.R, solution.T)` on success, or `None` if `efficient_pnp`
        raises a `RuntimeError`.
    """
    matrix = intrinsic_matrix.reshape(4, 4)
    # Off-diagonal entries [0, 1] and [1, 0] provide the per-axis factor.
    keypoint_factor = -2.0 / matrix[(0, 1), (1, 0)]

    # Template points: the origin followed by the 8 corners of a unit box
    # centered at the origin.
    corners = list(itertools.product((-0.5, 0.5), repeat=3))
    template = np.insert(corners, 0, [0, 0, 0], axis=0)
    template = th.as_tensor(template, dtype=th.float32)
    scale_t = th.as_tensor(scale, device=template.device)
    box_points = template[:, None] * scale_t

    # Restore NDC convention + scaling + account for intrinsic matrix
    # FIXME(ycho): Maybe a more principled unprojection would be needed
    # in case of nontrivial camera matrices.
    keypoints = points_2d.clone()
    keypoints -= 0.5  # [0,1] -> [-0.5, 0.5]
    keypoints *= keypoint_factor

    # NOTE(ycho): PNP occasionally (actually quite often)? fails.
    try:
        result = efficient_pnp(
            box_points.transpose(0, 1).to(keypoints.device),
            keypoints.transpose(0, 1),
        )
        return (result.R, result.T)
    except RuntimeError:
        return None
def compute_transforms(points_2d: th.Tensor,
                       box_scale: th.Tensor,
                       projection_matrix: th.Tensor,
                       point_ids: th.Tensor = None):
    """Solve PnP for a scaled cube template and return (R, T).

    Args:
        points_2d: 2D keypoints. Per the original notes they are in X-Y
            order (i.e. minor-major) and normalized to the range (0.0, 1.0).
            The input is cloned, so the caller's tensor is not modified.
        box_scale: Scale multiplied onto the cube template as
            `box_scale[None, :]`.
        projection_matrix: Tensor that is reshaped to 4x4; its `[0, 0]` and
            `[1, 1]` entries are used to rescale the keypoints.
        point_ids: Optional index applied to the first axis of the 3D
            template so that only the detected points take part in PnP.
            Presumably `points_2d` already contains only those points --
            TODO confirm against the caller.

    Returns:
        A tuple `(R, T)` taken from the first (only) batch entry of the
        solution and converted as described in the notes below, or `None`
        if `efficient_pnp` raised a `RuntimeError`.
    """
    # Compute target 3d bounding box with scale applied.
    cube_points = get_cube_points().to(device=points_2d.device)

    # NOTE(ycho): Cloning here, to avoid overwriting input data.
    points_2d = points_2d.clone()
    points_3d = cube_points * box_scale[None, :]
    P = projection_matrix.reshape(4, 4)

    # Preprocess `points_2d` to conform to `efficient_pnp` convention.
    points_2d -= 0.5
    points_2d *= 2.0 / P[None, (0, 1), (0, 1)]

    # Compute PNP solution ...
    try:
        # NOTE(ycho): Only apply PnP on the points that were found.
        points_3d_prv = points_3d
        if point_ids is not None:
            points_3d = points_3d[point_ids, ...]
        solution = efficient_pnp(points_3d[None], points_2d[None],
                                 skip_quadratic_eq=True)
    except RuntimeError as e:
        print('Encountered error during PnP : {}'.format(e))
        print(points_3d_prv.shape)
        # `point_ids` is optional; guard the shape lookup so that a PnP
        # failure with the default argument still returns None instead of
        # raising AttributeError from inside this handler.
        print(point_ids.shape if point_ids is not None else None)
        print(points_2d.shape)
        return None
    R, T = solution.R[0], solution.T[0]

    # NOTE(ycho): **IMPORTANT**: this is the post-processing step
    # that accounts for the difference in the conventions used
    # within the `objectron` dataset vs. pytorch3d.

    # NOTE(ycho): In the PNP solution convention,
    # y[i] = Proj(x[i] R[i] + T[i])
    # In the objectron convention,
    # y[i] = Proj(R[i] x[i] + T[i])
    R = th.transpose(R, -2, -1)

    # NOTE(ycho): additional correction to account for coordinate flipping.
    DR = th.as_tensor([[0, 1, 0], [1, 0, 0], [0, 0, -1]],
                      dtype=th.float32, device=R.device)
    R = th.einsum('ab,...bc->...ac', DR, R)
    T = th.einsum('ab,...b->...a', DR, T)
    return (R, T)
def _run_and_print(self, x_world, y, R, T, print_stats, skip_q,
                   check_output=False):
    """Run `efficient_pnp`, optionally assert on the result and print stats.

    Args:
        x_world: 3D points handed to the solver; `shape[-2]` is taken as
            the number of points.
        y: 2D observations; expanded to the shape of `x_world[:, :, :2]`
            before solving.
        R, T: Reference rotation and translation to compare against.
        print_stats: If true, print per-sample errors and the estimated
            vs. reference R and T.
        skip_q: Forwarded as `skip_quadratic_eq`.
        check_output: If true (and there are enough points), assert that
            the solution reproduces the reference.
    """

    def last_dim_norm(t):
        return t.norm(dim=-1)

    solution = perspective_n_points.efficient_pnp(
        x_world,
        y.expand_as(x_world[:, :, :2]),
        skip_quadratic_eq=skip_q,
    )

    err_2d = reproj_error(x_world, y, solution.R, solution.T)
    est_quat = rotation_conversions.matrix_to_quaternion(solution.R)
    ref_quat = rotation_conversions.matrix_to_quaternion(R)

    num_pts = x_world.shape[-2]
    # quadratic part is more stable with fewer points
    if skip_q:
        min_pts = 5
    else:
        min_pts = 4

    if check_output and num_pts > min_pts:
        assert_msg = (
            "test_perspective_n_points assertion failure for "
            f"n_points={num_pts}, skip_quadratic={skip_q}, no noise."
        )

        # The solver's own reprojection error must match the recomputed
        # one, and the recomputed one must be small everywhere.
        self.assertClose(err_2d, solution.err_2d, msg=assert_msg)
        self.assertTrue((err_2d < 1e-3).all(), msg=assert_msg)

        self.assertNormsClose(
            T,
            solution.T[:, None, :],
            rtol=4e-3,
            norm_fn=last_dim_norm,
            msg=assert_msg,
        )
        self.assertNormsClose(
            ref_quat,
            est_quat,
            rtol=3e-3,
            norm_fn=last_dim_norm,
            msg=assert_msg,
        )

    if print_stats:
        torch.set_printoptions(precision=5, sci_mode=False)
        # Estimate and reference are joined along the last axis so that
        # they are printed next to each other.
        R_pairs = torch.cat((solution.R, R), dim=-1)
        T_pairs = torch.stack((solution.T, T[:, 0, :]), dim=-1)
        per_sample = zip(solution.err_2d, solution.err_3d, R_pairs, T_pairs)
        for sample_err_2d, sample_err_3d, R_pair, T_pair in per_sample:
            print("2D Error: %1.4f" % sample_err_2d.item())
            print("3D Error: %1.4f" % sample_err_3d.item())
            print("R_hat | R_gt\n", R_pair)
            print("T_hat | T_gt\n", T_pair)
def main():
    """Run `efficient_pnp` on the first dataset sample and print the result.

    Debugging entry point: takes the first item of `ObjectronDetection`,
    rescales its 2D keypoints, solves PnP against the scaled cube template
    and prints the ground-truth pose next to the PnP solution and its
    reported errors.
    """
    dataset = ObjectronDetection(ObjectronDetection.Settings(), False)
    p0 = get_cube_points()  # 9,3 (per the original author's note)

    for data in dataset:
        # Clone so that the in-place arithmetic below does not overwrite
        # the keypoints stored in the dataset sample.
        points_2d = data[Schema.KEYPOINT_2D][..., :2].clone()  # O,9,2
        P = data[Schema.PROJECTION].reshape(4, 4)
        print('P', P)

        # Restore NDC convention + scaling + account for intrinsic matrix
        # FIXME(ycho): Maybe a more principled unprojection would be needed
        # in case of nontrivial camera matrices.
        points_2d -= 0.5  # [0,1] -> [-0.5, 0.5]
        # Scale by the diagonal entries P[1, 1] and P[0, 0] (in that order).
        points_2d *= 2.0 / P[None, None, (1, 0), (1, 0)]
        print('p2d')
        print(points_2d)
        print(data[Schema.SCALE].shape)

        # Swap the two keypoint coordinates before handing them to PnP.
        points_2d = th.flip(points_2d, (-1, ))
        points_3d = p0[None] * data[Schema.SCALE][:, None, :]
        solution = efficient_pnp(points_3d, points_2d)

        R_gt = data[Schema.ORIENTATION].reshape(-1, 3, 3)
        T_gt = data[Schema.TRANSLATION].reshape(-1, 3, 1)

        print('GT')
        # NOTE(ycho): transposed due to mismatch in convention.
        # Transpose each 3x3 matrix explicitly: `.T` on a 3-D tensor is
        # deprecated and reverses all dimensions, including the batch one.
        print(R_gt.transpose(-2, -1))
        print(T_gt)

        print('PNP')
        print(solution.R)
        print(solution.T)

        print('ERROR')
        print(solution.err_2d)
        print(solution.err_3d)

        # Only the first sample is inspected.
        break