def _compute_norm_sign_scaling_factor(c_cam, alphas, x_world, y, weight, eps=1e-9):
    """
    Given a solution, adjusts the scale and flips the sign so that the
    reconstructed points lie in front of the camera (positive `z`).

    Args:
        c_cam: control points in camera coordinates
        alphas: barycentric coordinates of the points
        x_world: Batch of 3-dimensional points of shape `(minibatch, num_points, 3)`.
        y: Batch of 2-dimensional points of shape `(minibatch, num_points, 2)`.
        weight: Batch of non-negative weights of shape
            `(minibatch, num_point)`. `None` means equal weights.
        eps: epsilon to threshold negative `z` values
    """
    # position of reference points in camera coordinates
    x_cam = torch.matmul(alphas, c_cam)

    # flip the solution if the weighted mean depth is negative
    x_cam = x_cam * (1.0 - 2.0 * (oputil.wmean(x_cam[..., 2:], weight) < 0).float())
    if torch.any(x_cam[..., 2:] < -eps):
        neg_rate = oputil.wmean(
            (x_cam[..., 2:] < 0).float(), weight, dim=(0, 1)
        ).item()
        warnings.warn("\nEPnP: %2.2f%% points have z<0." % (neg_rate * 100.0))

    R, T, s = points_alignment.corresponding_points_alignment(
        x_world, x_cam, weight, estimate_scale=True
    )
    s = s.clamp(eps)
    x_cam = x_cam / s[:, None, None]
    T = T / s[:, None]
    x_w_rotated = torch.matmul(x_world, R) + T[:, None, :]
    err_2d = _reproj_error(x_w_rotated, y, weight)
    err_3d = _algebraic_error(x_w_rotated, x_cam, weight)

    return EpnpSolution(x_cam, R, T, err_2d, err_3d)
def test_wmean(self):
    device = torch.device("cuda:0")
    n_points = 20

    x = torch.rand(n_points, 3, device=device)
    weight = torch.rand(n_points, device=device)
    x_np = x.cpu().data.numpy()
    weight_np = weight.cpu().data.numpy()

    # test unweighted
    mean = oputil.wmean(x, keepdim=False)
    mean_gt = np.average(x_np, axis=-2)
    self.assertClose(mean.cpu().data.numpy(), mean_gt)

    # test weighted
    mean = oputil.wmean(x, weight=weight, keepdim=False)
    mean_gt = np.average(x_np, axis=-2, weights=weight_np)
    self.assertClose(mean.cpu().data.numpy(), mean_gt)

    # test keepdim
    mean = oputil.wmean(x, weight=weight, keepdim=True)
    self.assertClose(mean[0].cpu().data.numpy(), mean_gt)

    # test binary weights
    mean = oputil.wmean(x, weight=weight > 0.5, keepdim=False)
    mean_gt = np.average(x_np, axis=-2, weights=weight_np > 0.5)
    self.assertClose(mean.cpu().data.numpy(), mean_gt)

    # test broadcasting
    x = torch.rand(10, n_points, 3, device=device)
    x_np = x.cpu().data.numpy()
    mean = oputil.wmean(x, weight=weight, keepdim=False)
    mean_gt = np.average(x_np, axis=-2, weights=weight_np)
    self.assertClose(mean.cpu().data.numpy(), mean_gt)

    weight = weight[None, None, :].repeat(3, 1, 1)
    mean = oputil.wmean(x, weight=weight, keepdim=False)
    self.assertClose(mean[0].cpu().data.numpy(), mean_gt)

    # test failing broadcasting
    weight = torch.rand(x.shape[0], device=device)
    with self.assertRaises(ValueError) as context:
        oputil.wmean(x, weight=weight, keepdim=False)
    self.assertTrue("weights are not compatible" in str(context.exception))

    # test dim
    weight = torch.rand(x.shape[0], n_points, device=device)
    weight_np = np.tile(
        weight[:, :, None].cpu().data.numpy(), (1, 1, x_np.shape[-1])
    )
    mean = oputil.wmean(x, dim=0, weight=weight, keepdim=False)
    mean_gt = np.average(x_np, axis=0, weights=weight_np)
    self.assertClose(mean.cpu().data.numpy(), mean_gt)

    # test dim tuple
    mean = oputil.wmean(x, dim=(0, 1), weight=weight, keepdim=False)
    mean_gt = np.average(x_np, axis=(0, 1), weights=weight_np)
    self.assertClose(mean.cpu().data.numpy(), mean_gt)
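# For reference, a hedged sketch of the semantics that oputil.wmean is expected
# to satisfy, reconstructed only from the behaviour exercised by test_wmean
# above. The signature, broadcasting rule, and error message here are
# assumptions for illustration, not the library source: a weighted mean over
# `dim` with broadcastable weights and an eps-clamped normalizer.
import torch

def wmean_sketch(x, weight=None, dim=-2, keepdim=True, eps=1e-9):
    # unweighted case reduces to a plain mean over `dim`
    if weight is None:
        return x.mean(dim=dim, keepdim=keepdim)
    # weights must broadcast against all but the last (coordinate) dimension
    if any(
        xd != wd and xd != 1 and wd != 1
        for xd, wd in zip(x.shape[-2::-1], weight.shape[::-1])
    ):
        raise ValueError("wmean: weights are not compatible with the tensor")
    w = weight.to(x.dtype)[..., None]
    return (x * w).sum(dim=dim, keepdim=keepdim) / w.sum(
        dim=dim, keepdim=keepdim
    ).clamp(eps)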
def _define_control_points(x, weight, storage_opts=None):
    """
    Returns control points that define barycentric coordinates

    Args:
        x: Batch of 3-dimensional points of shape `(minibatch, num_points, 3)`.
        weight: Batch of non-negative weights of shape
            `(minibatch, num_point)`. `None` means equal weights.
        storage_opts: dict of keyword arguments to the tensor constructor.
    """
    storage_opts = storage_opts or {}
    x_mean = oputil.wmean(x, weight)
    x_std = oputil.wmean((x - x_mean) ** 2, weight) ** 0.5
    c_world = F.pad(
        torch.eye(3, **storage_opts), (0, 0, 0, 1), value=0.0
    ).expand_as(x[:, :4, :])
    return c_world * x_std + x_mean
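# Illustrative sanity check (a sketch, not part of the source): with uniform
# weights the four control points returned above are the centroid shifted by
# one per-coordinate standard deviation along each axis, plus the centroid
# itself. Assumes _define_control_points and its dependencies are in scope as
# defined above.
import torch

x = torch.randn(2, 50, 3)
c = _define_control_points(x, weight=None)                  # (2, 4, 3)
mu = x.mean(dim=1, keepdim=True)                            # centroid, (2, 1, 3)
sigma = ((x - mu) ** 2).mean(dim=1, keepdim=True) ** 0.5    # per-coordinate std
assert torch.allclose(c[:, 3:, :], mu, atol=1e-5)           # 4th point == centroid
assert torch.allclose(c[:, :3, :], mu + sigma * torch.eye(3), atol=1e-5)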
def _algebraic_error(x_w_rotated, x_cam, weight):
    """
    Computes the residual of Umeyama in 3D.

    Args:
        x_w_rotated: The given 3D points rotated with the predicted camera.
        x_cam: the lifted 2D points y
        weight: Batch of non-negative weights of shape
            `(minibatch, num_point)`. `None` means equal weights.

    Returns:
        Optionally weighted MSE of difference between x_w_rotated and x_cam.
    """
    dist = ((x_w_rotated - x_cam) ** 2).sum(dim=-1, keepdim=True)
    return oputil.wmean(dist, weight)[:, 0, 0]
def _reproj_error(y_hat, y, weight, eps=1e-9):
    """
    Projects estimated 3D points and computes the reprojection error

    Args:
        y_hat: a batch of predicted 2D points in homogeneous coordinates
        y: a batch of ground-truth 2D points
        weight: Batch of non-negative weights of shape
            `(minibatch, num_point)`. `None` means equal weights.

    Returns:
        Optionally weighted RMSE of difference between y and y_hat.
    """
    y_hat = y_hat / torch.clamp(y_hat[..., 2:], eps)
    dist = ((y - y_hat[..., :2]) ** 2).sum(dim=-1, keepdim=True) ** 0.5
    return oputil.wmean(dist, weight)[:, 0, 0]
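# Illustrative check (a sketch, not library code): for predictions that already
# lie on the z == 1 plane the division by z is a no-op, so the reprojection
# error reduces to the mean 2D distance. Assumes _reproj_error and oputil are
# in scope as above.
import torch

y = torch.randn(2, 30, 2)
y_hat = torch.cat([y + 0.1, torch.ones(2, 30, 1)], dim=-1)   # homogeneous, z == 1
err = _reproj_error(y_hat, y, weight=None)
# each residual has norm 0.1 * sqrt(2), so err is ~0.1414 for both batch elements
assert torch.allclose(err, torch.full((2,), 0.1 * 2 ** 0.5), atol=1e-5)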
def corresponding_points_alignment(
    X: Union[torch.Tensor, Pointclouds],
    Y: Union[torch.Tensor, Pointclouds],
    weights: Union[torch.Tensor, List[torch.Tensor], None] = None,
    estimate_scale: bool = False,
    allow_reflection: bool = False,
    eps: float = 1e-8,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """
    Finds a similarity transformation (rotation `R`, translation `T`
    and optionally scale `s`) between two given sets of corresponding
    `d`-dimensional points `X` and `Y` such that:

    `s[i] X[i] R[i] + T[i] = Y[i]`,

    for all batch indexes `i` in the least squares sense.

    The algorithm is also known as Umeyama [1].

    Args:
        X: Batch of `d`-dimensional points of shape `(minibatch, num_point, d)`
            or a `Pointclouds` object.
        Y: Batch of `d`-dimensional points of shape `(minibatch, num_point, d)`
            or a `Pointclouds` object.
        weights: Batch of non-negative weights of shape `(minibatch, num_point)`
            or list of `minibatch` 1-dimensional tensors that may have different
            shapes; in that case, the length of the i-th tensor should be equal
            to the number of points in X_i and Y_i. Passing `None` means uniform
            weights.
        estimate_scale: If `True`, also estimates a scaling component `s` of the
            transformation. Otherwise assumes an identity scale and returns a
            tensor of ones.
        allow_reflection: If `True`, allows the algorithm to return `R`
            which is orthonormal but has determinant==-1.
        eps: A scalar for clamping to avoid dividing by zero. Active for the
            code that estimates the output scale `s`.

    Returns:
        3-element tuple containing
        - **R**: Batch of orthonormal matrices of shape `(minibatch, d, d)`.
        - **T**: Batch of translations of shape `(minibatch, d)`.
        - **s**: batch of scaling factors of shape `(minibatch, )`.

    References:
        [1] Shinji Umeyama: Least-Squares Estimation of
        Transformation Parameters Between Two Point Patterns
    """

    # make sure we convert input Pointclouds structures to tensors
    Xt, num_points = _convert_point_cloud_to_tensor(X)
    Yt, num_points_Y = _convert_point_cloud_to_tensor(Y)

    if (Xt.shape != Yt.shape) or (num_points != num_points_Y).any():
        raise ValueError(
            "Point sets X and Y have to have the same "
            "number of batches, points and dimensions."
        )
    if weights is not None:
        if isinstance(weights, list):
            if any(np != w.shape[0] for np, w in zip(num_points, weights)):
                raise ValueError(
                    "number of weights should equal the "
                    "number of points in the point cloud."
                )
            weights = [w[..., None] for w in weights]
            weights = strutil.list_to_padded(weights)[..., 0]

        if Xt.shape[:2] != weights.shape:
            raise ValueError(
                "weights should have the same first two dimensions as X."
            )

    b, n, dim = Xt.shape

    if (num_points < Xt.shape[1]).any() or (num_points < Yt.shape[1]).any():
        # in case we got Pointclouds as input, mask the unused entries in Xc, Yc
        mask = (
            torch.arange(n, dtype=torch.int64, device=Xt.device)[None]
            < num_points[:, None]
        ).type_as(Xt)
        weights = mask if weights is None else mask * weights.type_as(Xt)

    # compute the centroids of the point sets
    Xmu = oputil.wmean(Xt, weights, eps=eps)
    Ymu = oputil.wmean(Yt, weights, eps=eps)

    # mean-center the point sets
    Xc = Xt - Xmu
    Yc = Yt - Ymu

    total_weight = torch.clamp(num_points, 1)
    # special handling for heterogeneous point clouds and/or input weights
    if weights is not None:
        Xc *= weights[:, :, None]
        Yc *= weights[:, :, None]
        total_weight = torch.clamp(weights.sum(1), eps)

    if (num_points < (dim + 1)).any():
        warnings.warn(
            "The size of one of the point clouds is <= dim+1. "
            + "corresponding_points_alignment can't return a unique solution."
        )

    # compute the covariance XYcov between the point sets Xc, Yc
    XYcov = torch.bmm(Xc.transpose(2, 1), Yc)
    XYcov = XYcov / total_weight[:, None, None]

    # decompose the covariance matrix XYcov
    U, S, V = torch.svd(XYcov)

    # identity matrix used for fixing reflections
    E = torch.eye(dim, dtype=XYcov.dtype, device=XYcov.device)[None].repeat(b, 1, 1)

    if not allow_reflection:
        # reflection test:
        #   checks whether the estimated rotation has det==1,
        #   if not, finds the nearest rotation s.t. det==1 by
        #   flipping the sign of the last singular vector U
        R_test = torch.bmm(U, V.transpose(2, 1))
        E[:, -1, -1] = torch.det(R_test)

    # find the rotation matrix by composing U and V again
    R = torch.bmm(torch.bmm(U, E), V.transpose(2, 1))

    if estimate_scale:
        # estimate the scaling component of the transformation
        trace_ES = (torch.diagonal(E, dim1=1, dim2=2) * S).sum(1)
        Xcov = (Xc * Xc).sum((1, 2)) / total_weight

        # the scaling component
        s = trace_ES / torch.clamp(Xcov, eps)

        # translation component
        T = Ymu[:, 0, :] - s[:, None] * torch.bmm(Xmu, R)[:, 0, :]
    else:
        # translation component
        T = Ymu[:, 0, :] - torch.bmm(Xmu, R)[:, 0, :]

        # unit scaling since we do not estimate scale
        s = T.new_ones(b)

    return R, T, s