def forward(ctx, points, points_first_idx, segms, segms_first_idx, max_segms):
    """
    Compute, for each edge segment, the squared distance to the closest
    point of the corresponding example in the batch.

    Args:
        ctx: Context object used to calculate gradients.
        points: FloatTensor of shape `(P, 3)`.
        points_first_idx: LongTensor of shape `(N,)` indicating the first
            point index for each example in the mesh.
        segms: FloatTensor of shape `(S, 2, 3)` of edge segments. The
            `s`-th edge segment is spanned by `(segms[s, 0], segms[s, 1])`.
        segms_first_idx: LongTensor of shape `(N,)` indicating the first
            edge index for each example in the mesh.
        max_segms: Scalar equal to maximum number of edges in the batch.

    Returns:
        dists: FloatTensor of shape `(S,)`, where `dists[s]` is the squared
            euclidean distance of the `s`-th edge to the closest point in
            the corresponding example in the batch.
            `dists[s] = d(points[idxs[s]], edges[s, 0], edges[s, 1])`, where
            `d(u, v0, v1)` is the distance of point `u` from the segment
            spanned by `(v0, v1)`, and `idxs[s]` is the index of the closest
            point (computed by the extension and saved for backward).
    """
    # Delegate the distance computation to the C++/CUDA extension.
    dists, idxs = _C.edge_point_dist_forward(
        points, points_first_idx, segms, segms_first_idx, max_segms
    )
    # The backward pass needs the inputs and the argmin indices.
    ctx.save_for_backward(points, segms, idxs)
    return dists
def test_edge_point_distance(self):
    """
    Test CUDA implementation for EdgePointDistanceForward &
    EdgePointDistanceBackward.

    The CUDA and CPU extension outputs (forward distances and backward
    gradients) are checked against a naive O(E * P) per-example loop that
    evaluates `_point_to_edge_distance` for every (edge, point) pair.
    """
    device = get_random_cuda_device()
    N, V, F, P = 4, 32, 16, 24
    meshes, pcls = self.init_meshes_clouds(N, V, F, P, device=device)

    # make points packed a leaf node
    points_packed = pcls.points_packed().detach().clone()  # (P, 3)
    points_first_idx = pcls.cloud_to_packed_first_idx()

    # make edges packed a leaf node
    verts_packed = meshes.verts_packed()
    edges_packed = verts_packed[meshes.edges_packed()]  # (E, 2, 3)
    edges_packed = edges_packed.clone().detach()
    edges_first_idx = meshes.mesh_to_edges_packed_first_idx()
    max_e = meshes.num_edges_per_mesh().max().item()

    # leaf nodes
    points_packed.requires_grad = True
    edges_packed.requires_grad = True
    # Random upstream gradient, one value per edge, used by every backward.
    grad_dists = torch.rand(
        (edges_packed.shape[0],), dtype=torch.float32, device=device
    )

    # Cuda Implementation: forward
    dists_cuda, idx_cuda = _C.edge_point_dist_forward(
        points_packed, points_first_idx, edges_packed, edges_first_idx, max_e
    )

    # Cuda Implementation: backward
    grad_points_cuda, grad_edges_cuda = _C.edge_point_dist_backward(
        points_packed, edges_packed, idx_cuda, grad_dists
    )

    # Cpu Implementation: forward
    dists_cpu, idx_cpu = _C.edge_point_dist_forward(
        points_packed.cpu(),
        points_first_idx.cpu(),
        edges_packed.cpu(),
        edges_first_idx.cpu(),
        max_e,
    )

    # Cpu Implementation: backward
    grad_points_cpu, grad_edges_cpu = _C.edge_point_dist_backward(
        points_packed.cpu(), edges_packed.cpu(), idx_cpu, grad_dists.cpu()
    )

    # Naive Implementation: forward
    edges_list = packed_to_list(edges_packed, meshes.num_edges_per_mesh().tolist())
    dists_naive = []
    for i in range(N):
        points = pcls.points_list()[i]
        edges = edges_list[i]
        # Full (edge, point) distance matrix for example i.
        dists_temp = torch.zeros(
            (edges.shape[0], points.shape[0]), dtype=torch.float32, device=device
        )
        for e in range(edges.shape[0]):
            for p in range(points.shape[0]):
                dist = self._point_to_edge_distance(points[p], edges[e])
                dists_temp[e, p] = dist

        # torch.min() doesn't necessarily return the first index of the
        # smallest value, our warp_reduce does. So it's not straightforward
        # to directly compare indices, nor the gradients of grad_edges which
        # also depend on the indices of the minimum value.
        # To be able to compare, we will compare dists_temp.min(1) and
        # then feed the cuda indices to the naive output
        start = edges_first_idx[i]
        end = edges_first_idx[i + 1] if i < N - 1 else edges_packed.shape[0]

        # idx_cuda holds indices into the packed points tensor; subtract the
        # example's first-point offset to index into this example's points.
        min_idx = idx_cuda.cpu()[start:end] - points_first_idx[i].cpu()
        iidx = torch.arange(edges.shape[0], device=device)
        min_dist = dists_temp[iidx, min_idx]

        dists_naive.append(min_dist)

    dists_naive = torch.cat(dists_naive)

    # Compare
    self.assertClose(dists_naive.cpu(), dists_cuda.cpu())
    self.assertClose(dists_naive.cpu(), dists_cpu)

    # Naive Implementation: backward
    dists_naive.backward(grad_dists)
    grad_points_naive = torch.cat([cloud.grad for cloud in pcls.points_list()])
    grad_edges_naive = edges_packed.grad.cpu()

    # Compare
    self.assertClose(grad_points_naive.cpu(), grad_points_cuda.cpu(), atol=1e-7)
    self.assertClose(grad_edges_naive, grad_edges_cuda.cpu(), atol=5e-7)
    self.assertClose(grad_points_naive.cpu(), grad_points_cpu, atol=1e-7)
    self.assertClose(grad_edges_naive, grad_edges_cpu, atol=5e-7)