def backward(ctx, grad_dists):
    grad_dists = grad_dists.contiguous()
    points, tris, idxs = ctx.saved_tensors
    grad_points, grad_tris = _C.face_point_dist_backward(
        points, tris, idxs, grad_dists
    )
    # One return value per forward input: only points and tris are
    # differentiable; the first-idx/index/max arguments get None.
    return grad_points, None, grad_tris, None, None
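
# For context: a minimal, hypothetical sketch of the autograd Function this
# backward() belongs to, assuming the usual torch.autograd.Function pattern.
# The forward signature is inferred from the _C.face_point_dist_forward call
# sites in the test below, not copied from the repo; it shows why backward
# returns five values (one per forward input) and where ctx.saved_tensors
# comes from. The class name is illustrative.
class _FacePointDistanceSketch(torch.autograd.Function):
    @staticmethod
    def forward(ctx, points, points_first_idx, tris, tris_first_idx, max_tris):
        dists, idxs = _C.face_point_dist_forward(
            points, points_first_idx, tris, tris_first_idx, max_tris
        )
        # Save exactly the tensors that backward() unpacks.
        ctx.save_for_backward(points, tris, idxs)
        return dists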
def test_face_point_distance(self):
    """
    Test CUDA implementation for FacePointDistanceForward
    & FacePointDistanceBackward.
    """
    device = get_random_cuda_device()
    N, V, F, P = 4, 32, 16, 24
    meshes, pcls = self.init_meshes_clouds(N, V, F, P, device=device)

    # make points packed a leaf node
    points_packed = pcls.points_packed().detach().clone()  # (P, 3)
    points_first_idx = pcls.cloud_to_packed_first_idx()

    # make faces packed a leaf node
    verts_packed = meshes.verts_packed()
    faces_packed = verts_packed[meshes.faces_packed()]  # (T, 3, 3)
    faces_packed = faces_packed.clone().detach()
    faces_first_idx = meshes.mesh_to_faces_packed_first_idx()
    max_f = meshes.num_faces_per_mesh().max().item()

    # leaf nodes
    points_packed.requires_grad = True
    faces_packed.requires_grad = True
    grad_dists = torch.rand(
        (faces_packed.shape[0],), dtype=torch.float32, device=device
    )

    # Cuda Implementation: forward
    dists_cuda, idx_cuda = _C.face_point_dist_forward(
        points_packed, points_first_idx, faces_packed, faces_first_idx, max_f
    )

    # Cuda Implementation: backward
    grad_points_cuda, grad_faces_cuda = _C.face_point_dist_backward(
        points_packed, faces_packed, idx_cuda, grad_dists
    )

    # Cpu Implementation: forward
    dists_cpu, idx_cpu = _C.face_point_dist_forward(
        points_packed.cpu(),
        points_first_idx.cpu(),
        faces_packed.cpu(),
        faces_first_idx.cpu(),
        max_f,
    )

    # Cpu Implementation: backward
    grad_points_cpu, grad_faces_cpu = _C.face_point_dist_backward(
        points_packed.cpu(), faces_packed.cpu(), idx_cpu, grad_dists.cpu()
    )

    # Naive Implementation: forward
    faces_list = packed_to_list(faces_packed, meshes.num_faces_per_mesh().tolist())
    dists_naive = []
    for i in range(N):
        points = pcls.points_list()[i]
        tris = faces_list[i]
        dists_temp = torch.zeros(
            (tris.shape[0], points.shape[0]), dtype=torch.float32, device=device
        )
        for t in range(tris.shape[0]):
            for p in range(points.shape[0]):
                dist = self._point_to_tri_distance(points[p], tris[t])
                dists_temp[t, p] = dist

        # torch.min() doesn't necessarily return the first index of the
        # smallest value, while our warp_reduce does. So it's not
        # straightforward to compare indices directly, nor grad_tris,
        # whose values also depend on the index of the minimum.
        # To be able to compare, we take the minimum over dists_temp but
        # feed the cuda indices into the naive output.
        start = faces_first_idx[i]
        end = faces_first_idx[i + 1] if i < N - 1 else faces_packed.shape[0]

        min_idx = idx_cuda.cpu()[start:end] - points_first_idx[i].cpu()
        iidx = torch.arange(tris.shape[0], device=device)
        min_dist = dists_temp[iidx, min_idx]

        dists_naive.append(min_dist)
    dists_naive = torch.cat(dists_naive)

    # Compare
    self.assertClose(dists_naive.cpu(), dists_cuda.cpu())
    self.assertClose(dists_naive.cpu(), dists_cpu)

    # Naive Implementation: backward
    dists_naive.backward(grad_dists)
    grad_points_naive = torch.cat([cloud.grad for cloud in pcls.points_list()])
    grad_faces_naive = faces_packed.grad

    # Compare
    self.assertClose(grad_points_naive.cpu(), grad_points_cuda.cpu(), atol=1e-7)
    self.assertClose(grad_faces_naive.cpu(), grad_faces_cuda.cpu(), atol=5e-7)
    self.assertClose(grad_points_naive.cpu(), grad_points_cpu, atol=1e-7)
    self.assertClose(grad_faces_naive.cpu(), grad_faces_cpu, atol=5e-7)
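
# The test above relies on a reference helper, self._point_to_tri_distance,
# defined elsewhere in this test class. A minimal standalone sketch of what
# such a helper computes is below; it assumes the standard closest-point
# decomposition (plane distance when the projection of the point falls
# inside the triangle, otherwise the nearest of the three edges) and is not
# the repo's implementation. Function names and epsilons are illustrative.
def _point_to_seg_distance_sketch(p, a, b):
    # Distance from point p to segment ab: clamp the projection parameter
    # t to [0, 1], which also covers the endpoint (vertex) cases.
    ab = b - a
    t = torch.dot(p - a, ab) / torch.dot(ab, ab).clamp(min=1e-12)
    return torch.norm(p - (a + t.clamp(0.0, 1.0) * ab))


def _point_to_tri_distance_sketch(p, tri):
    # p: (3,) point; tri: (3, 3) triangle with rows a, b, c.
    a, b, c = tri.unbind(0)
    n = torch.linalg.cross(b - a, c - a)  # unnormalized triangle normal
    nn = torch.dot(n, n)
    # The minimum over the three edges is always a valid boundary distance.
    d_edges = torch.stack(
        [
            _point_to_seg_distance_sketch(p, a, b),
            _point_to_seg_distance_sketch(p, b, c),
            _point_to_seg_distance_sketch(p, c, a),
        ]
    ).min()
    if nn < 1e-12:  # degenerate triangle: fall back to the edges
        return d_edges
    # Signed-area (barycentric) tests: if the projection of p onto the
    # triangle's plane lies inside the triangle, the closest point is
    # interior and the distance is the unsigned plane distance.
    w0 = torch.dot(torch.linalg.cross(b - p, c - p), n)
    w1 = torch.dot(torch.linalg.cross(c - p, a - p), n)
    w2 = torch.dot(torch.linalg.cross(a - p, b - p), n)
    if w0 >= 0 and w1 >= 0 and w2 >= 0:
        return torch.dot(p - a, n).abs() / nn.sqrt()
    return d_edges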