def backward(ctx, grad_dists):
    """
    Backward pass: turn the upstream distance gradient into gradients for
    the saved points and triangles.

    Args:
        ctx: context object whose ``saved_tensors`` unpacks to
            ``(points, tris, idxs)``.
        grad_dists: upstream gradient w.r.t. the distances. It is made
            contiguous before being passed to the extension.

    Returns:
        A 5-tuple ``(grad_points, None, grad_tris, None, None)``.
        NOTE(review): the ``None`` slots presumably line up with forward
        inputs that receive no gradient; forward is not visible here, so
        confirm against its signature.
    """
    upstream = grad_dists.contiguous()
    saved_points, saved_tris, saved_idxs = ctx.saved_tensors
    d_points, d_tris = _C.point_face_dist_backward(
        saved_points, saved_tris, saved_idxs, upstream
    )
    return d_points, None, d_tris, None, None
Exemple #2
0
    def test_point_face_distance(self):
        """
        Check the CUDA and CPU kernels for PointFaceDistanceForward and
        PointFaceDistanceBackward against a naive per-pair reference.
        """
        device = get_random_cuda_device()
        N, V, F, P = 4, 32, 16, 24
        meshes, pcls = self.init_meshes_clouds(N, V, F, P, device=device)

        # Packed points, detached and cloned so they become a leaf node.
        points_packed = pcls.points_packed().detach().clone()  # (P, 3)
        cloud_offsets = pcls.cloud_to_packed_first_idx()
        largest_cloud = pcls.num_points_per_cloud().max().item()

        # Packed faces (vertex coordinates gathered per face), also detached
        # and cloned so they become a leaf node.
        gathered_faces = meshes.verts_packed()[meshes.faces_packed()]  # (T, 3, 3)
        faces_packed = gathered_faces.clone().detach()
        face_offsets = meshes.mesh_to_faces_packed_first_idx()

        # Both leaves must track gradients for the naive backward pass.
        points_packed.requires_grad_(True)
        faces_packed.requires_grad_(True)

        total_points = points_packed.shape[0]
        upstream_grad = torch.rand(
            (total_points,), dtype=torch.float32, device=device
        )

        # ---- CUDA kernels: forward, then backward ----
        dists_cuda, idx_cuda = _C.point_face_dist_forward(
            points_packed, cloud_offsets, faces_packed, face_offsets, largest_cloud
        )
        grad_points_cuda, grad_faces_cuda = _C.point_face_dist_backward(
            points_packed, faces_packed, idx_cuda, upstream_grad
        )

        # ---- CPU kernels: forward, then backward ----
        dists_cpu, idx_cpu = _C.point_face_dist_forward(
            points_packed.cpu(),
            cloud_offsets.cpu(),
            faces_packed.cpu(),
            face_offsets.cpu(),
            largest_cloud,
        )
        # The CPU backward is deliberately fed the CUDA indices: using
        # idx_cpu doesn't pass - there seems to be a problem with tied results.
        grad_points_cpu, grad_faces_cpu = _C.point_face_dist_backward(
            points_packed.cpu(),
            faces_packed.cpu(),
            idx_cuda.cpu(),
            upstream_grad.cpu(),
        )

        # ---- Naive reference: forward ----
        faces_list = packed_to_list(faces_packed, meshes.num_faces_per_mesh().tolist())
        per_cloud_dists = []
        for b in range(N):
            cloud_pts = pcls.points_list()[b]
            mesh_tris = faces_list[b]
            pairwise = torch.zeros(
                (cloud_pts.shape[0], mesh_tris.shape[0]),
                dtype=torch.float32,
                device=device,
            )
            # Brute force: distance from every point to every triangle.
            for pi in range(cloud_pts.shape[0]):
                for ti in range(mesh_tris.shape[0]):
                    pairwise[pi, ti] = self._point_to_tri_distance(
                        cloud_pts[pi], mesh_tris[ti]
                    )

            # torch.min() is not guaranteed to return the first index of the
            # smallest value, whereas our warp_reduce does. Indices (and the
            # grad_tris that depend on them) therefore cannot be compared
            # directly. Instead the naive distances are read out at the
            # indices chosen by the CUDA kernel and those values are compared.
            start = cloud_offsets[b]
            if b < N - 1:
                stop = cloud_offsets[b + 1]
            else:
                stop = total_points

            # Convert packed face indices into indices local to this mesh.
            local_face_idx = idx_cuda.cpu()[start:stop] - face_offsets[b].cpu()
            point_rows = torch.arange(cloud_pts.shape[0], device=device)
            per_cloud_dists.append(pairwise[point_rows, local_face_idx])

        dists_naive = torch.cat(per_cloud_dists)

        # Forward results must agree across all three implementations.
        self.assertClose(dists_naive.cpu(), dists_cuda.cpu())
        self.assertClose(dists_naive.cpu(), dists_cpu)

        # ---- Naive reference: backward ----
        dists_naive.backward(upstream_grad)
        grad_points_naive = torch.cat([cloud.grad for cloud in pcls.points_list()])
        grad_faces_naive = faces_packed.grad.cpu()

        # Gradients must agree as well: CUDA first, then CPU.
        kernel_grads = (
            (grad_points_cuda.cpu(), grad_faces_cuda.cpu()),
            (grad_points_cpu, grad_faces_cpu),
        )
        for kernel_grad_points, kernel_grad_faces in kernel_grads:
            self.assertClose(grad_points_naive.cpu(), kernel_grad_points, atol=1e-7)
            self.assertClose(grad_faces_naive, kernel_grad_faces, atol=5e-7)