def _save_grad(grad):
    # NOTE: for iso_normal, this gradient is always zero (WHY?)
    dbg_tensor = get_debugging_tensor()  # a dict of list of tensors
    grad = packed_to_list(grad, num_points_per_cloud)
    dbg_tensor.pts_world_grad[name] = [
        grad[b].detach().cpu() for b in range(len(grad))
    ]
    if (
        dbg_tensor.pts_world_grad[name][0].shape
        != dbg_tensor.pts_world[name][0].shape
    ):
        import pdb

        pdb.set_trace()
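
# _save_grad above is written as a closure: `name`, `num_points_per_cloud` and
# get_debugging_tensor() come from its enclosing scope, and the function is meant
# to be attached as a backward hook. A minimal, self-contained sketch of how such
# a hook is typically registered with Tensor.register_hook; the tensor and hook
# names here are illustrative assumptions, not taken from the code above.
def _example_register_grad_hook():
    import torch

    pts_world = torch.randn(5, 3, requires_grad=True)
    saved = {}

    def save_grad_hook(grad):
        # register_hook calls this with d(loss)/d(pts_world) during backward.
        saved["pts_world_grad"] = grad.detach().cpu()

    pts_world.register_hook(save_grad_hook)
    pts_world.sum().backward()
    return saved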
def mask_padded_to_list(
    values: torch.Tensor, mask: torch.Tensor
) -> List[torch.Tensor]:
    """
    padded_to_list with mask.

    Args:
        values (tensor(number)): (N, ..., C)
        mask (tensor(bool)): (N, ...) bool values

    Returns:
        value_list (List(tensors)): (N,) list of filtered values (Pi, C) in each
            batch element, where Pi is the number of true values in mask[i].
    """
    from pytorch3d.structures import packed_to_list

    batch_size = values.shape[0]
    value_packed = values[mask]
    num_true_in_batch = mask.view(batch_size, -1).sum(dim=1)
    value_list = packed_to_list(value_packed, num_true_in_batch.tolist())
    return value_list
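
# A minimal usage sketch for mask_padded_to_list. The toy shapes below are
# illustrative assumptions, not taken from the function itself.
def _example_mask_padded_to_list():
    import torch

    # Padded batch of 2 "clouds" with 3 and 2 valid rows, padded to length 3.
    values = torch.arange(2 * 3 * 4, dtype=torch.float32).view(2, 3, 4)
    mask = torch.tensor([[True, True, True], [True, True, False]])

    value_list = mask_padded_to_list(values, mask)
    assert value_list[0].shape == (3, 4)
    assert value_list[1].shape == (2, 4)
    return value_list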
def test_padded_to_packed(self):
    # Check conversions between the list, packed and padded representations
    # of per-vertex texture features in TexturesVertex.
    num_verts_per_mesh = [9, 6]
    D = 10
    verts_features_list = [torch.rand(v, D) for v in num_verts_per_mesh]
    verts_features_packed = list_to_packed(verts_features_list)[0]
    verts_features_list = packed_to_list(
        verts_features_packed, num_verts_per_mesh
    )
    tex = TexturesVertex(verts_features=verts_features_list)

    # _num_verts_per_mesh is set inside Meshes when textures is passed as an
    # input. Here we set it explicitly.
    tex1 = tex.clone()
    tex1._num_verts_per_mesh = num_verts_per_mesh
    verts_packed = tex1.verts_features_packed()
    verts_verts_list = tex1.verts_features_list()
    verts_padded = tex1.verts_features_padded()
    for f1, f2 in zip(verts_verts_list, verts_features_list):
        self.assertTrue((f1 == f2).all().item())
    self.assertTrue(verts_packed.shape == (sum(num_verts_per_mesh), D))
    self.assertTrue(verts_padded.shape == (2, 9, D))

    # Case where num_verts_per_mesh is not set and textures
    # are initialized with a padded tensor.
    tex2 = TexturesVertex(verts_features=verts_padded)
    verts_packed = tex2.verts_features_packed()
    verts_list = tex2.verts_features_list()

    # Packed is just the flattened padded tensor as num_verts_per_mesh
    # has not been provided.
    self.assertTrue(verts_packed.shape == (9 * 2, D))
    for i, (f1, f2) in enumerate(zip(verts_list, verts_features_list)):
        n = num_verts_per_mesh[i]
        self.assertTrue((f1[:n] == f2).all().item())
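
# A minimal sketch of the list <-> packed <-> padded conversions the test above
# relies on. The helpers are assumed importable from pytorch3d.structures; the
# shapes are illustrative.
def _example_list_packed_padded():
    import torch
    from pytorch3d.structures import list_to_packed, list_to_padded, packed_to_list

    feats = [torch.rand(9, 10), torch.rand(6, 10)]
    packed = list_to_packed(feats)[0]           # (15, 10): meshes concatenated
    roundtrip = packed_to_list(packed, [9, 6])  # back to per-mesh tensors
    padded = list_to_padded(feats)              # (2, 9, 10): second mesh zero-padded
    return packed, roundtrip, padded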
def test_face_point_distance(self):
    """
    Test CUDA implementation for FacePointDistanceForward
    & FacePointDistanceBackward
    """
    device = get_random_cuda_device()
    N, V, F, P = 4, 32, 16, 24
    meshes, pcls = self.init_meshes_clouds(N, V, F, P, device=device)

    # make points packed a leaf node
    points_packed = pcls.points_packed().detach().clone()  # (P, 3)
    points_first_idx = pcls.cloud_to_packed_first_idx()

    # make faces packed a leaf node
    verts_packed = meshes.verts_packed()
    faces_packed = verts_packed[meshes.faces_packed()]  # (T, 3, 3)
    faces_packed = faces_packed.clone().detach()
    faces_first_idx = meshes.mesh_to_faces_packed_first_idx()
    max_f = meshes.num_faces_per_mesh().max().item()

    # leaf nodes
    points_packed.requires_grad = True
    faces_packed.requires_grad = True
    grad_dists = torch.rand(
        (faces_packed.shape[0],), dtype=torch.float32, device=device
    )

    # Cuda Implementation: forward
    dists_cuda, idx_cuda = _C.face_point_dist_forward(
        points_packed, points_first_idx, faces_packed, faces_first_idx, max_f
    )

    # Cuda Implementation: backward
    grad_points_cuda, grad_faces_cuda = _C.face_point_dist_backward(
        points_packed, faces_packed, idx_cuda, grad_dists
    )

    # Cpu Implementation: forward
    dists_cpu, idx_cpu = _C.face_point_dist_forward(
        points_packed.cpu(),
        points_first_idx.cpu(),
        faces_packed.cpu(),
        faces_first_idx.cpu(),
        max_f,
    )

    # Cpu Implementation: backward
    grad_points_cpu, grad_faces_cpu = _C.face_point_dist_backward(
        points_packed.cpu(), faces_packed.cpu(), idx_cpu, grad_dists.cpu()
    )

    # Naive Implementation: forward
    faces_list = packed_to_list(faces_packed, meshes.num_faces_per_mesh().tolist())
    dists_naive = []
    for i in range(N):
        points = pcls.points_list()[i]
        tris = faces_list[i]
        dists_temp = torch.zeros(
            (tris.shape[0], points.shape[0]), dtype=torch.float32, device=device
        )
        for t in range(tris.shape[0]):
            for p in range(points.shape[0]):
                dist = self._point_to_tri_distance(points[p], tris[t])
                dists_temp[t, p] = dist

        # torch.min() doesn't necessarily return the first index of the
        # smallest value, our warp_reduce does. So it's not straightforward
        # to directly compare indices, nor the gradients of grad_tris which
        # also depend on the indices of the minimum value.
        # To be able to compare, we compare dists_temp.min(1) and
        # then feed the cuda indices to the naive output.
        start = faces_first_idx[i]
        end = faces_first_idx[i + 1] if i < N - 1 else faces_packed.shape[0]

        min_idx = idx_cuda.cpu()[start:end] - points_first_idx[i].cpu()
        iidx = torch.arange(tris.shape[0], device=device)
        min_dist = dists_temp[iidx, min_idx]

        dists_naive.append(min_dist)

    dists_naive = torch.cat(dists_naive)

    # Compare
    self.assertClose(dists_naive.cpu(), dists_cuda.cpu())
    self.assertClose(dists_naive.cpu(), dists_cpu)

    # Naive Implementation: backward
    dists_naive.backward(grad_dists)
    grad_points_naive = torch.cat([cloud.grad for cloud in pcls.points_list()])
    grad_faces_naive = faces_packed.grad

    # Compare
    self.assertClose(grad_points_naive.cpu(), grad_points_cuda.cpu(), atol=1e-7)
    self.assertClose(grad_faces_naive.cpu(), grad_faces_cuda.cpu(), atol=5e-7)
    self.assertClose(grad_points_naive.cpu(), grad_points_cpu, atol=1e-7)
    self.assertClose(grad_faces_naive.cpu(), grad_faces_cpu, atol=5e-7)
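
# The low-level _C.face_point_dist_* kernels exercised above back the public
# point_mesh_face_distance loss in pytorch3d.loss. A minimal usage sketch; the
# tetrahedron mesh and random points below are illustrative, not from the test.
def _example_point_mesh_face_distance():
    import torch
    from pytorch3d.loss import point_mesh_face_distance
    from pytorch3d.structures import Meshes, Pointclouds

    verts = torch.tensor(
        [[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]
    )
    faces = torch.tensor([[0, 1, 2], [0, 1, 3], [0, 2, 3], [1, 2, 3]])
    points = torch.rand(20, 3)

    meshes = Meshes(verts=[verts], faces=[faces])
    pcls = Pointclouds(points=[points])

    # Scalar loss combining point->face and face->point squared distances.
    loss = point_mesh_face_distance(meshes, pcls)
    return loss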
def test_point_mesh_edge_distance(self):
    """
    Test point_mesh_edge_distance from pytorch3d.loss
    """
    device = get_random_cuda_device()
    N, V, F, P = 4, 32, 16, 24
    meshes, pcls = self.init_meshes_clouds(N, V, F, P, device=device)

    # clone and detach for another backward pass through the op
    verts_op = [verts.clone().detach() for verts in meshes.verts_list()]
    for i in range(N):
        verts_op[i].requires_grad = True

    faces_op = [faces.clone().detach() for faces in meshes.faces_list()]
    meshes_op = Meshes(verts=verts_op, faces=faces_op)
    points_op = [points.clone().detach() for points in pcls.points_list()]
    for i in range(N):
        points_op[i].requires_grad = True
    pcls_op = Pointclouds(points_op)

    # Cuda implementation: forward & backward
    loss_op = point_mesh_edge_distance(meshes_op, pcls_op)

    # Naive implementation: forward & backward
    edges_packed = meshes.edges_packed()
    edges_list = packed_to_list(edges_packed, meshes.num_edges_per_mesh().tolist())
    loss_naive = torch.zeros(N, dtype=torch.float32, device=device)
    for i in range(N):
        points = pcls.points_list()[i]
        verts = meshes.verts_list()[i]
        v_first_idx = meshes.mesh_to_verts_packed_first_idx()[i]
        edges = verts[edges_list[i] - v_first_idx]

        num_p = points.shape[0]
        num_e = edges.shape[0]
        dists = torch.zeros((num_p, num_e), dtype=torch.float32, device=device)
        for p in range(num_p):
            for e in range(num_e):
                dist = self._point_to_edge_distance(points[p], edges[e])
                dists[p, e] = dist

        min_dist_p, min_idx_p = dists.min(1)
        min_dist_e, min_idx_e = dists.min(0)

        loss_naive[i] = min_dist_p.mean() + min_dist_e.mean()
    loss_naive = loss_naive.mean()

    # NOTE: the comparison here holds despite the discrepancy in the argmin
    # indices returned by min(). This is because we compare gradients on the
    # verts and not on the edges or faces.

    # Compare forward pass
    self.assertClose(loss_op, loss_naive)

    # Compare backward pass
    rand_val = torch.rand(1).item()
    grad_dist = torch.tensor(rand_val, dtype=torch.float32, device=device)

    loss_naive.backward(grad_dist)
    loss_op.backward(grad_dist)

    # check verts grad
    for i in range(N):
        self.assertClose(
            meshes.verts_list()[i].grad, meshes_op.verts_list()[i].grad
        )
        self.assertClose(
            pcls.points_list()[i].grad, pcls_op.points_list()[i].grad
        )
def test_point_edge_distance(self):
    """
    Test CUDA implementation for PointEdgeDistanceForward
    & PointEdgeDistanceBackward
    """
    device = get_random_cuda_device()
    N, V, F, P = 4, 32, 16, 24
    meshes, pcls = self.init_meshes_clouds(N, V, F, P, device=device)

    # make points packed a leaf node
    points_packed = pcls.points_packed().detach().clone()  # (P, 3)
    points_first_idx = pcls.cloud_to_packed_first_idx()
    max_p = pcls.num_points_per_cloud().max().item()

    # make edges packed a leaf node
    verts_packed = meshes.verts_packed()
    edges_packed = verts_packed[meshes.edges_packed()]  # (E, 2, 3)
    edges_packed = edges_packed.clone().detach()
    edges_first_idx = meshes.mesh_to_edges_packed_first_idx()

    # leaf nodes
    points_packed.requires_grad = True
    edges_packed.requires_grad = True
    grad_dists = torch.rand(
        (points_packed.shape[0],), dtype=torch.float32, device=device
    )

    # Cuda Implementation: forward
    dists_cuda, idx_cuda = _C.point_edge_dist_forward(
        points_packed, points_first_idx, edges_packed, edges_first_idx, max_p
    )

    # Cuda Implementation: backward
    grad_points_cuda, grad_edges_cuda = _C.point_edge_dist_backward(
        points_packed, edges_packed, idx_cuda, grad_dists
    )

    # Cpu Implementation: forward
    dists_cpu, idx_cpu = _C.point_edge_dist_forward(
        points_packed.cpu(),
        points_first_idx.cpu(),
        edges_packed.cpu(),
        edges_first_idx.cpu(),
        max_p,
    )

    # Cpu Implementation: backward
    # Note that using idx_cpu doesn't pass - there seems to be a problem
    # with tied results.
    grad_points_cpu, grad_edges_cpu = _C.point_edge_dist_backward(
        points_packed.cpu(), edges_packed.cpu(), idx_cuda.cpu(), grad_dists.cpu()
    )

    # Naive Implementation: forward
    edges_list = packed_to_list(edges_packed, meshes.num_edges_per_mesh().tolist())
    dists_naive = []
    for i in range(N):
        points = pcls.points_list()[i]
        edges = edges_list[i]
        dists_temp = torch.zeros(
            (points.shape[0], edges.shape[0]), dtype=torch.float32, device=device
        )
        for p in range(points.shape[0]):
            for e in range(edges.shape[0]):
                dist = self._point_to_edge_distance(points[p], edges[e])
                dists_temp[p, e] = dist

        # torch.min() doesn't necessarily return the first index of the
        # smallest value, our warp_reduce does. So it's not straightforward
        # to directly compare indices, nor the gradients of grad_edges which
        # also depend on the indices of the minimum value.
        # To be able to compare, we compare dists_temp.min(1) and
        # then feed the cuda indices to the naive output.
        start = points_first_idx[i]
        end = points_first_idx[i + 1] if i < N - 1 else points_packed.shape[0]

        min_idx = idx_cuda[start:end] - edges_first_idx[i]
        iidx = torch.arange(points.shape[0], device=device)
        min_dist = dists_temp[iidx, min_idx]

        dists_naive.append(min_dist)

    dists_naive = torch.cat(dists_naive)

    # Compare
    self.assertClose(dists_naive.cpu(), dists_cuda.cpu())
    self.assertClose(dists_naive.cpu(), dists_cpu)

    # Naive Implementation: backward
    dists_naive.backward(grad_dists)
    grad_points_naive = torch.cat([cloud.grad for cloud in pcls.points_list()])
    grad_edges_naive = edges_packed.grad.cpu()

    # Compare
    self.assertClose(grad_points_naive.cpu(), grad_points_cuda.cpu(), atol=1e-7)
    self.assertClose(grad_edges_naive, grad_edges_cuda.cpu(), atol=5e-7)
    self.assertClose(grad_points_naive.cpu(), grad_points_cpu, atol=1e-7)
    self.assertClose(grad_edges_naive, grad_edges_cpu, atol=5e-7)