def get_comparisons(self, model_input, prediction, ground_truth=None):
    predictions, depth_maps = prediction

    batch_size = predictions.shape[0]

    # Parse model input.
    intrinsics = model_input["intrinsics"].cuda()
    uv = model_input["uv"].cuda().float()

    x_cam = uv[:, :, 0].view(batch_size, -1)
    y_cam = uv[:, :, 1].view(batch_size, -1)
    z_cam = depth_maps.view(batch_size, -1)

    normals = geometry.compute_normal_map(x_img=x_cam, y_img=y_cam, z=z_cam, intrinsics=intrinsics)
    normals = F.pad(normals, pad=(1, 1, 1, 1), mode="constant", value=1.)

    predictions = util.lin2img(predictions)

    if ground_truth is not None:
        trgt_imgs = ground_truth["rgb"]
        trgt_imgs = util.lin2img(trgt_imgs)

        return torch.cat((normals.cpu(), predictions.cpu(), trgt_imgs.cpu()), dim=3).numpy()
    else:
        return torch.cat((normals.cpu(), predictions.cpu()), dim=3).numpy()
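# --- Hedged usage sketch (hypothetical, not part of the original file) ---
# get_comparisons returns a numpy array of images concatenated along the width
# axis (normals | prediction | ground truth). One plausible way to dump these
# strips to disk during validation; `model`, `dataloader`, `out_dir`, and the
# assumed [-1, 1] value range are all assumptions, not from this repo.
import os

import imageio
import numpy as np
import torch


def save_comparisons(model, dataloader, out_dir):
    os.makedirs(out_dir, exist_ok=True)
    with torch.no_grad():
        for i, (model_input, ground_truth) in enumerate(dataloader):
            prediction = model(model_input)  # (novel_views, depth_maps)
            comparisons = model.get_comparisons(model_input, prediction, ground_truth)
            for b, img in enumerate(comparisons):  # img: (C, H, W)
                img = ((img.transpose(1, 2, 0) + 1.) / 2. * 255).clip(0, 255).astype(np.uint8)
                imageio.imwrite(os.path.join(out_dir, "%04d_%d.png" % (i, b)), img)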
def forward(self, input, z=None):
    self.logs = list()  # Log saves tensors that'll receive summaries when the model's write_updates function is called.

    # Parse model input.
    instance_idcs = input["instance_idx"].long().cuda()
    pose = input["pose"].cuda()
    intrinsics = input["intrinsics"].cuda()
    uv = input["uv"].cuda().float()

    if self.fit_single_srn:
        phi = self.phi
    else:
        if self.has_params:  # If each instance has a latent parameter vector, we'll use that one.
            if z is None:
                self.z = input["param"].cuda()
            else:
                self.z = z
        else:  # Else, we'll use the embedding.
            self.z = self.latent_codes(instance_idcs)

        phi = self.hyper_phi(self.z)  # Forward pass through hypernetwork yields a (callable) SRN.

    # Raymarch SRN phi along rays defined by camera pose, intrinsics and uv coordinates.
    points_xyz, depth_maps, log = self.ray_marcher(cam2world=pose,
                                                   intrinsics=intrinsics,
                                                   uv=uv,
                                                   phi=phi)
    self.logs.extend(log)

    # Sample phi one last time at the final ray-marched world coordinates.
    v = phi(points_xyz)

    # Translate features at ray-marched world coordinates to RGB colors.
    novel_views = self.pixel_generator(v)

    # Calculate the normal map.
    with torch.no_grad():
        batch_size = uv.shape[0]
        x_cam = uv[:, :, 0].view(batch_size, -1)
        y_cam = uv[:, :, 1].view(batch_size, -1)
        z_cam = depth_maps.view(batch_size, -1)

        normals = geometry.compute_normal_map(x_img=x_cam, y_img=y_cam, z=z_cam, intrinsics=intrinsics)
        self.logs.append(("image", "normals",
                          torchvision.utils.make_grid(normals, scale_each=True, normalize=True), 100))

    if not self.fit_single_srn:
        self.logs.append(("embedding", "", self.latent_codes.weight, 500))
        self.logs.append(("scalar", "embed_min", self.z.min(), 1))
        self.logs.append(("scalar", "embed_max", self.z.max(), 1))

    return novel_views, depth_maps
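# --- Illustrative sketch (an assumption, not the original hyper_phi) ---
# The hypernetwork pattern above: an MLP maps the latent code z to the weights
# of another network, and the result is a callable field like phi. A minimal
# single-layer version under that assumption; all names here are hypothetical.
import torch
import torch.nn as nn


class TinyHyperNet(nn.Module):
    def __init__(self, z_dim=16, in_dim=3, out_dim=32):
        super().__init__()
        self.in_dim, self.out_dim = in_dim, out_dim
        # One head predicts the weight matrix, one the bias vector.
        self.weight_head = nn.Linear(z_dim, in_dim * out_dim)
        self.bias_head = nn.Linear(z_dim, out_dim)

    def forward(self, z):
        w = self.weight_head(z).view(-1, self.out_dim, self.in_dim)  # (B, out, in)
        b = self.bias_head(z)                                        # (B, out)

        def phi(points_xyz):  # points_xyz: (B, num_points, in_dim)
            return torch.einsum("boi,bpi->bpo", w, points_xyz) + b.unsqueeze(1)

        return phi  # A (callable) field, analogous to phi = self.hyper_phi(self.z).


# Usage: phi = TinyHyperNet()(torch.randn(8, 16)); v = phi(torch.randn(8, 1024, 3))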
def forward(self, input, z=None):
    self.logs = list()  # Log saves tensors that'll receive summaries when the model's write_updates function is called.

    # Build a dense grid of (x, y) pixel coordinates for a 128x128 image, one copy per batch element.
    uv = np.mgrid[0:128, 0:128].astype(np.int32)
    uv = torch.from_numpy(np.flip(uv, axis=0).copy()).long()
    uv = uv.reshape(2, -1).transpose(1, 0)
    uvs = torch.cat([uv.unsqueeze(0) for _ in range(input[0].size(0))])

    # Parse model input.
    pose = input[0].cuda()
    rgb_mat = input[1].cuda()
    intrinsics = None
    uv = uvs.cuda().float()

    # Encode the input image into a scene feature vector and broadcast it to every ray.
    v_scene = self.phi_scene(rgb_mat)
    v_scene = expand(v_scene, uv.size(1))

    # Raymarch SRN phi along rays defined by camera pose, intrinsics and uv coordinates.
    points_xyz, depth_maps, log = self.ray_marcher(cam2world=pose,
                                                   intrinsics=intrinsics,
                                                   uv=uv,
                                                   phi=self.phi,
                                                   v_scene=v_scene)
    self.logs.extend(log)

    # Sample phi one last time at the final ray-marched world coordinates, conditioned on the scene features.
    v = self.phi(torch.cat([points_xyz, v_scene], dim=2))

    # Translate features at ray-marched world coordinates to RGB colors.
    novel_views = self.pixel_generator(v)

    # Calculate the normal map.
    with torch.no_grad():
        batch_size = uv.shape[0]
        x_cam = uv[:, :, 0].view(batch_size, -1)
        y_cam = uv[:, :, 1].view(batch_size, -1)
        z_cam = depth_maps.view(batch_size, -1)

        normals = geometry.compute_normal_map(x_img=x_cam, y_img=y_cam, z=z_cam, intrinsics=intrinsics)
        self.logs.append(("image", "normals",
                          torchvision.utils.make_grid(normals, scale_each=True, normalize=True), 100))

    return novel_views, depth_maps
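# --- Illustrative sketch (an assumption about the `expand` helper above) ---
# `expand` is assumed to tile the per-scene feature vector across all rays so
# it can be concatenated with per-point coordinates. A minimal version under
# that assumption (`expand_scene_code` is a hypothetical name):
import torch


def expand_scene_code(v_scene, num_rays):
    # (B, C) -> (B, num_rays, C); torch's expand broadcasts without copying.
    return v_scene.unsqueeze(1).expand(-1, num_rays, -1)


# Usage: expand_scene_code(torch.randn(4, 256), 128 * 128).shape -> (4, 16384, 256)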
def forward(self, input, z=None):
    self.logs = list()  # Log saves tensors that'll receive summaries when the model's write_updates function is called.

    # Parse model input.
    instance_idcs = input["instance_idx"].long().cuda()
    pose = input["pose"].cuda()
    intrinsics = input["intrinsics"].cuda()
    uv = input["uv"].cuda().float()

    if self.fit_single_srn:
        phi = self.phi
    else:
        if self.has_params:  # If each instance has a latent parameter vector, we'll use that one.
            if z is None:
                self.z = input["param"].cuda()
            else:
                self.z = z
        else:  # Else, we'll use the embedding.
            self.z = self.latent_codes(instance_idcs)

        phi = self.hyper_phi(self.z)  # Forward pass through hypernetwork yields a (callable) SRN.

    # Raymarch SRN phi along rays defined by camera pose, intrinsics and uv coordinates.
    points_xyz, depth_maps, log = self.ray_marcher(cam2world=pose,
                                                   intrinsics=intrinsics,
                                                   uv=uv,
                                                   phi=phi)
    self.logs.extend(log)

    # Sample phi one last time at the final ray-marched world coordinates.
    v = phi(points_xyz)

    # Translate features at ray-marched world coordinates to RGB colors.
    # novel_views = self.pixel_generator(v)  # [8, 1024, 256]

    # Refine the per-ray features with a non-local (self-attention) block before the pixel generator.
    out_channels = 128
    batch_size, _, in_channels = v.shape
    img_sidelength = 32

    # Reshape the flat per-ray features into a 2D feature map: (B, C, H, W).
    v_ = v.view(-1, img_sidelength, img_sidelength, in_channels).permute(0, 3, 1, 2).contiguous()

    # 1x1 convolutions produce value (g), key (phi_att) and query (theta) embeddings.
    # (phi_att is named to avoid shadowing the SRN phi above.)
    g = self.conv_g(v_)
    phi_att = self.conv_phi(v_)
    theta = self.conv_theta(v_)

    g_x = g.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, out_channels)
    theta_x = theta.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, out_channels)
    phi_x = phi_att.view(batch_size, out_channels, -1)

    # Pairwise similarities between all spatial positions, softmax-normalized.
    f = torch.matmul(theta_x, phi_x)  # (B, HW, HW)
    f_softmax = torch.softmax(f, -1)
    y = torch.matmul(f_softmax, g_x)  # (B, HW, out_channels)
    y = y.permute(0, 2, 1).contiguous().view(batch_size, out_channels, img_sidelength, img_sidelength)

    # Project back to in_channels and add as a residual.
    w_y = self.w_y(y).permute(0, 2, 3, 1).contiguous().view(batch_size, -1, in_channels)
    v = torch.add(v, w_y)

    novel_views = self.pixel_generator(v)

    # Alternative: a simple conv network instead of attention.
    # _, _, in_channels = v.shape
    # size = 32
    # v_ = v.reshape(-1, size, size, in_channels).permute(0, 3, 1, 2)
    # self.conv1 = nn.Conv2d(256, 128, 3, padding=1).cuda()
    # self.conv2 = nn.Conv2d(128, 64, 3, padding=1).cuda()
    # self.conv3 = nn.Conv2d(64, 3, 3, padding=1).cuda()
    # v_ = self.conv1(v_)
    # v_ = self.conv2(v_)
    # v_ = self.conv3(v_)
    # novel_views = v_.permute(0, 2, 3, 1).reshape(-1, size**2, 3)

    # Calculate the normal map.
    with torch.no_grad():
        batch_size = uv.shape[0]
        x_cam = uv[:, :, 0].view(batch_size, -1)
        y_cam = uv[:, :, 1].view(batch_size, -1)
        z_cam = depth_maps.view(batch_size, -1)

        normals = geometry.compute_normal_map(x_img=x_cam, y_img=y_cam, z=z_cam, intrinsics=intrinsics)
        self.logs.append(("image", "normals",
                          torchvision.utils.make_grid(normals, scale_each=True, normalize=True), 100))

    if not self.fit_single_srn:
        self.logs.append(("embedding", "", self.latent_codes.weight, 500))
        self.logs.append(("scalar", "embed_min", self.z.min(), 1))
        self.logs.append(("scalar", "embed_max", self.z.max(), 1))

    return novel_views, depth_maps
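# --- Illustrative sketch (an assumption, not the original __init__) ---
# The attention path above relies on self.conv_g, self.conv_phi, self.conv_theta
# and self.w_y being defined elsewhere. A plausible construction, following the
# standard non-local / self-attention block (1x1 convolutions for the value,
# key and query embeddings, plus a projection back to the input width):
import torch.nn as nn

in_channels, out_channels = 256, 128  # matches the shapes assumed in forward above
conv_g = nn.Conv2d(in_channels, out_channels, kernel_size=1)      # value
conv_phi = nn.Conv2d(in_channels, out_channels, kernel_size=1)    # key
conv_theta = nn.Conv2d(in_channels, out_channels, kernel_size=1)  # query
w_y = nn.Conv2d(out_channels, in_channels, kernel_size=1)         # output projection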