def evaluate_depth(self, batch):
    """
    Evaluate a batch to produce depth metrics.

    Returns
    -------
    output : dict
        Dictionary containing "metrics", "inv_depth" and "depth" keys
        metrics : torch.Tensor [7]
            Depth metrics (abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3)
        inv_depth : torch.Tensor [B,1,H,W]
            Predicted inverse depth map
        depth : torch.Tensor [B,1,H,W]
            Predicted depth map
    """
    # Get the predicted inverse depth at the highest scale and convert to depth
    inv_depth = self(batch)['inv_depths'][0]
    depth = inv2depth(inv_depth)
    # Calculate metrics against the projected LiDAR ground truth
    metrics = compute_depth_metrics(gt=batch['projected_lidar'],
                                    pred=depth,
                                    **self.hparams.metrics)
    # Return metrics and extra information
    return {'metrics': metrics, 'inv_depth': inv_depth, 'depth': depth}
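# The `inv2depth` helper used throughout this file is not defined in this
# excerpt. A minimal sketch, assuming it simply inverts the predicted inverse
# depth with a small clamp to avoid division by zero (list inputs handled
# recursively); the name `inv2depth_sketch` is illustrative only:
import torch


def inv2depth_sketch(inv_depth, eps=1e-6):
    """Convert (a list of) inverse depth maps to depth maps."""
    if isinstance(inv_depth, (list, tuple)):
        return [inv2depth_sketch(item, eps) for item in inv_depth]
    # Clamp so that zero inverse depth does not produce infinite depth
    return 1.0 / inv_depth.clamp(min=eps)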
def evaluate_depth(self, batch):
    """
    Evaluate a batch to produce per-camera and averaged depth metrics.

    Returns
    -------
    output : dict
        Dictionary containing a "metrics" key and one key per camera
        metrics : dict
            Depth metrics (abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3),
            stored per camera and averaged over all cameras
        <camera_name> : dict
            Predicted inverse depth and depth for that camera
    """
    total_metrics = {}
    output_by_cam = {}
    for i, camera_name in enumerate(self.camera_list):
        output_by_cam[camera_name] = {}
        # Get the predicted inverse depth at the highest scale and convert to depth
        inv_depth = self(batch)[camera_name]['inv_depths'][0]
        depth = inv2depth(inv_depth)
        # Store predictions for visualization purposes
        output_by_cam[camera_name]['inv_depth'] = inv_depth
        output_by_cam[camera_name]['depth'] = depth
        # Calculate metrics, stored per camera and averaged over all cameras
        metric_prefix = f"{camera_name}-"
        metrics = compute_depth_metrics(
            gt=batch[camera_name]['projected_lidar'],
            pred=depth,
            prefix=metric_prefix,
            **self._hparams.metrics)
        total_metrics.update(metrics)
        for key, val in metrics.items():
            key = key[len(metric_prefix):]
            total_metrics[key] = total_metrics.get(
                key, torch.zeros_like(val)) + val
            if i == len(self.camera_list) - 1:
                # Average over all cameras once the last one has been processed
                total_metrics[key] /= len(self.camera_list)
    # Return per-camera and averaged metrics plus extra information
    return {'metrics': total_metrics, **output_by_cam}
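# `compute_depth_metrics` is also external to this excerpt. A minimal sketch of
# the seven standard metrics named in the docstrings (abs_rel, sq_rel, rmse,
# rmse_log, a1, a2, a3), assuming valid ground-truth pixels are those > 0; the
# project's implementation may additionally crop, cap depth and median-scale
# predictions depending on the `metrics` hyper-parameters:
import torch


def compute_depth_metrics_sketch(gt, pred, prefix=''):
    """Return a dict of standard depth metrics over valid ground-truth pixels."""
    valid = gt > 0
    gt, pred = gt[valid], pred[valid]
    thresh = torch.max(gt / pred, pred / gt)
    metrics = {
        'abs_rel': torch.mean(torch.abs(gt - pred) / gt),
        'sq_rel': torch.mean((gt - pred) ** 2 / gt),
        'rmse': torch.sqrt(torch.mean((gt - pred) ** 2)),
        'rmse_log': torch.sqrt(torch.mean((torch.log(gt) - torch.log(pred)) ** 2)),
        'a1': (thresh < 1.25).float().mean(),
        'a2': (thresh < 1.25 ** 2).float().mean(),
        'a3': (thresh < 1.25 ** 3).float().mean(),
    }
    return {prefix + key: val for key, val in metrics.items()}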
def forward(self, inv_depths, gt_depth, K, poses, progress=0.0):
    """
    Calculates the training supervised loss.

    Parameters
    ----------
    inv_depths : list of torch.Tensor [B,1,H,W]
        Predicted inverse depth maps for the original image, in all scales
    gt_depth : torch.Tensor [B,1,H,W]
        Ground-truth depth map for the original image
    K : torch.Tensor [B,3,3]
        Camera intrinsics
    poses : list of Pose
        Camera transformations between original and context frames
    progress : float
        Training percentage

    Returns
    -------
    losses_and_metrics : dict
        Output dictionary
    """
    # NOTE/TODO: the loss is computed for each pose, but since no RGB image is
    # used this is redundant; a single pose (e.g. the first one) would suffice.
    reprojected_losses = [[] for _ in range(self.n)]
    # Calculate and store the supervised loss for each pose and scale
    for pose in poses:
        depths = [inv2depth(inv_depths[i]) for i in range(self.n)]
        gt_depths = match_scales(gt_depth, depths, self.n)
        losses = self.calculate_reprojected_losses(depths,
                                                   gt_depths,
                                                   K,
                                                   pose,
                                                   progress=progress)
        for i in range(self.n):
            # Unsqueeze so the tensors have a dim and can be concatenated
            reprojected_losses[i].append(losses[i].mean().unsqueeze(0))
    # Per-scale reprojected loss: minimum over poses, averaged over scales
    reprojected_loss = sum([
        torch.cat(reprojected_losses[i], 0).min(0, True)[0].mean()
        for i in range(self.n)
    ])
    reprojected_loss /= self.n
    self.add_metric('reprojected_loss', reprojected_loss)
    # Return losses and metrics
    return {
        'loss': reprojected_loss.unsqueeze(0),
        'metrics': self.metrics,
    }
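# `match_scales` is not shown in this excerpt. A minimal sketch, assuming it
# simply resizes a single image or depth map to the resolution of each entry in
# a list of multi-scale tensors (nearest interpolation keeps depth values
# unaltered); the name `match_scales_sketch` is illustrative only:
import torch
import torch.nn.functional as F


def match_scales_sketch(image, targets, n, mode='nearest'):
    """Return `n` copies of `image`, each resized to the shape of targets[i]."""
    matched = []
    for i in range(n):
        _, _, h, w = targets[i].shape
        if image.shape[-2:] == (h, w):
            matched.append(image)
        else:
            matched.append(F.interpolate(image, size=(h, w), mode=mode))
    return matched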
def warp_ref_image(self, inv_depths, ref_image, K, ref_K, pose, cam_to_car):
    """
    Warps a reference image to produce a reconstruction of the original one.

    Parameters
    ----------
    inv_depths : list of torch.Tensor [B,1,H,W]
        Inverse depth maps of the original image, in all scales
    ref_image : torch.Tensor [B,3,H,W]
        Reference RGB image
    K : torch.Tensor [B,3,3]
        Original camera intrinsics
    ref_K : torch.Tensor [B,3,3]
        Reference camera intrinsics
    pose : Pose
        Original -> Reference camera transformation
    cam_to_car : torch.Tensor [B,4,4]
        Camera -> car (vehicle) extrinsic transformation

    Returns
    -------
    ref_warped : list of torch.Tensor [B,3,H,W]
        Warped reference images (reconstructing the original one), in all scales
    """
    B, _, H, W = ref_image.shape
    device = ref_image.get_device()
    # Generate cameras for all scales
    cams, ref_cams = [], []
    cam_to_car = cam_to_car.to(dtype=pose.mat.dtype)
    # Rotation-only camera -> car transformation
    rot_cam_to_car = torch.eye(4,
                               device=pose.mat.device,
                               dtype=pose.mat.dtype).repeat([len(pose), 1, 1])
    rot_cam_to_car[:, :3, :3] = cam_to_car[:, :3, :3]
    # Rotation-only car -> camera transformation (inverse rotation)
    rot_car_to_cam = torch.eye(4,
                               device=pose.mat.device,
                               dtype=pose.mat.dtype).repeat([len(pose), 1, 1])
    rot_car_to_cam[:, :3, :3] = torch.transpose(cam_to_car[:, :3, :3], -2, -1)
    # Full car -> camera transformation: rotation R^T and translation -R^T * t
    car_to_cam = rot_car_to_cam.clone()
    car_to_cam[:, :3, -1] = torch.bmm(
        -1. * rot_car_to_cam[:, :3, :3],
        cam_to_car[:, :3, -1].unsqueeze(-1)).squeeze(-1)
    # Express the pose in the camera frame
    pose = Pose(car_to_cam) @ Pose(rot_cam_to_car) @ pose @ Pose(cam_to_car)
    for i in range(self.n):
        _, _, DH, DW = inv_depths[i].shape
        scale_factor = DW / float(W)
        cams.append(Camera(K=K.float()).scaled(scale_factor).to(device))
        ref_cams.append(
            Camera(K=ref_K.float(), Tcw=pose).scaled(scale_factor).to(device))
    # View synthesis
    depths = [inv2depth(inv_depths[i]) for i in range(self.n)]
    ref_images = match_scales(ref_image, inv_depths, self.n)
    ref_warped = [
        view_synthesis(ref_images[i],
                       depths[i],
                       ref_cams[i],
                       cams[i],
                       padding_mode=self.padding_mode) for i in range(self.n)
    ]
    # Return warped reference images
    return ref_warped
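# The camera/car frame handling above builds the inverse of the rigid transform
# cam_to_car = [R | t] as car_to_cam = [R^T | -R^T t]. A small self-contained
# check of that identity; the function and variable names here are illustrative
# only and not part of the project's API:
import torch


def invert_rigid_transform(T):
    """Invert a batch of [B,4,4] rigid transforms using the transpose trick."""
    R, t = T[:, :3, :3], T[:, :3, 3:]
    T_inv = torch.eye(4, dtype=T.dtype, device=T.device).repeat(len(T), 1, 1)
    T_inv[:, :3, :3] = R.transpose(-2, -1)
    T_inv[:, :3, 3:] = -R.transpose(-2, -1) @ t
    return T_inv


if __name__ == '__main__':
    # Random rotations (via QR) plus translations; T_inv @ T should be identity
    q, _ = torch.linalg.qr(torch.randn(2, 3, 3))
    T = torch.eye(4).repeat(2, 1, 1)
    T[:, :3, :3], T[:, :3, 3] = q, torch.randn(2, 3)
    assert torch.allclose(invert_rigid_transform(T) @ T,
                          torch.eye(4).expand(2, 4, 4), atol=1e-5)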
def forward(self, target_view, source_views, inv_depths, K, poses,
            progress=0.0):
    """
    Calculates the training photometric loss.

    Parameters
    ----------
    target_view : torch.Tensor [B,3,H,W]
        Original (target) image
    source_views : list of torch.Tensor [B,3,H,W]
        Context containing a list of reference (source) images
    inv_depths : list of torch.Tensor [B,1,H,W]
        Predicted inverse depth maps for the original image, in all scales
    K : torch.Tensor [B,3,3]
        Original camera intrinsics
    poses : list of Pose
        Camera transformations between original and context frames
    progress : float
        Training percentage

    Returns
    -------
    losses_and_metrics : dict
        Output dictionary
    """
    # If using progressive scaling
    self.n = self.progressive_scaling(progress)
    photometric_losses = [[] for _ in range(self.n)]
    target_images = match_scales(target_view, inv_depths, self.n)
    depths = [inv2depth(inv_depths[i]) for i in range(self.n)]
    # Loop over all reference images
    for (source_view, pose) in zip(source_views, poses):
        # Calculate warped images
        ref_warped = self.warp_ref_images(depths, source_view, K, K, pose)
        # Calculate and store image loss
        photometric_loss = self.calc_photometric_loss(ref_warped,
                                                      target_images)
        for i in range(self.n):
            photometric_losses[i].append(photometric_loss[i])
        # If using automask
        if self.automask_loss:
            # Calculate and store unwarped image loss
            ref_images = match_scales(source_view, inv_depths, self.n)
            unwarped_image_loss = self.calc_photometric_loss(
                ref_images, target_images)
            for i in range(self.n):
                photometric_losses[i].append(unwarped_image_loss[i])
    # Calculate reduced photometric loss
    total_photo_loss = self.reduce_photometric_loss(photometric_losses)
    losses = [total_photo_loss]
    # Include smoothness loss if requested
    if self.smooth_loss_weight > 0.0:
        smoothness_loss = self.calc_smoothness_loss(inv_depths, target_images)
        losses.append(smoothness_loss)
    # Include uniformity regularization loss if requested
    if self.uniformity_weight > 0.0:
        uniformity_loss = self.calc_uniformity_regularization(inv_depths)
        losses.append(uniformity_loss)
    total_loss = sum(losses)
    # Return losses and metrics
    return {
        'loss': total_loss.unsqueeze(0),
        'metrics': self.metrics,
    }
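# `calc_photometric_loss` is not included in this excerpt. A minimal sketch of
# a common formulation (Monodepth2-style): a per-pixel mix of SSIM and L1,
# weighted by `ssim_weight` (often 0.85). The project's own version may differ
# in clipping and reduction details; these helper names are illustrative only:
import torch
import torch.nn.functional as F


def ssim_sketch(x, y, C1=1e-4, C2=9e-4, kernel=3):
    """Simplified per-pixel SSIM dissimilarity using average pooling."""
    mu_x, mu_y = F.avg_pool2d(x, kernel, 1, 1), F.avg_pool2d(y, kernel, 1, 1)
    sigma_x = F.avg_pool2d(x ** 2, kernel, 1, 1) - mu_x ** 2
    sigma_y = F.avg_pool2d(y ** 2, kernel, 1, 1) - mu_y ** 2
    sigma_xy = F.avg_pool2d(x * y, kernel, 1, 1) - mu_x * mu_y
    num = (2 * mu_x * mu_y + C1) * (2 * sigma_xy + C2)
    den = (mu_x ** 2 + mu_y ** 2 + C1) * (sigma_x + sigma_y + C2)
    return torch.clamp((1 - num / den) / 2, 0, 1)


def photometric_loss_sketch(ref_warped, targets, ssim_weight=0.85):
    """Per-scale photometric loss between warped references and targets."""
    return [
        ssim_weight * ssim_sketch(ref_warped[i], targets[i]).mean(1, True) +
        (1 - ssim_weight) * torch.abs(ref_warped[i] - targets[i]).mean(1, True)
        for i in range(len(targets))
    ]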
def forward(self, target_view, source_views, inv_depths, gt_depth, K, poses,
            progress=0.0):
    """
    Calculates the training photometric loss with depth-hints supervision.

    Parameters
    ----------
    target_view : torch.Tensor [B,3,H,W]
        Original (target) image
    source_views : list of torch.Tensor [B,3,H,W]
        Context containing a list of reference (source) images
    inv_depths : list of torch.Tensor [B,1,H,W]
        Predicted inverse depth maps for the original image, in all scales
    gt_depth : torch.Tensor [B,1,H,W]
        Ground-truth depth map for the original image
    K : torch.Tensor [B,3,3]
        Original camera intrinsics
    poses : list of Pose
        Camera transformations between original and context frames
    progress : float
        Training percentage

    Returns
    -------
    losses_and_metrics : dict
        Output dictionary
    """
    # If using progressive scaling
    self.n = self.progressive_scaling(progress)
    photometric_losses = [[] for _ in range(self.n)]
    gt_photometric_losses = [[] for _ in range(self.n)]
    target_images = match_scales(target_view, inv_depths, self.n)
    depths = [inv2depth(inv_depths[i]) for i in range(self.n)]
    gt_depths = match_scales(gt_depth, depths, self.n)
    for (source_view, pose) in zip(source_views, poses):
        # Calculate warped images from the predicted depths
        ref_warped = self.warp_ref_images(depths, source_view, K, K, pose)
        # Calculate and store image loss
        photometric_loss = self.calc_photometric_loss(ref_warped,
                                                      target_images)
        for i in range(self.n):
            photometric_losses[i].append(photometric_loss[i])
        # Calculate warped images from the ground-truth depths
        ref_gt_warped = self.warp_ref_images(gt_depths, source_view, K, K,
                                             pose)
        # Calculate and store image loss
        gt_photometric_loss = self.calc_photometric_loss(
            ref_gt_warped, target_images)
        for i in range(self.n):
            gt_depth_mask = (gt_depths[i] <= 0).float()
            # Set the loss for missing ground-truth pixels to a high value
            # so they are never chosen as the minimum
            gt_photometric_losses[i].append(gt_photometric_loss[i] +
                                            1000. * gt_depth_mask)
        # If using automask
        if self.automask_loss:
            # Calculate and store unwarped image loss
            ref_images = match_scales(source_view, inv_depths, self.n)
            unwarped_image_loss = self.calc_photometric_loss(
                ref_images, target_images)
            for i in range(self.n):
                photometric_losses[i].append(unwarped_image_loss[i])
    # Calculate reduced photometric loss
    loss = self.reduce_photometric_loss(photometric_losses)
    # Depth hints: supervise only where the ground-truth warp is better
    depth_hints_mask = self.calc_depth_hints_mask(photometric_losses,
                                                  gt_photometric_losses)
    depth_hints_loss = self.calc_depth_hints_loss(depth_hints_mask,
                                                  depths,
                                                  gt_depths,
                                                  K,
                                                  poses[0],
                                                  progress=progress)
    # Keep a list, as an in-place sum is not autograd friendly
    losses = [loss, depth_hints_loss]
    # Include smoothness loss if requested
    if self.smooth_loss_weight > 0.0:
        losses.append(self.calc_smoothness_loss(inv_depths, target_images))
    # Include uniformity regularization loss if requested
    if self.uniformity_weight > 0.0:
        losses.append(self.calc_uniformity_regularization(inv_depths))
    total_loss = sum(losses)
    # Return losses and metrics
    return {
        'loss': total_loss.unsqueeze(0),
        'metrics': self.metrics,
    }
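# `calc_depth_hints_mask` is not shown above. A minimal sketch of the Depth
# Hints idea it implements: hint (ground-truth) supervision is only applied at
# pixels where warping with the hint depth yields a lower photometric error
# than any warp obtained with the predicted depth. The exact stacking and
# reduction in the project may differ; this helper name is illustrative only:
import torch


def depth_hints_mask_sketch(photometric_losses, gt_photometric_losses):
    """Per-scale boolean masks selecting pixels where the depth hint wins."""
    masks = []
    for pred_losses, gt_losses in zip(photometric_losses,
                                      gt_photometric_losses):
        # Minimum photometric error over all predicted-depth warps ...
        pred_min = torch.cat(pred_losses, dim=1).min(dim=1, keepdim=True)[0]
        # ... and over all ground-truth-depth warps
        gt_min = torch.cat(gt_losses, dim=1).min(dim=1, keepdim=True)[0]
        masks.append((gt_min < pred_min).detach())
    return masks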