Example #1
    def evaluate_depth(self, batch):
        """
        Evaluate batch to produce depth metrics.

        Returns
        -------
        output : dict
            Dictionary containing "metrics", "inv_depth" and "depth" keys

            metrics : torch.Tensor [7]
                Depth metrics (abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3)

            inv_depth : torch.Tensor [B,1,H,W]
                Predicted inverse depth map

            depth : torch.Tensor [B,1,H,W]
                Predicted depth map
        """
        # Get predicted depth
        inv_depth = self(batch)['inv_depths'][0]
        depth = inv2depth(inv_depth)

        # Calculate predicted metrics
        metrics = compute_depth_metrics(gt=batch['projected_lidar'],
                                        pred=depth,
                                        **self.hparams.metrics)
        # Return metrics and extra information
        return {'metrics': metrics, 'inv_depth': inv_depth, 'depth': depth}
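
Example #1 relies on two helpers, inv2depth and compute_depth_metrics, that are not shown. The following is a minimal standalone sketch of what they are assumed to compute: a clamped reciprocal and the seven standard depth metrics listed in the docstring. The _sketch names, the depth bounds, and the assumption that gt and pred share the same resolution are illustrative, not the project's actual implementation.

import torch


def inv2depth_sketch(inv_depth, eps=1e-6):
    # Clamped reciprocal: inverse depth map -> depth map
    return 1. / inv_depth.clamp(min=eps)


def compute_depth_metrics_sketch(gt, pred, min_depth=0.1, max_depth=80.0):
    # Seven standard depth metrics on valid ground-truth pixels only
    valid = (gt > min_depth) & (gt < max_depth)
    gt, pred = gt[valid], pred[valid].clamp(min_depth, max_depth)
    thresh = torch.max(gt / pred, pred / gt)
    a1 = (thresh < 1.25).float().mean()
    a2 = (thresh < 1.25 ** 2).float().mean()
    a3 = (thresh < 1.25 ** 3).float().mean()
    abs_rel = ((gt - pred).abs() / gt).mean()
    sq_rel = ((gt - pred) ** 2 / gt).mean()
    rmse = ((gt - pred) ** 2).mean().sqrt()
    rmse_log = ((gt.log() - pred.log()) ** 2).mean().sqrt()
    return torch.stack([abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3])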
Example #2
    def evaluate_depth(self, batch):
        """
        Evaluate batch to produce depth metrics.

        Returns
        -------
        output : dict
            Dictionary containing a "metrics" key with metrics averaged over
            all cameras, plus one key per camera name

            metrics : dict of str -> torch.Tensor
                Depth metrics (abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3),
                stored both per camera (prefixed with the camera name) and
                averaged over all cameras

            <camera_name> : dict
                Dictionary with the predicted "inv_depth" and "depth" for
                that camera
        """

        total_metrics = {}
        output_by_cam = {}

        # Run the network once; its output is indexed by camera name
        outputs = self(batch)

        for i, camera_name in enumerate(self.camera_list):
            output_by_cam[camera_name] = {}

            # Get the predicted depth for this camera
            inv_depth = outputs[camera_name]['inv_depths'][0]
            depth = inv2depth(inv_depth)

            # store predictions for viz purpose
            output_by_cam[camera_name]['inv_depth'] = inv_depth
            output_by_cam[camera_name]['depth'] = depth

            # Calculate predicted metrics, store by camera & averaged over all cameras
            metric_prefix = f"{camera_name}-"
            metrics = compute_depth_metrics(
                gt=batch[camera_name]['projected_lidar'],
                pred=depth,
                prefix=metric_prefix,
                **self._hparams.metrics)

            total_metrics.update(metrics)

            for key, val in metrics.items():
                key = key[len(metric_prefix):]
                total_metrics[key] = total_metrics.get(
                    key, torch.zeros_like(val)) + val
                if i == len(self.camera_list) - 1:
                    total_metrics[key] /= len(
                        self.camera_list
                    )  # average over all cameras at the end

        # Return metrics averaged by cam and extra information
        return {'metrics': total_metrics, **output_by_cam}
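
The per-camera reduction in Example #2 accumulates the un-prefixed metric keys and divides by the number of cameras on the last iteration. Below is a standalone sketch of the same reduction, written as a free function for clarity; camera names and metric keys are illustrative dummies and only the averaging logic mirrors the code above.

import torch


def average_metrics_over_cameras(per_camera_metrics):
    """per_camera_metrics: dict of camera_name -> dict of metric_name -> scalar tensor."""
    total = {}
    n_cams = len(per_camera_metrics)
    for cam, metrics in per_camera_metrics.items():
        for name, value in metrics.items():
            # Keep the per-camera value, prefixed with the camera name
            total[f"{cam}-{name}"] = value
            # Accumulate the camera-averaged value under the plain metric name
            total[name] = total.get(name, torch.zeros_like(value)) + value / n_cams
    return total


# Dummy usage: the camera-averaged abs_rel ends up at 0.2
cams = {'CAM_FRONT': {'abs_rel': torch.tensor(0.1)},
        'CAM_BACK': {'abs_rel': torch.tensor(0.3)}}
print(average_metrics_over_cameras(cams))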
Example #3
    def forward(self, inv_depths, gt_depth, K, poses, progress=0.0):
        """
        Calculates training supervised loss.

        Parameters
        ----------
        inv_depths : list of torch.Tensor [B,1,H,W]
            Predicted depth maps for the original image, in all scales
        gt_depth : torch.Tensor [B,1,H,W]
            Ground-truth depth map for the original image
        K : torch.Tensor [B,3,3]
            Camera intrinsics
        poses : list of Pose
            Camera transformations between original and context frames
        progress : float
            Training percentage

        Returns
        -------
        losses_and_metrics : dict
            Output dictionary
        """

        # NOTE/TODO: the loss is currently computed for every pose, but since no RGB
        # image is used this is redundant; a single pose (e.g. the first one) would suffice

        reprojected_losses = [[] for _ in range(self.n)]
        depths = [inv2depth(inv_depths[i]) for i in range(self.n)]
        gt_depths = match_scales(gt_depth, depths, self.n)

        # Calculate and store the reprojected loss for each pose and each scale
        for pose in poses:
            losses = self.calculate_reprojected_losses(depths, gt_depths, K, pose, progress=progress)
            for i in range(self.n):
                # unsqueeze so that the tensors have a dim and can be concatenated
                reprojected_losses[i].append(losses[i].mean().unsqueeze(0))

        # Take the minimum over poses for each scale, then average over scales
        reprojected_loss = sum([torch.cat(reprojected_losses[i], 0).min(0, True)[0].mean() for i in range(self.n)])
        reprojected_loss /= self.n

        self.add_metric('reprojected_loss', reprojected_loss)
        # Return losses and metrics
        return {
            'loss': reprojected_loss.unsqueeze(0),
            'metrics': self.metrics,
        }
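
The reduction at the end of Example #3 takes, per scale, the minimum loss over all poses and then averages over scales. A tiny standalone sketch of that reduction, with dummy values in place of real losses:

import torch

n_scales, n_poses = 4, 2
# One scalar loss per (scale, pose), stored as above: a list of 1-element tensors per scale
reprojected_losses = [[torch.rand(1) for _ in range(n_poses)] for _ in range(n_scales)]

# Per scale: concatenate over poses, take the minimum, then average over scales
per_scale_min = [torch.cat(losses, 0).min(0, True)[0].mean() for losses in reprojected_losses]
reprojected_loss = sum(per_scale_min) / n_scales
print(reprojected_loss)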
Example #4
    def warp_ref_image(self, inv_depths, ref_image, K, ref_K, pose,
                       cam_to_car):
        """
        Warps a reference image to produce a reconstruction of the original one.
        Parameters
        ----------
        inv_depths : list of torch.Tensor [B,1,H,W]
            Inverse depth maps of the original image, in all scales
        ref_image : torch.Tensor [B,3,H,W]
            Reference RGB image
        K : torch.Tensor [B,3,3]
            Original camera intrinsics
        ref_K : torch.Tensor [B,3,3]
            Reference camera intrinsics
        pose : Pose
            Original -> Reference camera transformation
        cam_to_car : torch.Tensor
            Camera-to-car extrinsic transformation
        Returns
        -------
        ref_warped : list of torch.Tensor [B,3,H,W]
            Warped reference images (reconstructing the original one), in all scales
        """
        B, _, H, W = ref_image.shape
        device = ref_image.get_device()
        # Generate cameras for all scales
        cams, ref_cams = [], []

        cam_to_car = cam_to_car.to(dtype=pose.mat.dtype)

        rot_cam_to_car = torch.eye(4,
                                   device=pose.mat.device,
                                   dtype=pose.mat.dtype).repeat(
                                       [len(pose), 1, 1])
        rot_cam_to_car[:, :3, :3] = cam_to_car[:, :3, :3]

        rot_car_to_cam = torch.eye(4,
                                   device=pose.mat.device,
                                   dtype=pose.mat.dtype).repeat(
                                       [len(pose), 1, 1])
        # car -> cam rotation is the transpose of the cam -> car rotation
        rot_car_to_cam[:, :3, :3] = torch.transpose(cam_to_car[:, :3, :3],
                                                    -2, -1)

        car_to_cam = rot_car_to_cam.clone()
        car_to_cam[:, :3,
                   -1] = torch.bmm(-1. * rot_car_to_cam[:, :3, :3],
                                   cam_to_car[:, :3,
                                              -1].unsqueeze(-1)).squeeze(-1)

        pose = Pose(car_to_cam) @ Pose(rot_cam_to_car) @ pose @ Pose(
            cam_to_car)

        for i in range(self.n):
            _, _, DH, DW = inv_depths[i].shape
            scale_factor = DW / float(W)
            cams.append(Camera(K=K.float()).scaled(scale_factor).to(device))
            ref_cams.append(
                Camera(K=ref_K.float(),
                       Tcw=pose).scaled(scale_factor).to(device))

        # View synthesis
        depths = [inv2depth(inv_depths[i]) for i in range(self.n)]
        ref_images = match_scales(ref_image, inv_depths, self.n)
        ref_warped = [
            view_synthesis(ref_images[i],
                           depths[i],
                           ref_cams[i],
                           cams[i],
                           padding_mode=self.padding_mode)
            for i in range(self.n)
        ]
        # Return warped reference image
        return ref_warped
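
The car_to_cam construction in Example #4 is the usual closed-form inverse of a rigid transform: the inverse of [R | t] is [R^T | -R^T t]. A minimal standalone sketch of that inversion (the function name and the sanity check are illustrative):

import torch


def invert_rigid(T):
    """T: [B,4,4] homogeneous rigid transforms; returns their inverses."""
    R = T[:, :3, :3]
    t = T[:, :3, 3:]
    T_inv = torch.eye(4, dtype=T.dtype, device=T.device).repeat(T.shape[0], 1, 1)
    T_inv[:, :3, :3] = R.transpose(-2, -1)
    T_inv[:, :3, 3:] = -R.transpose(-2, -1) @ t
    return T_inv


# Sanity check: T @ invert_rigid(T) should be the identity
T = torch.eye(4).repeat(2, 1, 1)
T[:, :3, 3] = torch.tensor([1., 2., 3.])
print(torch.allclose(T @ invert_rigid(T), torch.eye(4).repeat(2, 1, 1)))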
Example #5
    def forward(self,
                target_view,
                source_views,
                inv_depths,
                K,
                poses,
                progress=0.0):
        """
        Calculates training photometric loss.
        Parameters
        ----------
        target_view : torch.Tensor [B,3,H,W]
            Original image
        source_views : list of torch.Tensor [B,3,H,W]
            Reference images from the context
        inv_depths : list of torch.Tensor [B,1,H,W]
            Predicted depth maps for the original image, in all scales
        K : torch.Tensor [B,3,3]
            Original camera intrinsics
        poses : list of Pose
            Camera transformation between original and context
        progress : float
            Training percentage
        Returns
        -------
        losses_and_metrics : dict
            Output dictionary
        """
        # If using progressive scaling
        self.n = self.progressive_scaling(progress)

        photometric_losses = [[] for _ in range(self.n)]

        target_images = match_scales(target_view, inv_depths, self.n)
        depths = [inv2depth(inv_depths[i]) for i in range(self.n)]

        # Loop over all reference images
        for (source_view, pose) in zip(source_views, poses):
            # Calculate warped images
            ref_warped = self.warp_ref_images(depths, source_view, K, K, pose)
            # Calculate and store image loss
            photometric_loss = self.calc_photometric_loss(
                ref_warped, target_images)
            for i in range(self.n):
                photometric_losses[i].append(photometric_loss[i])

            # If using automask
            if self.automask_loss:
                # Calculate and store unwarped image loss
                ref_images = match_scales(source_view, inv_depths, self.n)
                unwarped_image_loss = self.calc_photometric_loss(
                    ref_images, target_images)
                for i in range(self.n):
                    photometric_losses[i].append(unwarped_image_loss[i])

        # Calculate reduced photometric loss
        total_photo_loss = self.reduce_photometric_loss(photometric_losses)

        losses = [total_photo_loss]

        # Include smoothness loss if requested
        if self.smooth_loss_weight > 0.0:
            smoothness_loss = self.calc_smoothness_loss(
                inv_depths, target_images)
            losses.append(smoothness_loss)

        # Include uniformity regularization loss if requested
        if self.uniformity_weight > 0.0:
            uniformity_loss = self.calc_uniformity_regularization(inv_depths)
            losses.append(uniformity_loss)

        total_loss = sum(losses)

        # Return losses and metrics
        return {
            'loss': total_loss.unsqueeze(0),
            'metrics': self.metrics,
        }
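
Example #5 stores one photometric loss map per warped reference (plus the unwarped automask terms) and then reduces them with reduce_photometric_loss, whose body is not shown. The sketch below is an assumption about that reduction in the spirit of minimum-reprojection losses (per pixel, keep the minimum over all candidates, then average); the _sketch name and the per-pixel minimum are assumptions, not the project's confirmed implementation.

import torch


def reduce_photometric_loss_sketch(photometric_losses):
    """photometric_losses: list (per scale) of lists of [B,1,H,W] loss maps."""
    per_scale = []
    for losses in photometric_losses:
        stacked = torch.cat(losses, dim=1)                      # [B,N,H,W]
        per_scale.append(stacked.min(dim=1, keepdim=True)[0].mean())
    return sum(per_scale) / len(per_scale)


# Dummy usage: 2 scales, 3 candidate loss maps each
dummy = [[torch.rand(1, 1, 8, 8) for _ in range(3)] for _ in range(2)]
print(reduce_photometric_loss_sketch(dummy))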
Example #6
    def forward(self,
                target_view,
                source_views,
                inv_depths,
                gt_depth,
                K,
                poses,
                progress=0.0):
        """
        Calculates training photometric loss with depth hints supervision.

        Parameters
        ----------
        target_view : torch.Tensor [B,3,H,W]
            Original image
        source_views : list of torch.Tensor [B,3,H,W]
            Reference images from the context
        inv_depths : list of torch.Tensor [B,1,H,W]
            Predicted depth maps for the target view, in all scales
        gt_depth : torch.Tensor [B,1,H,W]
            Ground-truth depth map for the target view
        K : torch.Tensor [B,3,3]
            Camera intrinsics
        poses : list of Pose
            Camera transformations between original and context frames
        progress : float
            Training percentage

        Returns
        -------
        losses_and_metrics : dict
            Output dictionary
        """

        # If using progressive scaling
        self.n = self.progressive_scaling(progress)

        photometric_losses = [[] for _ in range(self.n)]
        gt_photometric_losses = [[] for _ in range(self.n)]

        target_images = match_scales(target_view, inv_depths, self.n)
        depths = [inv2depth(inv_depths[i]) for i in range(self.n)]
        gt_depths = match_scales(gt_depth, depths, self.n)

        for (source_view, pose) in zip(source_views, poses):

            # Calculate warped images
            ref_warped = self.warp_ref_images(depths, source_view, K, K, pose)
            # Calculate and store image loss
            photometric_loss = self.calc_photometric_loss(
                ref_warped, target_images)
            for i in range(self.n):
                photometric_losses[i].append(photometric_loss[i])

            # Calculate images warped with the ground-truth depth
            ref_gt_warped = self.warp_ref_images(gt_depths, source_view, K, K,
                                                 pose)
            # Calculate and store the ground-truth photometric loss
            gt_photometric_loss = self.calc_photometric_loss(
                ref_gt_warped, target_images)
            for i in range(self.n):
                gt_depth_mask = (gt_depths[i] <= 0).float()
                # set loss for missing gt pixels to be high so they are never chosen as minimum
                gt_photometric_losses[i].append(gt_photometric_loss[i] +
                                                1000. * gt_depth_mask)

            # If using automask
            if self.automask_loss:
                # Calculate and store unwarped image loss
                ref_images = match_scales(source_view, inv_depths, self.n)
                unwarped_image_loss = self.calc_photometric_loss(
                    ref_images, target_images)
                for i in range(self.n):
                    photometric_losses[i].append(unwarped_image_loss[i])

        # Calculate reduced loss
        loss = self.reduce_photometric_loss(photometric_losses)

        depth_hints_mask = self.calc_depth_hints_mask(photometric_losses,
                                                      gt_photometric_losses)
        depth_hints_loss = self.calc_depth_hints_loss(depth_hints_mask,
                                                      depths,
                                                      gt_depths,
                                                      K,
                                                      poses[0],
                                                      progress=progress)

        # use a list, as an in-place sum is not autograd friendly
        losses = [loss, depth_hints_loss]

        # Include smoothness loss if requested
        if self.smooth_loss_weight > 0.0:
            losses.append(self.calc_smoothness_loss(inv_depths, target_images))

        # Include uniformity regularization loss if requested
        if self.uniformity_weight > 0.0:
            losses.append(self.calc_uniformity_regularization(inv_depths))

        total_loss = sum(losses)

        # Return losses and metrics
        return {
            'loss': total_loss.unsqueeze(0),
            'metrics': self.metrics,
        }
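
Example #6 derives a depth-hints mask from the two loss lists (prediction-warped vs. ground-truth-warped) via calc_depth_hints_mask, which is not shown. The sketch below is an assumption about how such a mask could be formed, in the spirit of the Depth Hints approach: supervise only the pixels where warping with the ground-truth depth explains the target image better than any prediction-based warp. The _sketch name and the exact comparison are assumptions, not the project's confirmed code.

import torch


def depth_hints_mask_sketch(photometric_losses, gt_photometric_losses):
    """Both arguments: lists (per scale) of lists of [B,1,H,W] loss maps."""
    masks = []
    for pred_losses, gt_losses in zip(photometric_losses, gt_photometric_losses):
        best_pred = torch.cat(pred_losses, dim=1).min(dim=1, keepdim=True)[0]
        best_gt = torch.cat(gt_losses, dim=1).min(dim=1, keepdim=True)[0]
        # Keep only pixels where the ground-truth warp has the lower photometric error
        masks.append((best_gt < best_pred).detach())
    return masks


# Dummy usage: 1 scale, 2 source views
pred = [[torch.rand(1, 1, 4, 4) for _ in range(2)]]
gt = [[torch.rand(1, 1, 4, 4) for _ in range(2)]]
print(depth_hints_mask_sketch(pred, gt)[0].shape)  # torch.Size([1, 1, 4, 4])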