Example #1
def save_depth(batch, output, args, dataset, save):
    """
    Save depth predictions in various ways

    Parameters
    ----------
    batch : dict
        Batch from dataloader
    output : dict
        Output from model
    args : tuple
        Step arguments
    dataset : CfgNode
        Dataset configuration
    save : CfgNode
        Save configuration
    """
    # If there is no save folder, don't save
    if save.folder == '':
        return

    # If we want to save
    if save.depth.rgb or save.depth.viz or save.depth.npz or save.depth.png:
        # Retrieve useful tensors
        rgb = batch['rgb']
        pred_inv_depth = output['inv_depth']

        # Prepare path strings
        filename = batch['filename']
        dataset_idx = 0 if len(args) == 1 else args[1]
        save_path = os.path.join(save.folder, 'depth',
                                 prepare_dataset_prefix(dataset, dataset_idx),
                                 os.path.basename(save.pretrained).split('.')[0])
        # Create folder
        os.makedirs(save_path, exist_ok=True)

        # For each image in the batch
        length = rgb.shape[0]
        for i in range(length):
            # Save numpy depth maps
            if save.depth.npz:
                write_depth('{}/{}_depth.npz'.format(save_path, filename[i]),
                            depth=inv2depth(pred_inv_depth[i]),
                            intrinsics=batch['intrinsics'][i] if 'intrinsics' in batch else None)
            # Save png depth maps
            if save.depth.png:
                write_depth('{}/{}_depth.png'.format(save_path, filename[i]),
                            depth=inv2depth(pred_inv_depth[i]))
            # Save rgb images
            if save.depth.rgb:
                rgb_i = rgb[i].permute(1, 2, 0).detach().cpu().numpy() * 255
                write_image('{}/{}_rgb.png'.format(save_path, filename[i]), rgb_i)
            # Save inverse depth visualizations
            if save.depth.viz:
                viz_i = viz_inv_depth(pred_inv_depth[i]) * 255
                write_image('{}/{}_viz.png'.format(save_path, filename[i]), viz_i)
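All of these examples hinge on the inv2depth helper. For reference, here is a minimal sketch of the conversion; the real helper may also handle lists of tensors and masked values, so treat this as an assumption rather than the library's implementation.

import torch

def inv2depth_sketch(inv_depth, eps=1e-6):
    # Convert an inverse-depth map [B,1,H,W] into a depth map, avoiding division by zero
    return 1.0 / inv_depth.clamp(min=eps)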
Example #2
    def evaluate_depth(self, batch):
        """Evaluate a batch to produce depth and pose metrics."""
        # Get predicted depth
        output = self.model(batch)
        inv_depths = output['inv_depths']
        poses = output['poses']
        depth = inv2depth(inv_depths[0])
        # Post-process predicted depth with a horizontally flipped pass
        batch['rgb'] = flip_lr(batch['rgb'])
        if 'rgb_context' in batch:
            batch['rgb_context'] = [
                flip_lr(img) for img in batch['rgb_context']
            ]
        batch['intrinsics'] = flip_lr_intr(batch['intrinsics'],
                                           width=depth.shape[3])
        inv_depths_flipped = self.model(batch)['inv_depths']
        inv_depth_pp = post_process_inv_depth(inv_depths[0],
                                              inv_depths_flipped[0],
                                              method='mean')
        depth_pp = inv2depth(inv_depth_pp)
        # Restore the original (un-flipped) input
        batch['rgb'] = flip_lr(batch['rgb'])
        # Calculate predicted pose metrics
        if 'pose_context' in batch:
            pose_errs = compute_pose_metrics(self.config.model.params,
                                             gt=batch['pose_context'],
                                             pred=poses)
        else:
            pose_errs = [0, 0, 0]
        metrics = OrderedDict()
        if 'depth' in batch:
            for mode in self.metrics_modes:
                if self.config['datasets']['validation']['dataset'] == ['Demon']:
                    metrics[self.metrics_name + mode] = compute_depth_metrics_demon(
                        self.config.model.params,
                        gt=batch['depth'],
                        gt_pose=batch['pose_context'],
                        pred=depth_pp if 'pp' in mode else depth,
                        use_gt_scale='gt' in mode)
                else:
                    metrics[self.metrics_name + mode] = compute_depth_metrics(
                        self.config.model.params,
                        gt=batch['depth'],
                        pred=depth_pp if 'pp' in mode else depth,
                        use_gt_scale='gt' in mode)
                metrics[self.metrics_name + mode] = torch.cat([
                    metrics[self.metrics_name + mode],
                    torch.Tensor(pose_errs).to(depth_pp.device)
                ])
        # Return metrics and extra information
        return {'metrics': metrics, 'inv_depth': inv_depth_pp}
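The post-processing above follows the usual flip-augmentation pattern. A minimal, self-contained sketch of the idea is below, assuming flip_lr mirrors the width axis and that 'mean' fusion simply averages the prediction with its un-flipped counterpart (the real post_process_inv_depth may also blend image borders).

import torch

def flip_lr_sketch(x):
    # Mirror a [B,C,H,W] tensor along the width axis
    return torch.flip(x, dims=[-1])

def post_process_mean_sketch(inv_depth, inv_depth_flipped):
    # Average the original prediction with the un-flipped prediction from the mirrored input
    return 0.5 * (inv_depth + flip_lr_sketch(inv_depth_flipped))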
Example #3
    def forward(self, batch, return_logs=False):
        """
        Processes a batch.

        Parameters
        ----------
        batch : dict
            Input batch
        return_logs : bool
            True if logs are stored

        Returns
        -------
        output : dict
            Dictionary containing predicted inverse depth maps and poses
        """
        # Generate inverse depth predictions
        inv_depths = self.compute_inv_depths(batch['rgb'])
        # Generate pose predictions if available
        pose = None
        if 'rgb_context' in batch and self.pose_net is not None:
            pose = self.compute_poses(batch['rgb'], batch['rgb_context'],
                                      batch["intrinsics"],
                                      inv2depth(inv_depths[0]))
        # Return output dictionary
        return {
            'inv_depths': inv_depths,
            'poses': pose,
        }
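A hypothetical call pattern for the forward() above; `model` stands in for an instance of this module and is not constructed here. The batch needs at least 'rgb' for depth, plus 'rgb_context' and 'intrinsics' when pose estimation is enabled.

import torch

batch = {
    'rgb': torch.rand(1, 3, 192, 640),                              # target frame
    'rgb_context': [torch.rand(1, 3, 192, 640) for _ in range(2)],  # reference frames
    'intrinsics': torch.eye(3).unsqueeze(0),                        # [B,3,3] pinhole intrinsics
}
# output = model(batch)  # model: an instance of the module defining forward() above
# inv_depths, poses = output['inv_depths'], output['poses']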
Example #4
    def depth_cost_calc(self, inv_depth, fmap, fmaps_ref, pose_list, K, ref_K, scale_factor):
        """Aggregate per-reference-view matching costs into a single cost map."""
        cost_list = []
        for pose, fmap_r in zip(pose_list, fmaps_ref):
            cost = self.get_cost_each(pose, fmap, fmap_r, inv2depth(inv_depth), K, ref_K, scale_factor)
            cost_list.append(cost)  # (b, c, h, w)
        # cost = torch.stack(cost_list, dim=1).min(dim=1)[0]
        cost = torch.stack(cost_list, dim=1).mean(dim=1)
        return cost
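A self-contained illustration of the aggregation above: the per-reference-view cost maps of shape (b, c, h, w) are stacked along a new view dimension and reduced by the mean, while the commented-out line would take the element-wise minimum instead. Shapes below are arbitrary placeholders.

import torch

cost_list = [torch.rand(2, 16, 24, 80) for _ in range(3)]   # three reference views
cost_mean = torch.stack(cost_list, dim=1).mean(dim=1)       # (b, c, h, w), used above
cost_min = torch.stack(cost_list, dim=1).min(dim=1)[0]      # (b, c, h, w), the alternative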
Example #5
def infer_and_save_pose(input_file_refs, input_file, model_wrapper, image_shape, data_type,
                        save_depth_root, save_vis_root):
    """
    Process a single input file to produce and save visualization

    Parameters
    ----------
    input_file_refs : list(str)
        Reference image file paths
    input_file : str
        Image file for pose estimation
    model_wrapper : nn.Module
        Model wrapper used for inference
    image_shape : Image shape
        Input image shape
    half: bool
        use half precision (fp16)
    save: str
        Save format (npz or png)
    """    
    
    base_name = os.path.splitext(os.path.basename(input_file))[0]
    
    image_raw_wh = load_image(input_file).size
    # Helper: load an image, build its intrinsics, resize, and convert to tensors
    def process_image(filename):
        image = load_image(filename)
        # Resize and to tensor
        intr = get_intrinsics(image.size, image_shape, data_type) #(3, 3)
        image = resize_image(image, image_shape)
        image = to_tensor(image).unsqueeze(0)
        intr = torch.from_numpy(intr).unsqueeze(0) #(1, 3, 3)
        # Send image to GPU if available
        if torch.cuda.is_available():
            image = image.to('cuda')
            intr = intr.to('cuda')
        return image, intr
    image_ref = [process_image(input_file_ref)[0] for input_file_ref in input_file_refs]
    image, intrinsics = process_image(input_file)

    batch = {'rgb': image, 'rgb_context': image_ref, "intrinsics": intrinsics}
    
    output = model_wrapper(batch)
    inv_depth = output['inv_depths'][0] #(1, 1, h, w)
    depth = inv2depth(inv_depth)[0, 0].detach().cpu().numpy() #(h, w)
    
    pose21 = output['poses'][0].mat[0].detach().cpu().numpy()  # (4, 4)  # TODO check: target -> ref[0]
    pose23 = output['poses'][1].mat[0].detach().cpu().numpy()  # (4, 4)  # TODO check: target -> ref[1]

    vis_depth = viz_inv_depth(inv_depth[0]) * 255
    
    vis_depth_upsample = cv2.resize(vis_depth, image_raw_wh, interpolation=cv2.INTER_LINEAR)
    write_image(os.path.join(save_vis_root, f"{base_name}.jpg"), vis_depth_upsample)
    
    depth_upsample = cv2.resize(depth, image_raw_wh, interpolation=cv2.INTER_NEAREST)
    np.save(os.path.join(save_depth_root, f"{base_name}.npy"), depth_upsample)
    
    return depth, pose21, pose23, intrinsics[0].detach().cpu().numpy(), image[0].permute(1, 2, 0).detach().cpu().numpy() * 255
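One detail worth noting in the example above: PIL's Image.size is (width, height), which matches cv2.resize's dsize argument, and nearest-neighbour interpolation keeps depth discontinuities from being blended when upsampling. A small self-contained check (shapes are arbitrary):

import cv2
import numpy as np

depth = np.random.rand(192, 640).astype(np.float32)  # (h, w) predicted depth
raw_wh = (1242, 375)                                  # PIL Image.size ordering: (width, height)
depth_up = cv2.resize(depth, raw_wh, interpolation=cv2.INTER_NEAREST)
assert depth_up.shape == (375, 1242)                  # OpenCV returns (h, w)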
Example #6
    def forward(self,
                image,
                context,
                inv_depths,
                gt_inv_depth,
                gt_pose_context,
                K,
                ref_K,
                poses,
                return_logs=False,
                progress=0.0):
        """
        Calculates training supervised loss.

        Parameters
        ----------
        inv_depths : list of torch.Tensor [B,1,H,W]
            Predicted inverse depth maps for the original image, in all scales
        gt_inv_depth : torch.Tensor [B,1,H,W]
            Ground-truth inverse depth map for the original image
        return_logs : bool
            True if logs are saved for visualization
        progress : float
            Training percentage

        Returns
        -------
        losses_and_metrics : dict
            Output dictionary
        """
        # Use all predicted scales (progressive scaling is disabled here)
        self.n = len(inv_depths)  # self.progressive_scaling(progress)
        # Match the ground-truth map to the predicted scales

        gt_inv_depths = match_scales(gt_inv_depth,
                                     inv_depths,
                                     self.n,
                                     mode='nearest',
                                     align_corners=None)

        # Calculate and store supervised loss
        loss_depth = self.calculate_loss(inv_depths, gt_inv_depths)

        loss_pose = self.calc_pose_loss(poses, gt_pose_context,
                                        inv2depth(gt_inv_depth), K, ref_K)

        self.add_metric('depth_loss', loss_depth)
        self.add_metric('pose_loss', loss_pose)
        self.add_metric('all_loss', loss_depth + loss_pose)

        loss = loss_depth + loss_pose

        # Return losses and metrics
        return {
            'loss': loss.unsqueeze(0),
            'metrics': self.metrics,
        }
    def warp_ref_image(self, inv_depths, ref_image, K, ref_K, pose):
        """
        Warps a reference image to produce a reconstruction of the original one.

        Parameters
        ----------
        inv_depths : list of torch.Tensor [B,1,H,W]
            Inverse depth maps of the original image, in all scales
        ref_image : torch.Tensor [B,3,H,W]
            Reference RGB image
        K : torch.Tensor [B,3,3]
            Original camera intrinsics
        ref_K : torch.Tensor [B,3,3]
            Reference camera intrinsics
        pose : Pose
            Original -> Reference camera transformation

        Returns
        -------
        ref_warped : torch.Tensor [B,3,H,W]
            Warped reference image (reconstructing the original one)
        """
        
        B, _, H, W = ref_image.shape
        device = ref_image.device
        # Generate cameras for all scales
        cams, ref_cams = [], []
        for i in range(self.n):
            _, _, DH, DW = inv_depths[i].shape
            scale_factor = DW / float(W)
            cams.append(Camera(K=K.float()).scaled(scale_factor).to(device))
            ref_cams.append(Camera(K=ref_K.float(), Tcw=pose[i]).scaled(scale_factor).to(device))
        # View synthesis
        depths = [inv2depth(inv_depths[i]) for i in range(self.n)]
        ref_images = match_scales(ref_image, inv_depths, self.n)
        ref_warped = [view_synthesis(
            ref_images[i], depths[i], ref_cams[i], cams[i],
            padding_mode=self.padding_mode) for i in range(self.n)]
        # Return warped reference image
        return ref_warped
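The per-scale cameras above rely on scaling the intrinsics by DW / W. A minimal sketch of that scaling is given below; the actual Camera.scaled implementation may treat the principal-point offset differently, so this only illustrates the idea.

import torch

def scale_intrinsics_sketch(K, scale):
    # Scale [B,3,3] pinhole intrinsics for an image resized by `scale`
    K = K.clone()
    K[:, 0, 0] *= scale  # fx
    K[:, 1, 1] *= scale  # fy
    K[:, 0, 2] *= scale  # cx
    K[:, 1, 2] *= scale  # cy
    return K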
Example #8
    def forward(self, target_image, ref_imgs, intrinsics):
        """ Estimate inv depth and  poses """
        # run the feature network
        fmaps = self.fnet(torch.cat([target_image] + ref_imgs, dim=0))
        fmaps = torch.split(fmaps, [target_image.shape[0]] * (1 + len(ref_imgs)), dim=0)
        fmap1, fmaps_ref = fmaps[0], fmaps[1:]
        assert target_image.shape[2] / fmap1.shape[2] == self.feat_ratio
            
        # initial pose
        pose_list_init = []
        for fmap_ref in fmaps_ref:
            pose_list_init.append(self.pose_head(torch.cat([fmap1, fmap_ref], dim=1)))   
        
        # initial depth
        inv_depth_init = self.depth_head(fmap1, act_fn=torch.sigmoid)
        up_mask = self.upmask_net(fmap1)
        inv_depth_up_init = self.upsample_depth(inv_depth_init, up_mask, ratio=self.feat_ratio)

        inv_depth_predictions = [self.scale_inv_depth(inv_depth_up_init)[0]]
        pose_predictions = [[pose.clone() for pose in pose_list_init]]
        
        
        # run the context network for optimization
        if self.iters > 0:
            cnet_depth = self.cnet_depth(target_image)        
            hidden_d, inp_d = torch.split(cnet_depth, [self.hdim, self.cdim], dim=1)
            hidden_d = torch.tanh(hidden_d)
            inp_d = torch.relu(inp_d)
            
            img_pairs = []
            for ref_img in ref_imgs:
                img_pairs.append(torch.cat([target_image, ref_img], dim=1))  
            cnet_pose_list = self.cnet_pose(img_pairs)
            hidden_p_list, inp_p_list = [], []
            for cnet_pose in cnet_pose_list:
                hidden_p, inp_p = torch.split(cnet_pose, [self.hdim, self.cdim], dim=1)
                hidden_p_list.append(torch.tanh(hidden_p))
                inp_p_list.append(torch.relu(inp_p))
            
                
        # Iterative refinement of depth and poses
        pose_list = pose_list_init
        inv_depth = inv_depth_init
        inv_depth_up = None
        for itr in range(self.iters):
            inv_depth = inv_depth.detach()
            pose_list = [pose.detach() for pose in pose_list]

            # calc cost
            pose_cost_func_list = []
            for fmap_ref in fmaps_ref:
                pose_cost_func_list.append(partial(self.get_cost_each, fmap=fmap1, fmap_ref=fmap_ref,
                                                   depth=inv2depth(self.scale_inv_depth(inv_depth)[0]),
                                                   K=intrinsics, ref_K=intrinsics, scale_factor=1.0/self.feat_ratio))

            depth_cost_func = partial(self.depth_cost_calc, fmap=fmap1, fmaps_ref=fmaps_ref,
                                      pose_list=pose_list, K=intrinsics,
                                      ref_K=intrinsics, scale_factor=1.0/self.feat_ratio)

    
            #########  update depth ##########
            hidden_d, up_mask_seqs, inv_depth_seqs = self.update_block_depth(hidden_d, depth_cost_func,
                                                                             inv_depth, inp_d,
                                                                             seq_len=self.seq_len, 
                                                                             scale_func=self.scale_inv_depth)
            
            if not self.inter_sup:
                up_mask_seqs, inv_depth_seqs = [up_mask_seqs[-1]], [inv_depth_seqs[-1]]
            # upsample predictions
            for up_mask_i, inv_depth_i in zip(up_mask_seqs, inv_depth_seqs):
                inv_depth_up = self.upsample_depth(inv_depth_i, up_mask_i, ratio=self.feat_ratio)
                inv_depth_predictions.append(self.scale_inv_depth(inv_depth_up)[0])
            inv_depth = inv_depth_seqs[-1]
            
            #########  update pose ###########
            pose_list_seqs = [None] * len(pose_list)
            for i, (pose, hidden_p) in enumerate(zip(pose_list, hidden_p_list)):
                hidden_p, pose_seqs = self.update_block_pose(hidden_p, pose_cost_func_list[i],
                                                             pose, inp_p_list[i], seq_len=self.seq_len)
                hidden_p_list[i] = hidden_p
                if not self.inter_sup:
                    pose_seqs = [pose_seqs[-1]]
                pose_list_seqs[i] = pose_seqs
                
            for pose_list_i in zip(*pose_list_seqs):
                pose_predictions.append([pose.clone() for pose in pose_list_i])

            pose_list = list(zip(*pose_list_seqs))[-1]
            

        if not self.training:
            return inv_depth_predictions[-1], \
                   torch.stack(pose_predictions[-1], dim=1).view(target_image.shape[0], len(ref_imgs), 6) #(b, n, 6)
                
        return inv_depth_predictions, \
               torch.stack([torch.stack(poses_ref, dim=1) for poses_ref in pose_predictions], dim=2) #(b, n, iters, 6)
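The update loop above uses functools.partial to freeze the arguments that do not change between iterations, so the update blocks only pass the quantity being refined. A toy illustration of the pattern (the cost function below is a placeholder, not the model's real matching cost):

import torch
from functools import partial

def toy_cost(depth, fmap, fmap_ref, scale_factor):
    # Placeholder cost: feature difference modulated by depth (illustration only)
    return ((fmap - fmap_ref) ** 2).mean() * scale_factor / depth.mean()

fmap = torch.rand(1, 32, 24, 80)
fmap_ref = torch.rand(1, 32, 24, 80)
depth_cost_func = partial(toy_cost, fmap=fmap, fmap_ref=fmap_ref, scale_factor=0.125)
# Inside the update block, only the free argument is supplied:
cost = depth_cost_func(depth=torch.rand(1, 1, 24, 80).clamp(min=1e-3))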
Example #9
def infer_and_save_depth(input_file, output_file, model_wrapper, image_shape,
                         half, save):
    """
    Process a single input file to produce and save visualization

    Parameters
    ----------
    input_file : str
        Image file
    output_file : str
        Output file, or folder where the output will be saved
    model_wrapper : nn.Module
        Model wrapper used for inference
    image_shape : tuple
        Input image shape
    half : bool
        Use half precision (fp16)
    save : str
        Save format (npz or png)
    """
    if not is_image(output_file):
        # If not an image, assume it's a folder and append the input name
        os.makedirs(output_file, exist_ok=True)
        output_file = os.path.join(output_file, os.path.basename(input_file))

    # change to half precision for evaluation if requested
    dtype = torch.float16 if half else None

    # Load image
    image = load_image(input_file)
    # Resize and to tensor
    image = resize_image(image, image_shape)
    image = to_tensor(image).unsqueeze(0)

    # Send image to GPU if available
    if torch.cuda.is_available():
        image = image.to('cuda:{}'.format(rank()), dtype=dtype)

    # Depth inference (returns predicted inverse depth)
    pred_inv_depth = model_wrapper.depth(image)[0]

    if save == 'npz' or save == 'png':
        # Get depth from predicted depth map and save to different formats
        filename = '{}.{}'.format(os.path.splitext(output_file)[0], save)
        print('Saving {} to {}'.format(
            pcolor(input_file, 'cyan', attrs=['bold']),
            pcolor(filename, 'magenta', attrs=['bold'])))
        write_depth(filename, depth=inv2depth(pred_inv_depth))
    else:
        # Prepare RGB image
        rgb = image[0].permute(1, 2, 0).detach().cpu().numpy() * 255
        depth = inv2depth(pred_inv_depth)[0].detach().cpu().numpy()
        print(depth.shape)

        h, w = rgb.shape[:2]
        fx = fy = w * 1.2
        cx = w / 2.0
        cy = h / 2.0
        generate_pointcloud(rgb, depth, fx, fy, cx, cy, "./kitti_hr_test.ply")

        np.savez("./data.npz", rgb=rgb, depth=depth)

        # Prepare inverse depth
        viz_pred_inv_depth = viz_inv_depth(pred_inv_depth[0]) * 255
        # Concatenate both vertically
        image = np.concatenate([rgb, viz_pred_inv_depth], 0)
        # Save visualization
        print('Saving {} to {}'.format(
            pcolor(input_file, 'cyan', attrs=['bold']),
            pcolor(output_file, 'magenta', attrs=['bold'])))
        imwrite(output_file, image[:, :, ::-1])
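For reference, the point-cloud branch above picks heuristic intrinsics (fx = fy = 1.2 * width) when the true calibration is unknown. Below is a hedged sketch of the pinhole back-projection a generate_pointcloud-style helper would perform; the real helper's signature, color handling, and PLY writing are not shown.

import numpy as np

def backproject_sketch(depth, fx, fy, cx, cy):
    # Back-project an (H, W) depth map into an (H*W, 3) array of 3D points
    h, w = depth.shape
    u, v = np.meshgrid(np.arange(w), np.arange(h))
    x = (u - cx) * depth / fx
    y = (v - cy) * depth / fy
    return np.stack([x, y, depth], axis=-1).reshape(-1, 3)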