Example #1
0
    def forward(self, batch, return_logs=False, progress=0.0):
        """
        Processes a batch.

        Parameters
        ----------
        batch : dict
            Input batch
        return_logs : bool
            True if logs are saved for visualization
        progress : float
            Training progress percentage

        Returns
        -------
        output : dict
            Dictionary containing a "loss" scalar and different metrics and predictions
            for logging and downstream usage.
        """
        if not self.training:
            # If not training, no need for self-supervised loss
            return SfmModel.forward(self, batch)
        else:
            # Calculate predicted depth and pose output
            output = super().forward(batch, return_logs=return_logs)

            # Build ground-truth relative poses for the context frames
            poses_gt = [[], []]
            poses_gt[0], poses_gt[1] = torch.zeros((6, 4, 4)), torch.zeros((6, 4, 4))
            for i in range(6):
                poses_gt[0][i] = batch['pose_context'][0][i].inverse() @ batch['pose'][i]
                poses_gt[1][i] = batch['pose_context'][1][i].inverse() @ batch['pose'][i]
            poses_gt = [Pose(poses_gt[0]), Pose(poses_gt[1])]

            multiview_loss = self.multiview_photometric_loss(
                batch['rgb_original'], batch['rgb_context_original'],
                output['inv_depths'], poses_gt, batch['intrinsics'], batch['extrinsics'],
                return_logs=return_logs, progress=progress)

            # The multiview photometric loss is computed above for its metrics and logs,
            # but is currently excluded from the total loss: only the supervised term
            # below contributes.
            # loss = multiview_loss['loss']
            loss = 0.
            # Calculate supervised loss
            supervision_loss = self.supervised_loss(output['inv_depths'], depth2inv(batch['depth']),
                                                    return_logs=return_logs, progress=progress)
            loss += self.supervised_loss_weight * supervision_loss['loss']

            # Return loss and metrics
            return {
                'loss': loss,
                **merge_outputs(merge_outputs(multiview_loss, supervision_loss), output)
            }
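
# Hedged sketch (not from the source): how the ground-truth context poses above are
# built, assuming batch['pose'] and batch['pose_context'] hold camera-to-world 4x4 matrices.
import torch

T_target = torch.eye(4)             # absolute pose of the target frame (hypothetical)
T_context = torch.eye(4)
T_context[0, 3] = 1.0               # context camera translated by 1 m along x (hypothetical)

T_rel = T_context.inverse() @ T_target    # same composition as poses_gt[j][i] above
print(T_rel[:3, 3])                 # tensor([-1., 0., 0.]): target origin seen from the context frame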
Example #2
0
 def compute_pose_net(self, image, contexts):
     """Compute poses from image and a sequence of context images"""
     pose_vec = self.pose_net(image, contexts)
     return [
         Pose.from_vec(pose_vec[:, i], self.rotation_mode)
         for i in range(pose_vec.shape[1])
     ]
    def __init__(self,
                 path_to_theta_lut,
                 poly_coeffs,
                 principal_point=torch.Tensor([0., 0.]),
                 scale_factors=torch.Tensor([1., 1.]),
                 Tcw=None):
        """
        Initializes the Camera class

        Parameters
        ----------
        path_to_theta_lut : list of str
            Paths to the precomputed theta look-up tables
        poly_coeffs : torch.Tensor [B,4]
            Polynomial coefficients [c1, c2, c3, c4]
        principal_point : torch.Tensor [B,2]
            Principal point [cx, cy]
        scale_factors : torch.Tensor [B,2]
            Scale factors [ax, ay]
        Tcw : Pose
            Camera -> World pose transformation
        """
        super().__init__()
        self.path_to_theta_lut = path_to_theta_lut
        self.poly_coeffs = poly_coeffs
        self.principal_point = principal_point
        self.scale_factors = scale_factors
        #self.K = K
        self.Tcw = Pose.identity(
            len(poly_coeffs)
        ) if Tcw is None else Tcw  #Pose.identity(len(K)) if Tcw is None else Tcw
    def __init__(self,
                 poly_coeffs, principal_point, scale_factors,
                 K, k1, k2, k3, p1, p2,
                 camera_type, #int Tensor ; 0 is fisheye, 1 is distorted, 2 is other
                 Tcw=None):
        """
        Initializes the Camera class

        Parameters
        ----------
        poly_coeffs : torch.Tensor [B,4]
            Fisheye polynomial coefficients [c1, c2, c3, c4]
        principal_point : torch.Tensor [B,2]
            Principal point [cx, cy]
        scale_factors : torch.Tensor [B,2]
            Scale factors [ax, ay]
        K : torch.Tensor [B,3,3]
            Pinhole intrinsics matrix
        k1, k2, k3 : torch.Tensor [B]
            Radial distortion coefficients
        p1, p2 : torch.Tensor [B]
            Tangential distortion coefficients
        camera_type : torch.Tensor [B] (int)
            0 is fisheye, 1 is distorted, 2 is other
        Tcw : Pose
            Camera -> World pose transformation
        """
        super().__init__()
        self.poly_coeffs = poly_coeffs
        self.principal_point = principal_point
        self.scale_factors = scale_factors
        self.K = K
        self.k1 = k1
        self.k2 = k2
        self.k3 = k3
        self.p1 = p1
        self.p2 = p2
        self.camera_type = camera_type
        self.Tcw = Pose.identity(len(camera_type)) if Tcw is None else Tcw
Example #5
0
    def __init__(self, K, Tcw=None):
        """
        Initializes the Camera class

        Parameters
        ----------
        K : torch.Tensor [3,3]
            Camera intrinsics
        Tcw : Pose
            Camera -> World pose transformation
        """
        super().__init__()
        self.K = K
        self.Tcw = Pose.identity(len(K)) if Tcw is None else Tcw
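
# Minimal usage sketch (hypothetical intrinsics), assuming this is the standard pinhole
# Camera class: a batch of one camera with a default identity pose.
import torch

K = torch.tensor([[[1000.,    0., 640.],
                   [   0., 1000., 400.],
                   [   0.,    0.,   1.]]])   # [B,3,3]
cam = Camera(K)                              # Tcw defaults to Pose.identity(len(K))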
Example #6
0
    def __init__(self, R, Tcw=None):
        """
        Initializes the Camera class

        Parameters
        ----------
        R : torch.Tensor [B, 3, H, W]
            Camera ray surface
        Tcw : Pose
            Camera -> World pose transformation
        """
        super().__init__()
        self.ray_surface = R
        self.Tcw = Pose.identity(1) if Tcw is None else Tcw
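
# Minimal usage sketch (hypothetical resolution), assuming this is the ray-surface
# Camera variant: one ray direction per pixel and a single identity pose.
import torch

R = torch.randn(1, 3, 800, 1280)   # [B,3,H,W] ray surface (random values, for illustration only)
cam = Camera(R)                     # Tcw defaults to Pose.identity(1)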
Example #7
0
        def pcl_distance(extra_rot_deg):
            for i_cam in range(4):
                base_folder_str = get_base_folder(input_files[i_cam][0])
                split_type_str = get_split_type(input_files[i_cam][0])
                seq_name_str = get_sequence_name(input_files[i_cam][0])
                camera_str = get_camera_name(input_files[i_cam][0])

                calib_data = {}
                calib_data[camera_str] = read_raw_calib_files_camera_valeo(
                    base_folder_str, split_type_str, seq_name_str, camera_str)
                pose_matrix = torch.from_numpy(
                    get_extrinsics_pose_matrix_extra_rot(
                        input_files[i_cam][0], calib_data,
                        extra_rot_deg[3 * i_cam + 0],
                        extra_rot_deg[3 * i_cam + 1],
                        extra_rot_deg[3 * i_cam + 2])).unsqueeze(0)
                pose_tensor = Pose(pose_matrix).to('cuda:{}'.format(rank()),
                                                   dtype=dtype)
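                # Twc appears to be a cached property: clear its cache before
                # overwriting Tcw so the new extrinsics take effect (assumption).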
                CameraFisheye.Twc.fget.cache_clear()
                cams[i_cam].Tcw = pose_tensor

                world_points[i_cam] = cams[i_cam].reconstruct(
                    pred_depths[i_cam], frame='w')

                world_points[i_cam] = world_points[i_cam][0].cpu().numpy()
                world_points[i_cam] = world_points[i_cam].reshape(
                    (3, -1)).transpose()
                world_points[i_cam] = world_points[i_cam][
                    not_masked[i_cam]].astype(np.float64)
                pcl_full[i_cam].points = o3d.utility.Vector3dVector(
                    world_points[i_cam])

            dist_total = np.zeros(4)
            for i_cam in range(4):
                i_cam1 = i_cam
                i_cam2 = (i_cam + 1) % 4
                pcl1 = pcl_full[i_cam1].select_by_index(
                    ind_clean_right[i_cam1]).uniform_down_sample(
                        100).select_by_index(close_points1[i_cam])
                pcl2 = pcl_full[i_cam2].select_by_index(
                    ind_clean_left[i_cam2]).uniform_down_sample(
                        100).select_by_index(close_points2[i_cam])
                dists = pcl1.compute_point_cloud_distance(pcl2)
                dist_total[i_cam] = np.mean(np.asarray(dists))

            return np.mean(dist_total)
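
# Hedged sketch (not from the source): pcl_distance takes 12 extra rotation angles
# (3 per camera, 4 cameras) and returns the mean inter-camera point-cloud distance,
# so it can serve as a cost for refining the extrinsic rotations; the optimizer
# choice below is illustrative only.
import numpy as np
from scipy.optimize import minimize

x0 = np.zeros(12)                                   # start from zero extra rotation
res = minimize(pcl_distance, x0, method='Powell', options={'maxiter': 50})
best_extra_rot_deg = res.x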
Example #8
0
                              np.array([[0.046296, -7.33178]])).float(),
                          scale_factors=torch.from_numpy(np.array([[1.0, 1.0]
                                                                   ])).float())

cam_left = CameraFisheye(path_to_theta_lut=[
    '/home/data/vbelissen/valeo_multiview/calibrations_theta_lut/fisheye/test/20170320_163113/20170320_163113_cam_0_1280_800.npy'
],
                         path_to_ego_mask=[''],
                         poly_coeffs=torch.from_numpy(
                             np.array([[282.85, -27.8671, 114.318,
                                        -36.6703]])).float(),
                         principal_point=torch.from_numpy(
                             np.array([[0.046296, -7.33178]])).float(),
                         scale_factors=torch.from_numpy(np.array([[1.0, 1.0]
                                                                  ])).float(),
                         Tcw=Pose(pose_matrix_left_torch))

cam_front = cam_front.to(torch.device('cuda'))
cam_left = cam_left.to(torch.device('cuda'))

world_points = cam_front.reconstruct(simulated_depth, frame='w')

ref_coords_left = cam_left.project(world_points, frame='w')
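# A hedged reading of the next call: ref_coords_left holds, for every front-camera pixel,
# the normalized location where its 3D point projects into the left image, so grid_sample
# resamples the left image into the front camera's frame.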

warped_front_left = funct.grid_sample(left_img_torch,
                                      ref_coords_left,
                                      mode='bilinear',
                                      padding_mode='zeros',
                                      align_corners=True)

warped_front_left_PIL = torch.transpose(warped_front_left.unsqueeze(4), 1,
Example #9
0
def infer_plot_and_save_3D_pcl(input_files, output_folder, model_wrappers,
                               image_shape):
    """
    Process a list of input files to plot and save visualization.

    Parameters
    ----------
    input_files : list (number of cameras) of lists (number of files) of str
        Image files to process, one list per camera
    output_folder : str
        Output folder where the outputs will be saved
    model_wrappers : list of nn.Module
        Model wrappers used for inference, one per camera
    image_shape : tuple of int
        Input image shape (H, W)
    """
    N_cams = len(input_files)
    N_files = len(input_files[0])

    camera_names = []
    for i_cam in range(N_cams):
        camera_names.append(get_camera_name(input_files[i_cam][0]))

    cams = []
    not_masked = []

    # Depth limits if specified
    if args.max_depths is not None:
        cap_depths = True
        depth_limits = [0.9 * m for m in args.max_depths]
    else:
        cap_depths = False

    # Let's assume all images are from the same sequence (thus same cameras)
    for i_cam in range(N_cams):
        base_folder_str = get_base_folder(input_files[i_cam][0])
        split_type_str = get_split_type(input_files[i_cam][0])
        seq_name_str = get_sequence_name(input_files[i_cam][0])
        camera_str = get_camera_name(input_files[i_cam][0])

        calib_data = {}
        calib_data[camera_str] = read_raw_calib_files_camera_valeo_with_suffix(
            base_folder_str, split_type_str, seq_name_str, camera_str,
            args.calibrations_suffix)

        path_to_ego_mask = get_path_to_ego_mask(input_files[i_cam][0])
        poly_coeffs, principal_point, scale_factors, K, k, p = get_full_intrinsics(
            input_files[i_cam][0], calib_data)

        poly_coeffs = torch.from_numpy(poly_coeffs).unsqueeze(0)
        principal_point = torch.from_numpy(principal_point).unsqueeze(0)
        scale_factors = torch.from_numpy(scale_factors).unsqueeze(0)
        K = torch.from_numpy(K).unsqueeze(0)
        k = torch.from_numpy(k).unsqueeze(0)
        p = torch.from_numpy(p).unsqueeze(0)
        pose_matrix = torch.from_numpy(
            get_extrinsics_pose_matrix(input_files[i_cam][0],
                                       calib_data)).unsqueeze(0)
        pose_tensor = Pose(pose_matrix)
        camera_type = get_camera_type(input_files[i_cam][0], calib_data)
        camera_type_int = torch.tensor([get_camera_type_int(camera_type)])

        # Initialization of cameras
        cams.append(
            CameraMultifocal(poly_coeffs=poly_coeffs.float(),
                             principal_point=principal_point.float(),
                             scale_factors=scale_factors.float(),
                             K=K.float(),
                             k1=k[:, 0].float(),
                             k2=k[:, 1].float(),
                             k3=k[:, 2].float(),
                             p1=p[:, 0].float(),
                             p2=p[:, 1].float(),
                             camera_type=camera_type_int,
                             Tcw=pose_tensor))
        if torch.cuda.is_available():
            cams[i_cam] = cams[i_cam].to('cuda:{}'.format(rank()))

        # Using ego masks to create a "not_masked" bool array
        ego_mask = np.load(path_to_ego_mask)
        not_masked.append(ego_mask.astype(bool).reshape(-1))

    # Computing the approximate center of the car as the barycenter of cameras
    cams_middle = np.zeros(3)
    i_cc_max = min(N_cams, 4)
    for c in range(3):
        for i_cc in range(i_cc_max):
            cams_middle[c] += cams[i_cc].Twc.mat.cpu().numpy()[0, c,
                                                               3] / i_cc_max

    # Create output dirs for each cam
    seq_name = get_sequence_name(input_files[0][0])
    for i_cam in range(N_cams):
        os.makedirs(os.path.join(output_folder, seq_name, 'depth',
                                 camera_names[i_cam]),
                    exist_ok=True)
        os.makedirs(os.path.join(output_folder, seq_name, 'rgb',
                                 camera_names[i_cam]),
                    exist_ok=True)

    first_pic = True  # The first picture will be used to create a special sequence

    # Iteration on files, using a specified step
    for i_file in range(0, N_files, args.step_in_input):

        base_0, ext_0 = os.path.splitext(
            os.path.basename(input_files[0][i_file]))
        print(base_0)

        # Initialize empty arrays that will be populated by each cam
        images = []
        images_numpy = []
        predicted_masks = []
        pred_inv_depths = []
        pred_depths = []
        world_points = []
        input_depth_files = []
        has_gt_depth = []
        input_pred_masks = []
        has_pred_mask = []
        gt_depth = []
        gt_depth_3d = []
        pcl_full = []
        pcl_only_inliers = []
        pcl_gt = []
        rgb = []
        viz_pred_inv_depths = []
        great_lap = []
        mask_depth_capped = []
        final_masks = []

        # First iteration on cameras: loading images and semantic masks, and predicting depth
        for i_cam in range(N_cams):
            # Loading rgb images
            images.append(
                load_image(input_files[i_cam][i_file]).convert('RGB'))
            images[i_cam] = resize_image(images[i_cam], image_shape)
            images[i_cam] = to_tensor(images[i_cam]).unsqueeze(0)
            if torch.cuda.is_available():
                images[i_cam] = images[i_cam].to('cuda:{}'.format(rank()))

            # If specified, loading predicted semantic masks
            if args.load_pred_masks:
                input_pred_mask_file = input_files[i_cam][i_file].replace(
                    'images_multiview', 'pred_mask')
                input_pred_masks.append(input_pred_mask_file)
                has_pred_mask.append(os.path.exists(input_pred_masks[i_cam]))
                predicted_masks.append(
                    load_image(input_pred_mask_file).convert('RGB'))
                predicted_masks[i_cam] = resize_image(predicted_masks[i_cam],
                                                      image_shape)
                predicted_masks[i_cam] = to_tensor(
                    predicted_masks[i_cam]).unsqueeze(0)
                if torch.cuda.is_available():
                    predicted_masks[i_cam] = predicted_masks[i_cam].to(
                        'cuda:{}'.format(rank()))

            # Predicting depth using loaded wrappers
            pred_inv_depths.append(model_wrappers[i_cam].depth(images[i_cam]))
            pred_depths.append(inv2depth(pred_inv_depths[i_cam]))

        # Second iteration on cameras: reconstructing 3D and applying masks
        for i_cam in range(N_cams):
            # If specified, create a unified depth map (still in beta)
            if args.mix_depths:
                # TODO: adapt this to the case where the Long Range camera is included
                # Depth will be unified between original camera and left/right camera (thus 3 cameras)
                depths = (torch.ones(1, 3, H, W) * 500).cuda()
                depths[0, 1, :, :] = pred_depths[i_cam][0, 0, :, :]

                # Iteration on left/right cameras
                for relative in [-1, 1]:
                    # Load ego mask
                    path_to_ego_mask_relative = get_path_to_ego_mask(
                        input_files[(i_cam + relative) % 4][0])
                    ego_mask_relative = np.load(path_to_ego_mask_relative)
                    ego_mask_relative = torch.from_numpy(
                        ego_mask_relative.astype(bool))

                    # Reconstruct 3d points from relative depth map
                    relative_points_3d = cams[(i_cam + relative) %
                                              4].reconstruct(
                                                  pred_depths[(i_cam +
                                                               relative) % 4],
                                                  frame='w')

                    # Center of projection of current cam
                    cop = np.zeros((3, H, W))
                    for c in range(3):
                        cop[c, :, :] = cams[i_cam].Twc.mat.cpu().numpy()[0, c,
                                                                         3]

                    # distances of 3d points to cop of current cam
                    distances_3d = np.linalg.norm(
                        relative_points_3d[0, :, :, :].cpu().numpy() - cop,
                        axis=0)
                    distances_3d = torch.from_numpy(distances_3d).unsqueeze(
                        0).cuda().float()

                    # projected points on current cam (values should be in (-1,1)), be careful X and Y are switched!
                    projected_points_2d = cams[i_cam].project(
                        relative_points_3d, frame='w')
                    projected_points_2d[:, :, :,
                                        [0, 1]] = projected_points_2d[:, :, :,
                                                                      [1, 0]]

                    # applying ego mask of relative cam
                    projected_points_2d[:, ~ego_mask_relative, :] = 2

                    # looking for indices of inbound pixels
                    x_ok = (projected_points_2d[0, :, :, 0] >
                            -1) * (projected_points_2d[0, :, :, 0] < 1)
                    y_ok = (projected_points_2d[0, :, :, 1] >
                            -1) * (projected_points_2d[0, :, :, 1] < 1)
                    xy_ok = x_ok * y_ok
                    xy_ok_id = xy_ok.nonzero(as_tuple=False)

                    # xy values of these indices (in (-1, 1))
                    xy_ok_X = xy_ok_id[:, 0]
                    xy_ok_Y = xy_ok_id[:, 1]

                    # xy values in pixels
                    projected_points_2d_ints = (projected_points_2d + 1) / 2
                    projected_points_2d_ints[0, :, :, 0] = torch.round(
                        projected_points_2d_ints[0, :, :, 0] * (H - 1))
                    projected_points_2d_ints[0, :, :, 1] = torch.round(
                        projected_points_2d_ints[0, :, :, 1] * (W - 1))
                    projected_points_2d_ints = projected_points_2d_ints.to(
                        dtype=int)

                    # Main step: write the distances of the relative camera's 3D points
                    # into the matching pixels of the current camera's extra depth channel
                    depths[0, 1 + relative,
                           projected_points_2d_ints[0, xy_ok_X, xy_ok_Y, 0],
                           projected_points_2d_ints[0, xy_ok_X, xy_ok_Y,
                                                    1]] = distances_3d[0,
                                                                       xy_ok_X,
                                                                       xy_ok_Y]

                    # Interpolate if requested
                    if args.mix_depths_interpolate:
                        dd = depths[0, 1 + relative, :, :].cpu().numpy()
                        dd[dd == 500] = np.nan
                        dd = fillMissingValues(dd,
                                               copy=True,
                                               interpolator=scipy.interpolate.
                                               LinearNDInterpolator)
                        dd[np.isnan(dd)] = 500
                        dd[dd == 0] = 500
                        depths[0, 1 + relative, :, :] = torch.from_numpy(
                            dd).unsqueeze(0).unsqueeze(0).cuda()

                # Unify depth maps by taking min
                depths[depths == 0] = 500
                pred_depths[i_cam] = depths.min(dim=1, keepdim=True)[0]

            # Reconstruct 3D points
            world_points.append(cams[i_cam].reconstruct(pred_depths[i_cam],
                                                        frame='w'))

            # Switch to numpy format for use in Open3D
            images_numpy.append(images[i_cam][0].cpu().numpy())

            # Reshape to something like a list of pixels for Open3D
            images_numpy[i_cam] = images_numpy[i_cam].reshape(
                (3, -1)).transpose()

            # Copy of predicted depth to be used for computing masks
            pred_depth_copy = pred_depths[i_cam].squeeze(0).squeeze(
                0).cpu().numpy()
            pred_depth_copy = np.uint8(pred_depth_copy)

            # Final masks are initialized with ego masks
            final_masks.append(not_masked[i_cam])

            # If depths are to be capped, final masks are updated
            if cap_depths:
                mask_depth_capped.append(
                    (pred_depth_copy < depth_limits[i_cam]).reshape(-1))
                final_masks[
                    i_cam] = final_masks[i_cam] * mask_depth_capped[i_cam]

            # If a criterion on laplacian is to be applied, final masks are updated
            if args.clean_with_laplacian:
                lap = np.uint8(
                    np.absolute(
                        cv2.Laplacian(pred_depth_copy, cv2.CV_64F, ksize=3)))
                great_lap.append(lap < args.lap_threshold)
                great_lap[i_cam] = great_lap[i_cam].reshape(-1)
                final_masks[i_cam] = final_masks[i_cam] * great_lap[i_cam]

            # Apply mask on images
            images_numpy[i_cam] = images_numpy[i_cam][final_masks[i_cam]]

            # Apply mask on semantic masks if applicable
            if args.load_pred_masks:
                predicted_masks[i_cam] = predicted_masks[i_cam][0].cpu().numpy(
                )
                predicted_masks[i_cam] = predicted_masks[i_cam].reshape(
                    (3, -1)).transpose()
                predicted_masks[i_cam] = predicted_masks[i_cam][
                    final_masks[i_cam]]

        # Third iteration on cameras: building and filtering the point clouds
        for i_cam in range(N_cams):
            # Transforming predicted 3D points into numpy and into a list-like format, then applying masks
            world_points[i_cam] = world_points[i_cam][0].cpu().numpy()
            world_points[i_cam] = world_points[i_cam].reshape(
                (3, -1)).transpose()
            world_points[i_cam] = world_points[i_cam][final_masks[i_cam]]

            # Look for ground truth depth data, if yes then reconstruct 3d from it
            input_depth_files.append(
                get_depth_file(input_files[i_cam][i_file], args.depth_suffix))
            has_gt_depth.append(os.path.exists(input_depth_files[i_cam]))
            if has_gt_depth[i_cam]:
                gt_depth.append(
                    np.load(input_depth_files[i_cam])['velodyne_depth'].astype(
                        np.float32))
                gt_depth[i_cam] = torch.from_numpy(
                    gt_depth[i_cam]).unsqueeze(0).unsqueeze(0)
                if torch.cuda.is_available():
                    gt_depth[i_cam] = gt_depth[i_cam].to('cuda:{}'.format(
                        rank()))
                gt_depth_3d.append(cams[i_cam].reconstruct(gt_depth[i_cam],
                                                           frame='w'))
                gt_depth_3d[i_cam] = gt_depth_3d[i_cam][0].cpu().numpy()
                gt_depth_3d[i_cam] = gt_depth_3d[i_cam].reshape(
                    (3, -1)).transpose()
            else:
                gt_depth.append(0)
                gt_depth_3d.append(0)

            # Initialize full pointcloud
            pcl_full.append(o3d.geometry.PointCloud())
            pcl_full[i_cam].points = o3d.utility.Vector3dVector(
                world_points[i_cam])
            pcl_full[i_cam].colors = o3d.utility.Vector3dVector(
                images_numpy[i_cam])

            pcl = pcl_full[i_cam]  # .select_by_index(ind)

            # Set of operations to remove outliers
            # We remove points that are below -1.0 meter, or points that are higher than 1.5 meters
            # and of blue/green color (typically the sky)
            # TODO: replace with semantic "sky" masks
            points_tmp = np.asarray(pcl.points)
            colors_tmp = images_numpy[i_cam]  # np.asarray(pcl.colors)
            mask_below = points_tmp[:, 2] < -1.0
            mask_height = points_tmp[:,
                                     2] > 1.5  # * (abs(points_tmp[:, 0]) < 10) * (abs(points_tmp[:, 1]) < 3)
            mask_colors_blue1 = np.sum(
                np.abs(colors_tmp - np.array([0.6, 0.8, 1.0])), axis=1) < 0.6
            mask_colors_blue2 = np.sum(
                np.abs(colors_tmp - np.array([0.8, 1.0, 1.0])), axis=1) < 0.6
            mask_colors_green1 = np.sum(
                np.abs(colors_tmp - np.array([0.2, 1.0, 0.40])), axis=1) < 0.8
            mask_colors_green2 = np.sum(
                np.abs(colors_tmp - np.array([0.0, 0.5, 0.15])), axis=1) < 0.2
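            # The masks above and below are 0/1 arrays: multiplication acts as a logical AND
            # and (1 - m) as a logical NOT, so "mask" keeps points that are neither below
            # -1.0 m nor high, sky-colored (blue/green) points.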
            mask = (1 - mask_below) \
                   * (1 - mask_height * mask_colors_blue1) \
                   * (1 - mask_height * mask_colors_blue2) \
                   * (1 - mask_height * mask_colors_green1) \
                   * (1 - mask_height * mask_colors_green2)

            # Colorize with semantic masks if specified
            if args.load_pred_masks:
                # Remove black/grey pixels from predicted semantic masks
                black_pixels = np.logical_or(
                    np.sum(np.abs(predicted_masks[i_cam] * 255 -
                                  np.array([0, 0, 0])),
                           axis=1) < 15,
                    np.sum(np.abs(predicted_masks[i_cam] * 255 -
                                  np.array([127, 127, 127])),
                           axis=1) < 20)
                ind_black_pixels = np.where(black_pixels)[0]
                # colorizing pixels, from a mix of semantic predictions and rgb values
                color_vector = args.alpha_mask_semantic * predicted_masks[
                    i_cam] + (1 -
                              args.alpha_mask_semantic) * images_numpy[i_cam]
                color_vector[ind_black_pixels] = images_numpy[i_cam][
                    ind_black_pixels]
                pcl_full[i_cam].colors = o3d.utility.Vector3dVector(
                    color_vector)
                pcl = pcl_full[i_cam]

            # Colorize each camera differently if specified
            if args.colorize_cameras:
                colors_tmp = args.alpha_colorize_cameras * color_list_cameras[
                    i_cam] + (
                        1 - args.alpha_colorize_cameras) * images_numpy[i_cam]
                pcl.colors = o3d.utility.Vector3dVector(colors_tmp)

            # Apply color mask
            pcl = pcl.select_by_index(np.where(mask)[0])

            # Remove outliers based on the number of neighbors, if specified
            if args.remove_neighbors_outliers:
                pcl, ind = pcl.remove_statistical_outlier(
                    nb_neighbors=7,
                    std_ratio=args.remove_neighbors_outliers_std)
                pcl = pcl.select_by_index(ind)

            # Downsample based on a voxel size, if specified
            if args.voxel_downsample:
                pcl = pcl.voxel_down_sample(
                    voxel_size=args.voxel_size_downsampling)

            # Store the filtered point cloud for this camera
            pcl_only_inliers.append(pcl)

            # Ground-truth pointcloud: define its color with a colormap (plasma)
            if has_gt_depth[i_cam]:
                pcl_gt.append(o3d.geometry.PointCloud())
                pcl_gt[i_cam].points = o3d.utility.Vector3dVector(
                    gt_depth_3d[i_cam])
                gt_inv_depth = 1 / (np.linalg.norm(
                    gt_depth_3d[i_cam] - cams_middle, axis=1) + 1e-6)
                cm = get_cmap('plasma')
                normalizer = .35
                gt_inv_depth /= normalizer
                pcl_gt[i_cam].colors = o3d.utility.Vector3dVector(
                    cm(np.clip(gt_inv_depth, 0., 1.0))[:, :3])
            else:
                pcl_gt.append(0)

        # Downsample pointclouds based on the presence of nearby pointclouds with higher priority
        # (i.e. lidar or semantized pointcloud)
        if args.remove_close_points_lidar_semantic:
            threshold_depth2depth = 0.5
            threshold_depth2lidar = 0.1
            for i_cam in range(4):
                if has_pred_mask[i_cam]:
                    for relative in [-1, 1]:
                        if not has_pred_mask[(i_cam + relative) % 4]:
                            dists = pcl_only_inliers[
                                (i_cam + relative) %
                                4].compute_point_cloud_distance(
                                    pcl_only_inliers[i_cam])
                            p1 = pcl_only_inliers[
                                (i_cam + relative) % 4].select_by_index(
                                    np.where(
                                        np.asarray(dists) >
                                        threshold_depth2depth)[0])
                            p2 = pcl_only_inliers[(
                                i_cam + relative
                            ) % 4].select_by_index(
                                np.where(
                                    np.asarray(dists) > threshold_depth2depth)
                                [0],
                                invert=True).uniform_down_sample(
                                    15)  #.voxel_down_sample(voxel_size=0.5)
                            pcl_only_inliers[(i_cam + relative) % 4] = p1 + p2
                if has_gt_depth[i_cam]:
                    down = 15 if has_pred_mask[i_cam] else 30
                    dists = pcl_only_inliers[
                        i_cam].compute_point_cloud_distance(pcl_gt[i_cam])
                    p1 = pcl_only_inliers[i_cam].select_by_index(
                        np.where(np.asarray(dists) > threshold_depth2lidar)[0])
                    p2 = pcl_only_inliers[i_cam].select_by_index(
                        np.where(np.asarray(dists) > threshold_depth2lidar)[0],
                        invert=True).uniform_down_sample(
                            down)  #.voxel_down_sample(voxel_size=0.5)
                    pcl_only_inliers[i_cam] = p1 + p2

        # Define the POV list
        pov_list = pov_dict[args.pov_file_dict]
        n_povs = len(pov_list)

        # If specified, the first pic is frozen and the camera moves in the scene
        if args.plot_pov_sequence_first_pic:
            if first_pic:
                # Loop on POVs
                for i_cam_n in range(n_povs):
                    vis_only_inliers = o3d.visualization.Visualizer()
                    vis_only_inliers.create_window(visible=True,
                                                   window_name='inliers' +
                                                   str(i_file))
                    for i_cam in range(N_cams):
                        vis_only_inliers.add_geometry(pcl_only_inliers[i_cam])
                    for i, e in enumerate(pcl_gt):
                        if e != 0:
                            vis_only_inliers.add_geometry(e)
                    ctr = vis_only_inliers.get_view_control()
                    ctr.set_lookat(lookat_vector)
                    ctr.set_front(front_vector)
                    ctr.set_up(up_vector)
                    ctr.set_zoom(zoom_float)
                    param = o3d.io.read_pinhole_camera_parameters(
                        pov_list[i_cam_n]
                    )  #('/home/vbelissen/Downloads/test/cameras_jsons/sequence/test1_'+str(i_cam_n)+'v3.json')
                    ctr.convert_from_pinhole_camera_parameters(param)
                    opt = vis_only_inliers.get_render_option()
                    opt.background_color = np.asarray([0, 0, 0])
                    opt.point_size = 3.0
                    #opt.light_on = False
                    #vis_only_inliers.update_geometry('inliers0')
                    vis_only_inliers.poll_events()
                    vis_only_inliers.update_renderer()
                    if args.stop:
                        vis_only_inliers.run()
                        pcd1 = pcl_only_inliers[0]
                        for i in range(1, N_cams):
                            pcd1 = pcd1 + pcl_only_inliers[i]
                        for i_cam3 in range(N_cams):
                            if has_gt_depth[i_cam3]:
                                pcd1 += pcl_gt[i_cam3]
                        #o3d.io.write_point_cloud(os.path.join(output_folder, seq_name, 'open3d', base_0 + '.pcd'), pcd1)
                    # = vis_only_inliers.get_view_control().convert_to_pinhole_camera_parameters()
                    #o3d.io.write_pinhole_camera_parameters('/home/vbelissen/Downloads/test.json', param)
                    if args.save_visualization:
                        image = vis_only_inliers.capture_screen_float_buffer(
                            False)
                        plt.imsave(os.path.join(output_folder, seq_name, 'pcl',
                                                base_0,
                                                str(i_cam_n) + '.png'),
                                   np.asarray(image),
                                   dpi=1)
                    vis_only_inliers.destroy_window()
                    del ctr
                    del vis_only_inliers
                    del opt
                first_pic = False

        # Plot point cloud
        vis_only_inliers = o3d.visualization.Visualizer()
        vis_only_inliers.create_window(visible=True,
                                       window_name='inliers' + str(i_file))
        for i_cam in range(N_cams):
            vis_only_inliers.add_geometry(pcl_only_inliers[i_cam])
        if args.print_lidar:
            for i, e in enumerate(pcl_gt):
                if e != 0:
                    vis_only_inliers.add_geometry(e)
        ctr = vis_only_inliers.get_view_control()
        ctr.set_lookat(lookat_vector)
        ctr.set_front(front_vector)
        ctr.set_up(up_vector)
        ctr.set_zoom(zoom_float)
        param = o3d.io.read_pinhole_camera_parameters(
            pov_list[-1]
        )  #('/home/vbelissen/Downloads/test/cameras_jsons/sequence/test1_'+str(119)+'v3.json')
        ctr.convert_from_pinhole_camera_parameters(param)
        opt = vis_only_inliers.get_render_option()
        opt.background_color = np.asarray([0, 0, 0])
        opt.point_size = 3.0
        #opt.light_on = False
        #vis_only_inliers.update_geometry('inliers0')
        vis_only_inliers.poll_events()
        vis_only_inliers.update_renderer()
        if args.stop:
            vis_only_inliers.run()
            pcd1 = pcl_only_inliers[0]
            for i in range(1, N_cams):
                pcd1 = pcd1 + pcl_only_inliers[i]
            for i_cam3 in range(N_cams):
                if has_gt_depth[i_cam3]:
                    pcd1 += pcl_gt[i_cam3]
            o3d.io.write_point_cloud(
                os.path.join(output_folder, seq_name, 'open3d',
                             base_0 + '.pcd'), pcd1)
        #param = vis_only_inliers.get_view_control().convert_to_pinhole_camera_parameters()
        #o3d.io.write_pinhole_camera_parameters('/home/vbelissen/Downloads/test.json', param)
        if args.save_visualization:
            image = vis_only_inliers.capture_screen_float_buffer(False)
            plt.imsave(os.path.join(output_folder, seq_name, 'pcl', 'normal',
                                    base_0 + '_normal_.png'),
                       np.asarray(image),
                       dpi=1)
        vis_only_inliers.destroy_window()
        del ctr
        del vis_only_inliers
        del opt

        # Save RGB and depth maps
        for i_cam in range(N_cams):
            rgb.append(
                images[i_cam][0].permute(1, 2, 0).detach().cpu().numpy() * 255)
            viz_pred_inv_depths.append(
                viz_inv_depth(pred_inv_depths[i_cam][0], normalizer=0.4) * 255)
            viz_pred_inv_depths[i_cam][not_masked[i_cam].reshape(image_shape)
                                       == 0] = 0
            # Save visualization
            if args.save_visualization:
                output_file1 = os.path.join(
                    output_folder, seq_name, 'depth', camera_names[i_cam],
                    os.path.basename(input_files[i_cam][i_file]))
                output_file2 = os.path.join(
                    output_folder, seq_name, 'rgb', camera_names[i_cam],
                    os.path.basename(input_files[i_cam][i_file]))
                imwrite(output_file1, viz_pred_inv_depths[i_cam][:, :, ::-1])
                imwrite(output_file2, rgb[i_cam][:, :, ::-1])
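
# Hedged sketch (not from the source): a minimal argument parser exposing a subset of the
# "args" flags referenced in the function above; the real script defines these (and more)
# elsewhere, and the defaults shown here are assumptions.
import argparse

parser = argparse.ArgumentParser(description='Plot and save 3D point clouds')
parser.add_argument('--step_in_input', type=int, default=1)
parser.add_argument('--max_depths', type=float, nargs='*', default=None)
parser.add_argument('--load_pred_masks', action='store_true')
parser.add_argument('--mix_depths', action='store_true')
parser.add_argument('--clean_with_laplacian', action='store_true')
parser.add_argument('--lap_threshold', type=float, default=4.0)
parser.add_argument('--save_visualization', action='store_true')
parser.add_argument('--stop', action='store_true')
args = parser.parse_args()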
Example #10
0
    def forward(self,
                image,
                context,
                inv_depths,
                K,
                ref_K,
                extrinsics,
                poses,
                return_logs=False,
                progress=0.0):
        """
        Calculates the training photometric loss, combining temporal, spatial and
        temporal-spatial terms.

        Parameters
        ----------
        image : torch.Tensor [B,3,H,W]
            Original image
        context : list of torch.Tensor [B,3,H,W]
            Context containing a list of reference images
        inv_depths : list of torch.Tensor [B,1,H,W]
            Predicted depth maps for the original image, in all scales
        K : torch.Tensor [B,3,3]
            Original camera intrinsics
        ref_K : torch.Tensor [B,3,3]
            Reference camera intrinsics
        extrinsics : torch.Tensor [B,4,4]
            Per-camera extrinsic pose matrices
        poses : list of Pose
            Camera transformation between original and context
        return_logs : bool
            True if logs are saved for visualization
        progress : float
            Training percentage

        Returns
        -------
        losses_and_metrics : dict
            Output dictionary
        """

        # Step 0: prepare for loss calculation
        # Reorganize the order of cameras (see the dataset API documentation for details)
        image = self.sort_cameras_tensor(image)
        context = self.sort_cameras_tensor(context)
        inv_depths = self.sort_cameras_tensor(inv_depths)
        K = self.sort_cameras_tensor(K)
        ref_K = K
        poses = [
            Pose(self.sort_cameras_tensor(poses[0].item())),
            Pose(self.sort_cameras_tensor(poses[1].item()))
        ]

        # If using progressive scaling
        self.n = self.progressive_scaling(progress)
        # Loop over all reference images
        photometric_losses = [[] for _ in range(self.n)]
        images = match_scales(image, inv_depths, self.n)
        extrinsics = torch.tensor(extrinsics,
                                  dtype=torch.float32,
                                  device="cuda")
        # Step 1: Calculate the losses temporal-wise
        for j, (ref_image, pose) in enumerate(zip(context, poses)):
            # Calculate warped images
            ref_warped = self.warp_ref_image_temporal(inv_depths, ref_image, K,
                                                      ref_K, pose)

            # Calculate and store image loss

            # print('### poses shape', len(poses))
            # print('poses[0].shape:', poses[0].shape)
            # print('###multiview_photometric_loss printing ref_warped')
            # print('len of images: ',len(images))
            # print('shape of images[0]: ', images[0].shape)
            # print('len of context: ',len(context))
            # print('shape of context[0]:', context[0].shape)
            # print('len of ref_warped: ',len(ref_warped))
            # print('shape of ref_warped[0]', ref_warped[0].shape)

            # pic_orig = images[0][0].cpu().clone()
            # pic_orig = (pic_orig.squeeze(0).permute(1,2,0).detach().numpy()*255).astype(np.uint8)
            # pic_ref = context[0][0].cpu().clone()
            # pic_ref = (pic_ref.squeeze(0).permute(1, 2, 0).detach().numpy() * 255).astype(np.uint8)
            # pic_warped = ref_warped[0][0].cpu().clone()
            # pic_warped = (pic_warped.squeeze(0).permute(1,2,0).detach().numpy()*255).astype(np.uint8)
            # final_frame = cv2.hconcat((pic_orig, pic_ref, pic_warped))
            # cv2.imshow('temporal warping', final_frame)
            # cv2.waitKey()

            photometric_loss = self.calc_photometric_loss(ref_warped, images)
            for i in range(self.n):
                photometric_losses[i].append(self.temporal_loss_weight *
                                             photometric_loss[i])
            # If using automask
            if self.automask_loss:
                # Calculate and store unwarped image loss
                ref_images = match_scales(ref_image, inv_depths, self.n)
                unwarped_image_loss = self.calc_photometric_loss(
                    ref_images, images)
                for i in range(self.n):
                    photometric_losses[i].append(self.temporal_loss_weight *
                                                 unwarped_image_loss[i])

        # Step 2: Calculate the losses spatial-wise
        # reconstruct context images
        num_cameras, C, H, W = image.shape  # should be (6, 3, H, W)
        left_swap = [i for i in range(-1, num_cameras - 1)]
        right_swap = [i % 6 for i in range(1, num_cameras + 1)]
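        # With 6 cameras, left_swap = [-1, 0, 1, 2, 3, 4] and right_swap = [1, 2, 3, 4, 5, 0],
        # so image[left_swap][i] is the image of camera (i - 1) % 6 and image[right_swap][i]
        # that of camera (i + 1) % 6, i.e. the two spatial neighbours of camera i
        # (assuming the cameras are ordered around the rig).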

        # for i in range(num_cameras):
        #     pic = image[i].cpu().clone()
        #     pic = (pic.squeeze(0).permute(1, 2, 0).detach().numpy() * 255).astype(np.uint8)
        #     if i == 0:
        #         final_file = pic
        #     else:
        #         final_file = cv2.hconcat((final_file, pic))
        # cv2.imshow('6 images', final_file)
        # cv2.waitKey()

        context_spatial = [[], []]  # [[B,3,H,W],[B,3,H,W]]
        context_spatial[0] = image[left_swap, ...]
        context_spatial[1] = image[right_swap, ...]
        K_spatial = K  # tensor [B,3,3]
        ref_K_spatial = [[], []]  # [[B,3,3],[B,3,3]]
        ref_K_spatial[0] = K[left_swap, ...]
        ref_K_spatial[1] = K[right_swap, ...]
        # reconstruct extrinsics
        extrinsics_1 = extrinsics  # [B,4,4]
        extrinsics_2 = [[], []]  # [[B,4,4],[B,4,4]]
        extrinsics_2[0] = extrinsics_1[left_swap, ...]
        extrinsics_2[1] = extrinsics_1[right_swap, ...]
        # calculate spatial-wise photometric loss
        for j, ref_image in enumerate(context_spatial):
            # Calculate warped images
            ref_warped, valid_points_masks = self.warp_ref_image_spatial(
                inv_depths, ref_image, K_spatial, ref_K_spatial[j],
                Pose(extrinsics_1), Pose(extrinsics_2[j]))

            # pic_orig = images[0][1].cpu().clone()
            # pic_orig = (pic_orig.squeeze(0).permute(1,2,0).detach().numpy()*255).astype(np.uint8)
            # pic_ref = context_spatial[0][1].cpu().clone()
            # pic_ref = (pic_ref.squeeze(0).permute(1, 2, 0).detach().numpy() * 255).astype(np.uint8)
            # pic_warped = ref_warped[0][1].cpu().clone()
            # pic_warped = (pic_warped.squeeze(0).permute(1,2,0).detach().numpy()*255).astype(np.uint8)
            # pic_valid = valid_points_masks[0][1].cpu().clone()
            # pic_valid = (pic_valid.permute(1,2,0).detach().numpy()*255).astype(np.uint8)
            # final_frame = cv2.hconcat((pic_orig, pic_ref, pic_warped))
            # cv2.imshow('spatial warping', final_frame)
            # cv2.waitKey()
            # # cv2.imshow('pic_valid', pic_valid)
            # # cv2.waitKey()

            # Calculate and store image loss
            photometric_loss = [
                self.calc_photometric_loss(ref_warped, images)[i] *
                valid_points_masks[i] for i in range(len(valid_points_masks))
            ]
            for i in range(self.n):
                photometric_losses[i].append(self.spatial_loss_weight *
                                             photometric_loss[i])

        # Step 3: Calculate the loss temporal-spatial wise
        # reconstruct context images
        context_temporal_spatial = []
        # [context_temporal_spatial_backward, context_temporal_spatial_forward]
        # [[left t-1, right t-1], [left t+1, right t+1]]
        # [[[B,H,W],[B,H,W]],[[B,H,W],[B,H,W]]]
        # reconstruct intrinsics
        K_temporal_spatial = K
        ref_K_temporal_spatial = []
        # reconstruct extrinsics
        extrinsics_1_ts = extrinsics
        extrinsics_2_ts = []
        # reconstruct pose
        poses_ts = []
        for l in range(len(context)):
            context_temporal_spatial.append(
                [context[l][left_swap, ...], context[l][right_swap, ...]])
            ref_K_temporal_spatial.append([
                K_temporal_spatial[left_swap, ...],
                K_temporal_spatial[right_swap, ...]
            ])
            extrinsics_2_ts.append(
                [extrinsics[left_swap, ...], extrinsics[right_swap, ...]])
            poses_ts.append([
                Pose(poses[l].item()[left_swap, ...]),
                Pose(poses[l].item()[right_swap, ...])
            ])
        # calculate spatial-wise photometric loss
        for j, (ref_image,
                pose) in enumerate(zip(context_temporal_spatial, poses_ts)):
            # Calculate warped images
            for k in range(len(ref_image)):
                ref_warped, valid_points_masks = self.warp_ref_image_spatial(
                    inv_depths, ref_image[k], K_temporal_spatial,
                    ref_K_temporal_spatial[j][k], Pose(extrinsics_1_ts),
                    Pose(extrinsics_2_ts[j][k]) @ pose[k].inverse())

                # for i in range(6):
                #     pic_orig = images[0][i].cpu().clone()
                #     pic_orig = (pic_orig.squeeze(0).permute(1,2,0).detach().numpy()*255).astype(np.uint8)
                #     pic_ref = context_spatial[0][i].cpu().clone()
                #     pic_ref = (pic_ref.squeeze(0).permute(1, 2, 0).detach().numpy() * 255).astype(np.uint8)
                #     pic_warped = ref_warped[0][i].cpu().clone()
                #     pic_warped = (pic_warped.squeeze(0).permute(1,2,0).detach().numpy()*255).astype(np.uint8)
                #     pic_valid = valid_points_masks[0][i].cpu().clone()
                #     pic_valid = (pic_valid.permute(1,2,0).detach().numpy()*255).astype(np.uint8)
                #     final_frame = cv2.hconcat((pic_orig, pic_ref, pic_warped))
                #     cv2.imshow('temporal spatial warping', final_frame)
                #     cv2.waitKey()
                #     cv2.imshow('pic_valid', pic_valid)
                #     cv2.waitKey()

                # Calculate and store image loss
                photometric_loss = [self.calc_photometric_loss(ref_warped, images)[i] * valid_points_masks[i] \
                                    for i in range(len(valid_points_masks))]
                for i in range(self.n):
                    photometric_losses[i].append(
                        self.temporal_spatial_loss_weight *
                        photometric_loss[i])

        # Step 4: Calculate reduced photometric loss
        loss = self.reduce_photometric_loss(photometric_losses)
        # Include smoothness loss if requested
        if self.smooth_loss_weight > 0.0:
            loss += self.calc_smoothness_loss(inv_depths, images)
        # Return losses and metrics
        return {
            'loss': loss.unsqueeze(0),
            'metrics': self.metrics,
        }
    def forward(self,
                image,
                ref_images_temporal_context,
                ref_images_geometric_context,
                ref_images_geometric_context_temporal_context,
                inv_depths,
                poses_temporal_context,
                poses_geometric_context,
                poses_geometric_context_temporal_context,
                camera_type,
                intrinsics_poly_coeffs,
                intrinsics_principal_point,
                intrinsics_scale_factors,
                intrinsics_K,
                intrinsics_k,
                intrinsics_p,
                path_to_ego_mask,
                camera_type_geometric_context,
                intrinsics_poly_coeffs_geometric_context,
                intrinsics_principal_point_geometric_context,
                intrinsics_scale_factors_geometric_context,
                intrinsics_K_geometric_context,
                intrinsics_k_geometric_context,
                intrinsics_p_geometric_context,
                path_to_ego_mask_geometric_context,
                return_logs=False,
                progress=0.0):
        """
        Calculates training photometric loss.

        Parameters
        ----------
        image : torch.Tensor [B,3,H,W]
            Original image
        ref_images_temporal_context : list of torch.Tensor [B,3,H,W]
            Reference images from the temporal context
        ref_images_geometric_context : list of torch.Tensor [B,3,H,W]
            Reference images from the geometric (multi-camera) context
        ref_images_geometric_context_temporal_context : list of torch.Tensor [B,3,H,W]
            Reference images from the temporal context of the geometric context
        inv_depths : list of torch.Tensor [B,1,H,W]
            Predicted depth maps for the original image, in all scales
        poses_temporal_context : list of Pose
            Camera transformations between the original image and its temporal context
        poses_geometric_context : list
            Camera transformations between the original camera and the geometric-context cameras
        poses_geometric_context_temporal_context : list of Pose
            Camera transformations for the temporal context of the geometric-context cameras
        camera_type : torch.Tensor (int)
            Camera type of the original camera (0 is fisheye, 1 is distorted, 2 is other)
        intrinsics_poly_coeffs, intrinsics_principal_point, intrinsics_scale_factors, intrinsics_K, intrinsics_k, intrinsics_p : torch.Tensor
            Intrinsic parameters of the original camera
        path_to_ego_mask : list of str
            Paths to the ego masks of the original camera
        camera_type_geometric_context, intrinsics_*_geometric_context, path_to_ego_mask_geometric_context
            Same quantities for each geometric-context camera
        return_logs : bool
            True if logs are saved for visualization
        progress : float
            Training percentage

        Returns
        -------
        losses_and_metrics : dict
            Output dictionary
        """
        # If using progressive scaling
        self.n = self.progressive_scaling(progress)
        # Loop over all reference images
        photometric_losses = [[] for _ in range(self.n)]
        images = match_scales(image, inv_depths, self.n)

        n_temporal_context = len(ref_images_temporal_context)
        n_geometric_context = len(ref_images_geometric_context)
        assert len(ref_images_geometric_context_temporal_context
                   ) == n_temporal_context * n_geometric_context
        B = len(path_to_ego_mask)

        device = image.get_device()

        # getting ego masks for target and source cameras
        # fullsize mask
        H_full = 800
        W_full = 1280
        ego_mask_tensor = torch.ones(B, 1, H_full, W_full).to(device)
        ref_ego_mask_tensor_geometric_context = []
        for i_geometric_context in range(n_geometric_context):
            ref_ego_mask_tensor_geometric_context.append(
                torch.ones(B, 1, H_full, W_full).to(device))
        for b in range(B):
            ego_mask_tensor[b, 0] = torch.from_numpy(
                np.load(path_to_ego_mask[b])).float()
            for i_geometric_context in range(n_geometric_context):
                if camera_type_geometric_context[b, i_geometric_context] != 2:
                    ref_ego_mask_tensor_geometric_context[i_geometric_context][b, 0] = \
                        torch.from_numpy(np.load(path_to_ego_mask_geometric_context[i_geometric_context][b])).float()

        # resized masks
        ego_mask_tensors = []
        ref_ego_mask_tensors_geometric_context = []
        for i_geometric_context in range(n_geometric_context):
            ref_ego_mask_tensors_geometric_context.append([])
        for i in range(self.n):
            _, _, H, W = images[i].shape
            if W < W_full:
                ego_mask_tensors.append(
                    interpolate_image(ego_mask_tensor,
                                      shape=(B, 1, H, W),
                                      mode='nearest',
                                      align_corners=None))
                for i_geometric_context in range(n_geometric_context):
                    ref_ego_mask_tensors_geometric_context[
                        i_geometric_context].append(
                            interpolate_image(
                                ref_ego_mask_tensor_geometric_context[
                                    i_geometric_context],
                                shape=(B, 1, H, W),
                                mode='nearest',
                                align_corners=None))
            else:
                ego_mask_tensors.append(ego_mask_tensor)
                for i_geometric_context in range(n_geometric_context):
                    ref_ego_mask_tensors_geometric_context[
                        i_geometric_context].append(
                            ref_ego_mask_tensor_geometric_context[
                                i_geometric_context])

        # Dummy camera mask (B x n_geometric_context)
        Cmask = (camera_type_geometric_context == 2).detach()

        # temporal context
        for j, (ref_image, pose) in enumerate(
                zip(ref_images_temporal_context, poses_temporal_context)):
            # Calculate warped images
            ref_warped, ref_ego_mask_tensors_warped = \
                self.warp_ref_image(inv_depths,
                                    camera_type,
                                    intrinsics_poly_coeffs,
                                    intrinsics_principal_point,
                                    intrinsics_scale_factors,
                                    intrinsics_K,
                                    intrinsics_k,
                                    intrinsics_p,
                                    ref_image,
                                    pose,
                                    ego_mask_tensors,
                                    camera_type,
                                    intrinsics_poly_coeffs,
                                    intrinsics_principal_point,
                                    intrinsics_scale_factors,
                                    intrinsics_K,
                                    intrinsics_k,
                                    intrinsics_p)
            # Calculate and store image loss
            photometric_loss = self.calc_photometric_loss(ref_warped, images)
            for i in range(self.n):
                photometric_losses[i].append(photometric_loss[i] *
                                             ego_mask_tensors[i] *
                                             ref_ego_mask_tensors_warped[i])
            # If using automask
            if self.automask_loss:
                # Calculate and store unwarped image loss
                ref_images = match_scales(ref_image, inv_depths, self.n)
                unwarped_image_loss = self.calc_photometric_loss(
                    ref_images, images)
                for i in range(self.n):
                    photometric_losses[i].append(unwarped_image_loss[i] *
                                                 ego_mask_tensors[i] *
                                                 ego_mask_tensors[i])

        # geometric context
        for j, (ref_image, pose) in enumerate(
                zip(ref_images_geometric_context, poses_geometric_context)):
            if Cmask[:, j].sum() < B:
                # Calculate warped images
                ref_warped, ref_ego_mask_tensors_warped = \
                    self.warp_ref_image(inv_depths,
                                        camera_type,
                                        intrinsics_poly_coeffs,
                                        intrinsics_principal_point,
                                        intrinsics_scale_factors,
                                        intrinsics_K,
                                        intrinsics_k,
                                        intrinsics_p,
                                        ref_image,
                                        Pose(pose),
                                        ref_ego_mask_tensors_geometric_context[j],
                                        camera_type_geometric_context[:, j],
                                        intrinsics_poly_coeffs_geometric_context[j],
                                        intrinsics_principal_point_geometric_context[j],
                                        intrinsics_scale_factors_geometric_context[j],
                                        intrinsics_K_geometric_context[j],
                                        intrinsics_k_geometric_context[j],
                                        intrinsics_p_geometric_context[j])
                # Calculate and store image loss
                photometric_loss = self.calc_photometric_loss(
                    ref_warped, images)
                for i in range(self.n):
                    photometric_loss[i][Cmask[:, j]] = 0.0
                    photometric_losses[i].append(
                        photometric_loss[i] * ego_mask_tensors[i] *
                        ref_ego_mask_tensors_warped[i])
                # If using automask
                if self.automask_loss:
                    # Calculate and store unwarped image loss
                    ref_images = match_scales(ref_image, inv_depths, self.n)
                    unwarped_image_loss = self.calc_photometric_loss(
                        ref_images, images)
                    for i in range(self.n):
                        unwarped_image_loss[i][Cmask[:, j]] = 0.0
                        photometric_losses[i].append(
                            unwarped_image_loss[i] * ego_mask_tensors[i] *
                            ref_ego_mask_tensors_geometric_context[j][i])

        # geometric-temporal context
        for j, (ref_image, pose) in enumerate(
                zip(ref_images_geometric_context_temporal_context,
                    poses_geometric_context_temporal_context)):
            j_geometric = j // n_temporal_context
            if Cmask[:, j_geometric].sum() < B:
                # Calculate warped images
                ref_warped, ref_ego_mask_tensors_warped = \
                    self.warp_ref_image(inv_depths,
                                        camera_type,
                                        intrinsics_poly_coeffs,
                                        intrinsics_principal_point,
                                        intrinsics_scale_factors,
                                        intrinsics_K,
                                        intrinsics_k,
                                        intrinsics_p,
                                        ref_image,
                                        Pose(pose.mat @ poses_geometric_context[j_geometric]),  # WARNING: to be verified (change of reference frame!)
                                        ref_ego_mask_tensors_geometric_context[j_geometric],
                                        camera_type_geometric_context[:, j_geometric],
                                        intrinsics_poly_coeffs_geometric_context[j_geometric],
                                        intrinsics_principal_point_geometric_context[j_geometric],
                                        intrinsics_scale_factors_geometric_context[j_geometric],
                                        intrinsics_K_geometric_context[j_geometric],
                                        intrinsics_k_geometric_context[j_geometric],
                                        intrinsics_p_geometric_context[j_geometric])
                # Calculate and store image loss
                photometric_loss = self.calc_photometric_loss(
                    ref_warped, images)
                for i in range(self.n):
                    photometric_loss[i][Cmask[:, j_geometric]] = 0.0
                    photometric_losses[i].append(
                        photometric_loss[i] * ego_mask_tensors[i] *
                        ref_ego_mask_tensors_warped[i])
                # If using automask
                if self.automask_loss:
                    # Calculate and store unwarped image loss
                    ref_images = match_scales(ref_image, inv_depths, self.n)
                    unwarped_image_loss = self.calc_photometric_loss(
                        ref_images, images)
                    for i in range(self.n):
                        unwarped_image_loss[i][Cmask[:, j_geometric]] = 0.0
                        photometric_losses[i].append(
                            unwarped_image_loss[i] * ego_mask_tensors[i] *
                            ref_ego_mask_tensors_geometric_context[j_geometric]
                            [i])

        # Calculate reduced photometric loss
        loss = self.nonzero_reduce_photometric_loss(photometric_losses)
        # Include smoothness loss if requested
        if self.smooth_loss_weight > 0.0:
            loss += self.calc_smoothness_loss(
                [a * b for a, b in zip(inv_depths, ego_mask_tensors)],
                [a * b for a, b in zip(images, ego_mask_tensors)])
        # Return losses and metrics
        return {
            'loss': loss.unsqueeze(0),
            'metrics': self.metrics,
        }
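
# Editor's note: `nonzero_reduce_photometric_loss` is not shown in this snippet.
# The sketch below is a minimal illustration (an assumption, not the original
# implementation) of such a reduction: for each scale it takes the per-pixel
# minimum over all warped sources, ignoring pixels where every source is zero
# (i.e. masked out), and averages the result over scales.
import torch

def nonzero_reduce_photometric_loss_sketch(photometric_losses):
    """photometric_losses: list (scales) of lists of [B,1,H,W] loss maps."""
    scale_losses = []
    for loss_maps in photometric_losses:
        stacked = torch.cat(loss_maps, dim=1)                      # [B, n_sources, H, W]
        # Masked pixels are exactly zero; push them to +inf so they never win the min
        stacked = torch.where(stacked == 0.0,
                              torch.full_like(stacked, float('inf')), stacked)
        min_loss, _ = stacked.min(dim=1, keepdim=True)             # min over sources
        valid = torch.isfinite(min_loss)
        scale_losses.append(min_loss[valid].mean() if valid.any()
                            else stacked.new_zeros(()))
    return sum(scale_losses) / len(scale_losses)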
    def reconstruct_distorted(self, depth, mask, frame='w'):
        """
        Reconstructs pixel-wise 3D points from a depth map.

        Parameters
        ----------
        depth : torch.Tensor [B,1,H,W]
            Depth map for the camera
        frame : 'w'
            Reference frame: 'c' for camera and 'w' for world

        Returns
        -------
        points : torch.tensor [B,3,H,W]
            Pixel-wise 3D points
        """
        B, C, H, W = depth[mask].shape
        assert C == 1

        # Create flat index grid
        grid = image_grid(B, H, W, depth.dtype, depth.device, normalized=False)  # [B,3,H,W]
        flat_grid = grid.view(B, 3, -1)  # [B,3,HW]

        device = depth.get_device()

        # Estimate the outward undistorted rays in the camera frame
        Xnorm_u = (self.Kinv[mask].bmm(flat_grid)).view(B, 3, H, W)

        # Fixed-point inversion of the distortion model: 'v1' updates in place,
        # 'v2' keeps intermediate iterates, 'v3' loads precomputed rays from a LUT
        version = 'v1'
        N = 5  # number of fixed-point iterations

        if version == 'v1':
            x = Xnorm_u[:, 0, :, :].view(B, 1, H, W)
            y = Xnorm_u[:, 1, :, :].view(B, 1, H, W)

            x_src = torch.clone(x)
            y_src = torch.clone(y)

            for _ in range(N):
                r2 = x.pow(2) + y.pow(2)
                r4 = r2.pow(2)
                r6 = r2 * r4

                rad_dist = 1 / (1 + self.k1[mask].view(B,1,1,1) * r2 + self.k2[mask].view(B,1,1,1) * r4 + self.k3[mask].view(B,1,1,1) * r6)
                tang_dist_x = 2 * self.p1[mask].view(B,1,1,1) * x * y + self.p2[mask].view(B,1,1,1) * (r2 + 2 * x.pow(2))
                tang_dist_y = 2 * self.p2[mask].view(B,1,1,1) * x * y + self.p1[mask].view(B,1,1,1) * (r2 + 2 * y.pow(2))

                x = (x_src - tang_dist_x) * rad_dist
                y = (y_src - tang_dist_y) * rad_dist

            # Distorted rays
            Xnorm_d = torch.stack([x.squeeze(1), y.squeeze(1), torch.ones_like(x).squeeze(1)], dim=1)

        elif version == 'v2':
            x_list = []
            y_list = []

            x_list.append(Xnorm_u[:, 0, :, :].view(B, 1, H, W))
            y_list.append(Xnorm_u[:, 1, :, :].view(B, 1, H, W))

            for n in range(N):
                r2 = x_list[-1].pow(2) + y_list[-1].pow(2)
                r4 = r2.pow(2)
                r6 = r2 * r4

                rad_dist = 1 / (1 + self.k1[mask].view(B,1,1,1) * r2 + self.k2[mask].view(B,1,1,1) * r4 + self.k3[mask].view(B,1,1,1) * r6)
                tang_dist_x = 2 * self.p1[mask].view(B,1,1,1) * x_list[-1] * y_list[-1] + self.p2[mask].view(B,1,1,1) * (r2 + 2 * x_list[-1].pow(2))
                tang_dist_y = 2 * self.p2[mask].view(B,1,1,1) * x_list[-1] * y_list[-1] + self.p1[mask].view(B,1,1,1) * (r2 + 2 * y_list[-1].pow(2))

                x_list.append((x_list[0] - tang_dist_x) * rad_dist)
                y_list.append((y_list[0] - tang_dist_y) * rad_dist)

            # Distorted rays
            Xnorm_d = torch.stack([x_list[-1].squeeze(1), y_list[-1].squeeze(1), torch.ones_like(x_list[-1]).squeeze(1)], dim=1)

        elif version == 'v3':
            Xnorm_d = torch.zeros(B, 3, H, W)
            for b in range(B):
                Xnorm_d[b] = torch.from_numpy(np.load(self.path_to_theta_lut[b]))

        Xnorm_d = Xnorm_d.to(device)

        # Renormalize Xnorm_d to unit length, then scale the rays to metric depth
        Xc = ((Xnorm_d / torch.sqrt((Xnorm_d[:, 0, :, :].pow(2)
                                    + Xnorm_d[:, 1, :, :].pow(2)
                                    + Xnorm_d[:, 2, :, :].pow(2)).clamp(min=1e-5)).view(B, 1, H, W)) * depth[mask]).float()

        # If in camera frame of reference
        if frame == 'c':
            return Xc
        # If in world frame of reference
        elif frame == 'w':
            return Pose(self.Twc.mat[mask]) @ Xc
        # If none of the above
        else:
            raise ValueError('Unknown reference frame {}'.format(frame))
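
# Editor's note: the 'v1' loop in reconstruct_distorted is the classic fixed-point
# inversion of a Brown-Conrady radial/tangential distortion model (similar to what
# OpenCV's undistortPoints does). The standalone NumPy sketch below illustrates it
# on a single normalized point; the coefficient values are made up for illustration.
import numpy as np

def distort_point(xu, yu, k1, k2, k3, p1, p2):
    """Forward distortion model, used here only to check the inversion."""
    r2 = xu * xu + yu * yu
    rad = 1.0 + k1 * r2 + k2 * r2 ** 2 + k3 * r2 ** 3
    xd = xu * rad + 2 * p1 * xu * yu + p2 * (r2 + 2 * xu * xu)
    yd = yu * rad + 2 * p2 * xu * yu + p1 * (r2 + 2 * yu * yu)
    return xd, yd

def undistort_point(xd, yd, k1, k2, k3, p1, p2, n_iter=5):
    """Fixed-point iteration recovering undistorted coords from distorted ones."""
    x, y = xd, yd
    for _ in range(n_iter):
        r2 = x * x + y * y
        rad = 1.0 / (1.0 + k1 * r2 + k2 * r2 ** 2 + k3 * r2 ** 3)
        dx = 2 * p1 * x * y + p2 * (r2 + 2 * x * x)
        dy = 2 * p2 * x * y + p1 * (r2 + 2 * y * y)
        x = (xd - dx) * rad
        y = (yd - dy) * rad
    return x, y

# Round-trip check: distort a point, then invert the distortion
_coeffs = dict(k1=-0.2, k2=0.05, k3=0.0, p1=1e-3, p2=-1e-3)  # illustrative values
_xd, _yd = distort_point(0.3, -0.2, **_coeffs)
assert np.allclose(undistort_point(_xd, _yd, **_coeffs), (0.3, -0.2), atol=1e-6)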
def infer_optimal_calib(input_files, model_wrappers, image_shape, half):
    """
    Process a single input file to produce and save visualization

    Parameters
    ----------
    input_file : list (number of cameras) of lists (number of files) of str
        Image file
    output_file : str
        Output file, or folder where the output will be saved
    model_wrapper : nn.Module
        Model wrapper used for inference
    image_shape : Image shape
        Input image shape
    half: bool
        use half precision (fp16)
    save: str
        Save format (npz or png)
    """
    N_files = len(input_files[0])
    N_cams = 4

    # change to half precision for evaluation if requested
    dtype = torch.float16 if half else None

    # 3 correction angles (in degrees) per camera, 4 cameras -> 12 parameters to optimize
    extra_rot_deg = torch.zeros(12, requires_grad=True)

    save_pictures = True

    n_epochs = 100
    learning_rate = 0.05
    loss_tab = np.zeros(n_epochs)

    optimizer = optim.Adam([extra_rot_deg], lr=learning_rate)

    for epoch in range(n_epochs):

        extra_rot_deg_grad_list = []
        loss_sum = 0

        for i_file in range(N_files):

            cams = []
            not_masked = []

            camera_names = []
            for i_cam in range(N_cams):
                camera_names.append(get_camera_name(
                    input_files[i_cam][i_file]))

            for i_cam in range(N_cams):
                base_folder_str = get_base_folder(input_files[i_cam][i_file])
                split_type_str = get_split_type(input_files[i_cam][i_file])
                seq_name_str = get_sequence_name(input_files[i_cam][i_file])
                camera_str = get_camera_name(input_files[i_cam][i_file])

                calib_data = {}
                calib_data[camera_str] = read_raw_calib_files_camera_valeo(
                    base_folder_str, split_type_str, seq_name_str, camera_str)

                path_to_theta_lut = get_path_to_theta_lut(
                    input_files[i_cam][0])
                path_to_ego_mask = get_path_to_ego_mask(input_files[i_cam][0])
                poly_coeffs, principal_point, scale_factors = get_intrinsics(
                    input_files[i_cam][0], calib_data)

                poly_coeffs = torch.from_numpy(poly_coeffs).unsqueeze(0)
                principal_point = torch.from_numpy(principal_point).unsqueeze(
                    0)
                scale_factors = torch.from_numpy(scale_factors).unsqueeze(0)
                pose_matrix = torch.from_numpy(
                    get_extrinsics_pose_matrix(input_files[i_cam][0],
                                               calib_data)).unsqueeze(0)
                pose_tensor = Pose(pose_matrix)

                cams.append(
                    CameraFisheye(path_to_theta_lut=[path_to_theta_lut],
                                  path_to_ego_mask=[path_to_ego_mask],
                                  poly_coeffs=poly_coeffs.float(),
                                  principal_point=principal_point.float(),
                                  scale_factors=scale_factors.float(),
                                  Tcw=pose_tensor))
                if torch.cuda.is_available():
                    cams[i_cam] = cams[i_cam].to('cuda:{}'.format(rank()),
                                                 dtype=dtype)

                ego_mask = np.load(path_to_ego_mask)
                not_masked.append(
                    torch.from_numpy(ego_mask.astype(float)).cuda().float())

            base_0, ext_0 = os.path.splitext(
                os.path.basename(input_files[0][i_file]))
            print(base_0)

            images = []
            images_numpy = []
            pred_inv_depths = []
            pred_depths = []
            for i_cam in range(N_cams):
                images.append(
                    load_image(input_files[i_cam][i_file]).convert('RGB'))
                images[i_cam] = resize_image(images[i_cam], image_shape)
                images[i_cam] = to_tensor(images[i_cam]).unsqueeze(0)
                if torch.cuda.is_available():
                    images[i_cam] = images[i_cam].to('cuda:{}'.format(rank()),
                                                     dtype=dtype)
                images_numpy.append(images[i_cam][0].cpu().numpy())
                #images_numpy[i_cam] = images_numpy[i_cam][not_masked[i_cam]]
                with torch.no_grad():
                    pred_inv_depths.append(model_wrappers[0].depth(
                        images[i_cam]))
                    pred_depths.append(inv2depth(pred_inv_depths[i_cam]))
                #mask_colors_blue = np.sum(np.abs(colors_tmp - np.array([0.6, 0.8, 1])), axis=1) < 0.6  # sky blue

            def photo_loss_2imgs(i_cam1, i_cam2, rot_vect_list, save_pictures,
                                 rot_str):
                """
                Computes the photometric loss between two images from adjacent cameras.

                Each image is reconstructed from the adjacent one, using the calibration
                data, the depth prediction model and the correction angles alpha, beta, gamma.
                """

                for i, i_cam in enumerate([i_cam1, i_cam2]):
                    base_folder_str = get_base_folder(
                        input_files[i_cam][i_file])
                    split_type_str = get_split_type(input_files[i_cam][i_file])
                    seq_name_str = get_sequence_name(
                        input_files[i_cam][i_file])
                    camera_str = get_camera_name(input_files[i_cam][i_file])
                    calib_data = {}
                    calib_data[camera_str] = read_raw_calib_files_camera_valeo(
                        base_folder_str, split_type_str, seq_name_str,
                        camera_str)
                    pose_matrix = get_extrinsics_pose_matrix_extra_rot_torch(
                        input_files[i_cam][i_file], calib_data,
                        rot_vect_list[i]).unsqueeze(0)
                    pose_tensor = Pose(pose_matrix).to('cuda:{}'.format(
                        rank()),
                                                       dtype=dtype)
                    CameraFisheye.Twc.fget.cache_clear()
                    cams[i_cam].Tcw = pose_tensor

                world_points1 = cams[i_cam1].reconstruct(pred_depths[i_cam1],
                                                         frame='w')
                world_points2 = cams[i_cam2].reconstruct(pred_depths[i_cam2],
                                                         frame='w')

                #depth1_wrt_cam2 = torch.norm(cams[i_cam2].Tcw @ world_points1, dim=1, keepdim=True)
                #depth2_wrt_cam1 = torch.norm(cams[i_cam1].Tcw @ world_points2, dim=1, keepdim=True)

                ref_coords1to2 = cams[i_cam2].project(world_points1, frame='w')
                ref_coords2to1 = cams[i_cam1].project(world_points2, frame='w')

                reconstructedImg2to1 = funct.grid_sample(images[i_cam2] *
                                                         not_masked[i_cam2],
                                                         ref_coords1to2,
                                                         mode='bilinear',
                                                         padding_mode='zeros',
                                                         align_corners=True)
                reconstructedImg1to2 = funct.grid_sample(images[i_cam1] *
                                                         not_masked[i_cam1],
                                                         ref_coords2to1,
                                                         mode='bilinear',
                                                         padding_mode='zeros',
                                                         align_corners=True)

                #print(reconstructedImg2to1)
                # z = reconstructedImg2to1.sum()
                # z.backward()

                if save_pictures:
                    if epoch == 0:
                        cv2.imwrite(
                            '/home/vbelissen/Downloads/cam_' + str(i_cam1) +
                            '_' + str(i_file) + '_orig.png',
                            torch.transpose(
                                (images[i_cam1][0, :, :, :]
                                 ).unsqueeze(0).unsqueeze(4), 1,
                                4).squeeze().detach().cpu().numpy() * 255)
                        cv2.imwrite(
                            '/home/vbelissen/Downloads/cam_' + str(i_cam2) +
                            '_' + str(i_file) + '_orig.png',
                            torch.transpose(
                                (images[i_cam2][0, :, :, :]
                                 ).unsqueeze(0).unsqueeze(4), 1,
                                4).squeeze().detach().cpu().numpy() * 255)
                    cv2.imwrite(
                        '/home/vbelissen/Downloads/cam_' + str(i_cam1) + '_' +
                        str(i_file) + '_recons_from_' + str(i_cam2) + '_rot' +
                        rot_str + '.png',
                        torch.transpose(
                            ((reconstructedImg2to1 * not_masked[i_cam1]
                              )[0, :, :, :]).unsqueeze(0).unsqueeze(4), 1,
                            4).squeeze().detach().cpu().numpy() * 255)
                    cv2.imwrite(
                        '/home/vbelissen/Downloads/cam_' + str(i_cam2) + '_' +
                        str(i_file) + '_recons_from_' + str(i_cam1) + '_rot' +
                        rot_str + '.png',
                        torch.transpose(
                            ((reconstructedImg1to2 * not_masked[i_cam2]
                              )[0, :, :, :]).unsqueeze(0).unsqueeze(4), 1,
                            4).squeeze().detach().cpu().numpy() * 255)

                #reconstructedDepth2to1 = funct.grid_sample(pred_depths[i_cam2], ref_coords1to2, mode='bilinear', padding_mode='zeros', align_corners=True)
                #reconstructedDepth1to2 = funct.grid_sample(pred_depths[i_cam1], ref_coords2to1, mode='bilinear', padding_mode='zeros', align_corners=True)

                #reconstructedEgo2to1 = funct.grid_sample(not_masked[i_cam2], ref_coords1to2, mode='bilinear', padding_mode='zeros', align_corners=True)
                #reconstructedEgo1to2 = funct.grid_sample(not_masked[i_cam1], ref_coords2to1, mode='bilinear', padding_mode='zeros', align_corners=True)

                l1_loss_1 = torch.abs(images[i_cam1] * not_masked[i_cam1] -
                                      reconstructedImg2to1 *
                                      not_masked[i_cam1])
                l1_loss_2 = torch.abs(images[i_cam2] * not_masked[i_cam2] -
                                      reconstructedImg1to2 *
                                      not_masked[i_cam2])

                # SSIM loss
                ssim_loss_weight = 0.85
                ssim_loss_1 = SSIM(images[i_cam1] * not_masked[i_cam1],
                                   reconstructedImg2to1 * not_masked[i_cam1],
                                   C1=1e-4,
                                   C2=9e-4,
                                   kernel_size=3)
                ssim_loss_2 = SSIM(images[i_cam2] * not_masked[i_cam2],
                                   reconstructedImg1to2 * not_masked[i_cam2],
                                   C1=1e-4,
                                   C2=9e-4,
                                   kernel_size=3)

                ssim_loss_1 = torch.clamp((1. - ssim_loss_1) / 2., 0., 1.)
                ssim_loss_2 = torch.clamp((1. - ssim_loss_2) / 2., 0., 1.)

                # Weighted Sum: alpha * ssim + (1 - alpha) * l1
                photometric_loss_1 = ssim_loss_weight * ssim_loss_1.mean(
                    1, True) + (1 - ssim_loss_weight) * l1_loss_1.mean(
                        1, True)
                photometric_loss_2 = ssim_loss_weight * ssim_loss_2.mean(
                    1, True) + (1 - ssim_loss_weight) * l1_loss_2.mean(
                        1, True)

                mask1 = photometric_loss_1 != 0
                s1 = mask1.sum()
                loss_1 = (photometric_loss_1 *
                          mask1).sum() / s1 if s1 > 0 else 0

                mask2 = photometric_loss_2 != 0
                s2 = mask2.sum()
                loss_2 = (photometric_loss_2 *
                          mask2).sum() / s2 if s2 > 0 else 0

                #valid1 = ...
                #valid2 = ...

                return loss_1 + loss_2

            loss = sum([
                photo_loss_2imgs(
                    i, (i + 1) % 4, [
                        extra_rot_deg[3 * i:3 * (i + 1)],
                        extra_rot_deg[3 * ((i + 1) % 4):3 *
                                      (((i + 1) % 4) + 1)]
                    ], save_pictures, '_' + '_'.join([
                        str(int(100 * extra_rot_deg[i_rot]) / 100)
                        for i_rot in range(12)
                    ])) for i in range(4)
            ])
            #print(loss)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            with torch.no_grad():
                #extra_rot_deg_grad_list.append(extra_rot_deg.grad.clone())
                #extra_rot_deg.grad.zero_()
                loss_sum += loss.item()
                print(loss)

        with torch.no_grad():
            #extra_rot_deg.sub_(sum(extra_rot_deg_grad_list)/N_files*learning_rate)
            print('End of epoch')
            print(extra_rot_deg)

        loss_tab[epoch] = loss_sum / N_files

    plt.plot(loss_tab)
    plt.show()
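
# Editor's note: `get_extrinsics_pose_matrix_extra_rot_torch` (used above) is not
# shown in this snippet. For Adam to optimize `extra_rot_deg`, the correction
# angles must enter the pose differentiably. The sketch below is an assumed,
# illustrative construction (helper name and Euler convention are not from the
# original code), showing how gradients can flow from a corrected 4x4 pose back
# to the three angles of one camera.
import math
import torch

def apply_extra_rotation(base_pose, extra_rot_deg):
    """Left-multiply the rotation of a 4x4 pose by a small extra rotation (degrees)."""
    rx, ry, rz = (extra_rot_deg * math.pi / 180.0).unbind()
    one, zero = torch.ones_like(rx), torch.zeros_like(rx)
    cx, sx = torch.cos(rx), torch.sin(rx)
    cy, sy = torch.cos(ry), torch.sin(ry)
    cz, sz = torch.cos(rz), torch.sin(rz)
    Rx = torch.stack([one, zero, zero, zero, cx, -sx, zero, sx, cx]).reshape(3, 3)
    Ry = torch.stack([cy, zero, sy, zero, one, zero, -sy, zero, cy]).reshape(3, 3)
    Rz = torch.stack([cz, -sz, zero, sz, cz, zero, zero, zero, one]).reshape(3, 3)
    R_extra = Rz @ Ry @ Rx
    top = torch.cat([R_extra @ base_pose[:3, :3], base_pose[:3, 3:4]], dim=1)
    return torch.cat([top, base_pose[3:4, :]], dim=0)

# Gradients flow from the corrected pose back to the angles
angles = torch.zeros(3, requires_grad=True)
corrected_pose = apply_extra_rotation(torch.eye(4), angles)
corrected_pose.sum().backward()
print(angles.grad)  # non-None: the angles are optimizable by Adam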
예제 #14
0
def infer_plot_and_save_3D_pcl(input_files, output_folder, model_wrappers,
                               image_shape, half, save, stop):
    """
    Process a single input file to produce and save visualization

    Parameters
    ----------
    input_file : list (number of cameras) of lists (number of files) of str
        Image file
    output_file : str
        Output file, or folder where the output will be saved
    model_wrapper : nn.Module
        Model wrapper used for inference
    image_shape : Image shape
        Input image shape
    half: bool
        use half precision (fp16)
    save: str
        Save format (npz or png)
    """
    N_cams = len(input_files)
    N_files = len(input_files[0])

    camera_names = []
    for i_cam in range(N_cams):
        camera_names.append(get_camera_name(input_files[i_cam][0]))

    cams = []
    not_masked = []

    cams_x = []
    cams_y = []
    cams_z = []

    # change to half precision for evaluation if requested
    dtype = torch.float16 if half else None

    bbox = o3d.geometry.AxisAlignedBoundingBox(min_bound=(-1000, -1000, -1),
                                               max_bound=(1000, 1000, 5))

    # let's assume all images are from the same sequence (thus same cameras)
    for i_cam in range(N_cams):
        base_folder_str = get_base_folder(input_files[i_cam][0])
        split_type_str = get_split_type(input_files[i_cam][0])
        seq_name_str = get_sequence_name(input_files[i_cam][0])
        camera_str = get_camera_name(input_files[i_cam][0])

        calib_data = {}
        calib_data[camera_str] = read_raw_calib_files_camera_valeo(
            base_folder_str, split_type_str, seq_name_str, camera_str)

        cams_x.append(float(calib_data[camera_str]['extrinsics']['pos_x_m']))
        cams_y.append(float(calib_data[camera_str]['extrinsics']['pos_y_m']))
        cams_z.append(float(calib_data[camera_str]['extrinsics']['pos_z_m']))

        path_to_theta_lut = get_path_to_theta_lut(input_files[i_cam][0])
        path_to_ego_mask = get_path_to_ego_mask(input_files[i_cam][0])
        poly_coeffs, principal_point, scale_factors = get_intrinsics(
            input_files[i_cam][0], calib_data)

        poly_coeffs = torch.from_numpy(poly_coeffs).unsqueeze(0)
        principal_point = torch.from_numpy(principal_point).unsqueeze(0)
        scale_factors = torch.from_numpy(scale_factors).unsqueeze(0)
        pose_matrix = torch.from_numpy(
            get_extrinsics_pose_matrix(input_files[i_cam][0],
                                       calib_data)).unsqueeze(0)
        pose_tensor = Pose(pose_matrix)

        cams.append(
            CameraFisheye(path_to_theta_lut=[path_to_theta_lut],
                          path_to_ego_mask=[path_to_ego_mask],
                          poly_coeffs=poly_coeffs.float(),
                          principal_point=principal_point.float(),
                          scale_factors=scale_factors.float(),
                          Tcw=pose_tensor))
        if torch.cuda.is_available():
            cams[i_cam] = cams[i_cam].to('cuda:{}'.format(rank()), dtype=dtype)

        ego_mask = np.load(path_to_ego_mask)
        not_masked.append(ego_mask.astype(bool).reshape(-1))

    cams_middle = np.zeros(3)
    cams_middle[0] = (cams_x[0] + cams_x[1] + cams_x[2] + cams_x[3]) / 4
    cams_middle[1] = (cams_y[0] + cams_y[1] + cams_y[2] + cams_y[3]) / 4
    cams_middle[2] = (cams_z[0] + cams_z[1] + cams_z[2] + cams_z[3]) / 4

    # create output dirs for each cam
    seq_name = get_sequence_name(input_files[0][0])
    # for i_cam in range(N_cams):
    #     os.makedirs(os.path.join(output_folder, seq_name, 'depth', camera_names[i_cam]), exist_ok=True)
    #     os.makedirs(os.path.join(output_folder, seq_name, 'rgb', camera_names[i_cam]), exist_ok=True)

    # First pass over all the cameras

    i_file = 0
    values = np.arange(1.0, 15.0, 0.5)
    N_values = values.size
    nb_points = np.zeros((N_values, 4))
    n = 0
    for K in values:
        print(K)

        base_0, ext_0 = os.path.splitext(
            os.path.basename(input_files[0][i_file]))
        print(base_0)

        images = []
        images_numpy = []
        pred_inv_depths = []
        pred_depths = []
        world_points = []
        input_depth_files = []
        has_gt_depth = []
        gt_depth = []
        gt_depth_3d = []
        pcl_full = []
        pcl_only_inliers = []
        pcl_only_inliers_left = []
        pcl_only_inliers_right = []
        pcl_only_outliers = []
        ind_clean = []
        ind_clean_left = []
        ind_clean_right = []
        pcl_gt = []
        rgb = []
        viz_pred_inv_depths = []
        for i_cam in range(N_cams):
            images.append(
                load_image(input_files[i_cam][i_file]).convert('RGB'))
            images[i_cam] = resize_image(images[i_cam], image_shape)
            images[i_cam] = to_tensor(images[i_cam]).unsqueeze(0)
            if torch.cuda.is_available():
                images[i_cam] = images[i_cam].to('cuda:{}'.format(rank()),
                                                 dtype=dtype)
            images_numpy.append(images[i_cam][0].cpu().numpy())
            images_numpy[i_cam] = images_numpy[i_cam].reshape(
                (3, -1)).transpose()
            images_numpy[i_cam] = images_numpy[i_cam][not_masked[i_cam]]
            pred_inv_depths.append(model_wrappers[i_cam].depth(images[i_cam]))
            pred_depths.append(inv2depth(pred_inv_depths[i_cam]))
            world_points.append(cams[i_cam].reconstruct(K * pred_depths[i_cam],
                                                        frame='w'))
            world_points[i_cam] = world_points[i_cam][0].cpu().numpy()
            world_points[i_cam] = world_points[i_cam].reshape(
                (3, -1)).transpose()
            world_points[i_cam] = world_points[i_cam][not_masked[i_cam]]
            cam_name = camera_names[i_cam]
            cam_int = cam_name.split('_')[-1]
            input_depth_files.append(get_depth_file(
                input_files[i_cam][i_file]))
            # has_gt_depth.append(os.path.exists(input_depth_files[i_cam]))
            # if has_gt_depth[i_cam]:
            #     gt_depth.append(np.load(input_depth_files[i_cam])['velodyne_depth'].astype(np.float32))
            #     gt_depth[i_cam] = torch.from_numpy(gt_depth[i_cam]).unsqueeze(0).unsqueeze(0)
            #     if torch.cuda.is_available():
            #         gt_depth[i_cam] = gt_depth[i_cam].to('cuda:{}'.format(rank()), dtype=dtype)
            #     gt_depth_3d.append(cams[i_cam].reconstruct(gt_depth[i_cam], frame='w'))
            #     gt_depth_3d[i_cam] = gt_depth_3d[i_cam][0].cpu().numpy()
            #     gt_depth_3d[i_cam] = gt_depth_3d[i_cam].reshape((3, -1)).transpose()
            #     #gt_depth_3d[i_cam] = gt_depth_3d[i_cam][not_masked[i_cam]]
            # else:
            #     gt_depth.append(0)
            #     gt_depth_3d.append(0)
            pcl_full.append(o3d.geometry.PointCloud())
            pcl_full[i_cam].points = o3d.utility.Vector3dVector(
                world_points[i_cam])
            pcl_full[i_cam].colors = o3d.utility.Vector3dVector(
                images_numpy[i_cam])

            pcl = pcl_full[i_cam]  #.select_by_index(ind)
            points_tmp = np.asarray(pcl.points)
            colors_tmp = np.asarray(pcl.colors)
            # remove points that are high up and colored like sky or vegetation
            mask_height = points_tmp[:,
                                     2] > 2.5  # * (abs(points_tmp[:, 0]) < 10) * (abs(points_tmp[:, 1]) < 3)
            mask_colors_blue = np.sum(
                np.abs(colors_tmp - np.array([0.6, 0.8, 1])),
                axis=1) < 0.6  # sky blue
            mask_colors_green = np.sum(
                np.abs(colors_tmp - np.array([0.2, 1, 0.4])), axis=1) < 0.8
            mask_colors_green2 = np.sum(
                np.abs(colors_tmp - np.array([0, 0.5, 0.15])), axis=1) < 0.2
            mask = 1 - mask_height * mask_colors_blue
            mask2 = 1 - mask_height * mask_colors_green
            mask3 = 1 - mask_height * mask_colors_green2
            #maskY = points_tmp[:, 1] > 1
            mask = mask * mask2 * mask3  #*maskY
            cl, ind = pcl.remove_statistical_outlier(nb_neighbors=7,
                                                     std_ratio=1.4)
            ind_clean.append(
                list(set.intersection(set(list(np.where(mask)[0])), set(ind))))
            pcl_only_inliers.append(pcl.select_by_index(ind_clean[i_cam]))

            # if has_gt_depth[i_cam]:
            #     pcl_gt.append(o3d.geometry.PointCloud())
            #     pcl_gt[i_cam].points = o3d.utility.Vector3dVector(gt_depth_3d[i_cam])
            #     gt_inv_depth = 1 / (np.linalg.norm(gt_depth_3d[i_cam], axis=1) + 1e-6)
            #     cm = get_cmap('plasma')
            #     normalizer = .35#np.percentile(gt_inv_depth, 95)
            #     gt_inv_depth /= (normalizer + 1e-6)
            #     #print(cm(np.clip(gt_inv_depth, 0., 1.0)).shape)  # [:, :3]
            #
            #     pcl_gt[i_cam].colors = o3d.utility.Vector3dVector(cm(np.clip(gt_inv_depth, 0., 1.0))[:, :3])
            # else:
            #     pcl_gt.append(0)
        #o3d.visualization.draw_geometries(pcl_full + [e for i, e in enumerate(pcl_gt) if e != 0])
        color_cam = True
        if color_cam:
            for i_cam in range(4):
                if i_cam == 0:
                    pcl_full[i_cam].paint_uniform_color(
                        [1.0, 0.0, 0.0]
                    )  #.colors = o3d.utility.Vector3dVector(images_numpy[i_cam])
                elif i_cam == 1:
                    pcl_full[i_cam].paint_uniform_color([0.0, 1.0, 0.0])
                elif i_cam == 2:
                    pcl_full[i_cam].paint_uniform_color([0.0, 0.0, 1.0])
                elif i_cam == 3:
                    pcl_full[i_cam].paint_uniform_color([0.3, 0.4, 0.3])
        dist_total = np.zeros(4)
        for i_cam in range(4):
            points_tmp = np.asarray(pcl_full[i_cam].points)
            #points_tmp_left  = np.asarray(pcl_full[(i_cam-1) % 4].points)
            #points_tmp_right = np.asarray(pcl_full[(i_cam+1) % 4].points)

            if i_cam == 0:
                maskX_left = points_tmp[:, 0] > cams_x[0]
                maskX_right = points_tmp[:, 0] > cams_x[0]
                maskY_left = points_tmp[:, 1] > cams_y[3]
                maskY_right = points_tmp[:, 1] < cams_y[1]
            elif i_cam == 1:
                maskX_left = points_tmp[:, 0] > cams_x[0]
                maskX_right = points_tmp[:, 0] < cams_x[2]
                maskY_left = points_tmp[:, 1] < cams_y[1]
                maskY_right = points_tmp[:, 1] < cams_y[1]
            elif i_cam == 2:
                maskX_left = points_tmp[:, 0] < cams_x[2]
                maskX_right = points_tmp[:, 0] < cams_x[2]
                maskY_left = points_tmp[:, 1] < cams_y[1]
                maskY_right = points_tmp[:, 1] > cams_y[3]
            elif i_cam == 3:
                maskX_left = points_tmp[:, 0] < cams_x[2]
                maskX_right = points_tmp[:, 0] > cams_x[0]
                maskY_left = points_tmp[:, 1] > cams_y[3]
                maskY_right = points_tmp[:, 1] > cams_y[3]

            far_distance = 5
            mask_far = np.sqrt(
                np.square(points_tmp[:, 0] - cams_middle[0]) +
                np.square(points_tmp[:, 1] - cams_middle[1])) < far_distance
            ind_clean_left.append(
                list(
                    set.intersection(
                        set(
                            list(
                                np.where(mask_far *
                                         maskX_left * maskY_left)[0])),
                        set(ind_clean[i_cam]))))
            ind_clean_right.append(
                list(
                    set.intersection(
                        set(
                            list(
                                np.where(mask_far * maskX_right *
                                         maskY_right)[0])),
                        set(ind_clean[i_cam]))))

        for i_cam in range(4):
            pcl1 = pcl_full[i_cam].select_by_index(
                ind_clean_right[i_cam]).uniform_down_sample(10)
            pcl2 = pcl_full[(i_cam + 1) % 4].select_by_index(
                ind_clean_left[(i_cam + 1) % 4]).uniform_down_sample(10)
            #o3d.visualization.draw_geometries([pcl1, pcl2])
            dists = pcl1.compute_point_cloud_distance(pcl2)
            nb_points[n, i_cam] = np.sum(np.asarray(dists) < 0.5)
            print(nb_points[n, i_cam])
            dists = np.mean(np.asarray(dists))
            print(dists)
            dist_total[i_cam] = dists

        print(np.mean(dist_total))

        # i_cam = 0
        #
        # base_folder_str = get_base_folder(input_files[i_cam][0])
        # split_type_str = get_split_type(input_files[i_cam][0])
        # seq_name_str = get_sequence_name(input_files[i_cam][0])
        # camera_str = get_camera_name(input_files[i_cam][0])
        #
        # calib_data = {}
        # calib_data[camera_str] = read_raw_calib_files_camera_valeo(base_folder_str, split_type_str, seq_name_str, camera_str)
        # pose_matrix = torch.from_numpy(get_extrinsics_pose_matrix_extra_rot(input_files[i_cam][0], calib_data, 15.0, 0, 0)).unsqueeze(0)
        #
        #
        # # R_cam_0 = cams[0].Tcw.mat.cpu().numpy()[0, :3, :3]
        # # print(R_cam_0)
        # # t_cam_0 = cams[0].Tcw.mat.cpu().numpy()[0, :3, 3]
        # # c = np.cos(0.1)
        # # s = np.sin(0.1)
        # # extr_rot = np.array([[c,  0, s],
        # #                      [0, 1, 0],
        # #                      [-s, 0, c]])
        # # R_cam_0 = np.matmul(extr_rot, R_cam_0)
        # # print(R_cam_0)
        # # pose_matrix = torch.from_numpy(transform_from_rot_trans(R_cam_0, t_cam_0).astype(np.float32)).unsqueeze(0)
        # pose_tensor = Pose(pose_matrix).to('cuda:{}'.format(rank()), dtype=dtype)
        #
        # CameraFisheye.Twc.fget.cache_clear()
        # cams[0].Tcw = pose_tensor
        #
        #
        #
        # world_points[i_cam] = cams[i_cam].reconstruct(pred_depths[i_cam], frame='w')
        # world_points[i_cam] = world_points[i_cam][0].cpu().numpy()
        # world_points[i_cam] = world_points[i_cam].reshape((3, -1)).transpose()
        # world_points[i_cam] = world_points[i_cam][not_masked[i_cam]]
        # pcl_full[i_cam].points = o3d.utility.Vector3dVector(world_points[i_cam])
        #
        # for i_cam in range(4):
        #     pcl1 = pcl_full[i_cam].select_by_index(ind_clean_right[i_cam]).uniform_down_sample(10)
        #     pcl2 = pcl_full[(i_cam + 1) % 4].select_by_index(ind_clean_left[(i_cam + 1) % 4]).uniform_down_sample(10)
        #     #o3d.visualization.draw_geometries([pcl1, pcl2])
        #     dists = pcl1.compute_point_cloud_distance(pcl2)
        #     nb_points[n, i_cam] = np.sum(np.asarray(dists) < 0.5)
        #     print(nb_points[n, i_cam])
        #     dists = np.mean(np.asarray(dists))
        #     print(dists)
        #     dist_total[i_cam] = dists
        #
        # print(np.mean(dist_total))
        #
        # i_cam1 = 0
        # i_cam2 = 1
        # pcl1 = pcl_full[i_cam1].select_by_index(ind_clean_right[i_cam1]).uniform_down_sample(10)
        # pcl2 = pcl_full[i_cam2].select_by_index(ind_clean_left[i_cam2]).uniform_down_sample(10)
        # dists1 = pcl1.compute_point_cloud_distance(pcl2)
        # dists2 = pcl2.compute_point_cloud_distance(pcl1)
        # close_points1 = np.where(np.asarray(dists1) < 0.5)[0]
        # close_points2 = np.where(np.asarray(dists2) < 0.5)[0]
        # pcl1 = pcl1.select_by_index(close_points1)
        # pcl2 = pcl2.select_by_index(close_points2)
        #
        # def pcl_distance(extra_rot_deg):
        #     i_cam = 0
        #
        #     base_folder_str = get_base_folder(input_files[i_cam][0])
        #     split_type_str = get_split_type(input_files[i_cam][0])
        #     seq_name_str = get_sequence_name(input_files[i_cam][0])
        #     camera_str = get_camera_name(input_files[i_cam][0])
        #
        #     calib_data = {}
        #     calib_data[camera_str] = read_raw_calib_files_camera_valeo(base_folder_str, split_type_str, seq_name_str, camera_str)
        #     pose_matrix = torch.from_numpy( get_extrinsics_pose_matrix_extra_rot(input_files[i_cam][0], calib_data, extra_rot_deg[0], extra_rot_deg[1], extra_rot_deg[2])).unsqueeze(0)
        #     pose_tensor = Pose(pose_matrix).to('cuda:{}'.format(rank()), dtype=dtype)
        #     CameraFisheye.Twc.fget.cache_clear()
        #     cams[i_cam].Tcw = pose_tensor
        #
        #     world_points[i_cam] = cams[i_cam].reconstruct(pred_depths[i_cam], frame='w')
        #
        #     world_points[i_cam] = world_points[i_cam][0].cpu().numpy()
        #     world_points[i_cam] = world_points[i_cam].reshape((3, -1)).transpose()
        #     world_points[i_cam] = world_points[i_cam][not_masked[i_cam]].astype(np.float64)
        #     pcl_full[i_cam].points = o3d.utility.Vector3dVector(world_points[i_cam])
        #
        #     pcl1 = pcl_full[i_cam1].select_by_index(ind_clean_right[i_cam1]).uniform_down_sample(10).select_by_index(close_points1)
        #     pcl2 = pcl_full[i_cam2].select_by_index(ind_clean_left[i_cam2]).uniform_down_sample(10).select_by_index(close_points2)
        #
        #     dists = pcl1.compute_point_cloud_distance(pcl2)
        #     return np.mean(np.asarray(dists))
        #
        # print(pcl_distance([0.0, 0.0, 0.0]))
        # print(pcl_distance([15.0, 0.0, 0.0]))
        # print(pcl_distance([-0.01, 0.0, 0.0]))
        # print(pcl_distance([0.01, 0.0, 0.0]))
        #
        # s = time.time()
        # result = minimize(pcl_distance, x0=np.array([0.,0.0,0.]), method='L-BFGS-B', options={'disp': True})
        # # Method BFGS not working
        # # Method Nelder-Mead working - time : 12.842947721481323
        # # Method Powell maybe working - time : 4.3
        # # Method CG not working
        # # Method L-BFGS-B not working
        #
        #
        #
        # print("time : " + str(time.time() - s))
        # print(result)

        close_points1 = []
        close_points2 = []
        for i_cam in range(4):
            i_cam1 = i_cam
            i_cam2 = (i_cam + 1) % 4
            pcl1 = pcl_full[i_cam1].select_by_index(
                ind_clean_right[i_cam1]).uniform_down_sample(100)
            pcl2 = pcl_full[i_cam2].select_by_index(
                ind_clean_left[i_cam2]).uniform_down_sample(100)
            dists1 = pcl1.compute_point_cloud_distance(pcl2)
            dists2 = pcl2.compute_point_cloud_distance(pcl1)
            close_points1.append(np.where(np.asarray(dists1) < 0.5)[0])
            close_points2.append(np.where(np.asarray(dists2) < 0.5)[0])

        def pcl_distance(extra_rot_deg):
            for i_cam in range(4):
                base_folder_str = get_base_folder(input_files[i_cam][0])
                split_type_str = get_split_type(input_files[i_cam][0])
                seq_name_str = get_sequence_name(input_files[i_cam][0])
                camera_str = get_camera_name(input_files[i_cam][0])

                calib_data = {}
                calib_data[camera_str] = read_raw_calib_files_camera_valeo(
                    base_folder_str, split_type_str, seq_name_str, camera_str)
                pose_matrix = torch.from_numpy(
                    get_extrinsics_pose_matrix_extra_rot(
                        input_files[i_cam][0], calib_data,
                        extra_rot_deg[3 * i_cam + 0],
                        extra_rot_deg[3 * i_cam + 1],
                        extra_rot_deg[3 * i_cam + 2])).unsqueeze(0)
                pose_tensor = Pose(pose_matrix).to('cuda:{}'.format(rank()),
                                                   dtype=dtype)
                CameraFisheye.Twc.fget.cache_clear()
                cams[i_cam].Tcw = pose_tensor

                world_points[i_cam] = cams[i_cam].reconstruct(
                    pred_depths[i_cam], frame='w')

                world_points[i_cam] = world_points[i_cam][0].cpu().numpy()
                world_points[i_cam] = world_points[i_cam].reshape(
                    (3, -1)).transpose()
                world_points[i_cam] = world_points[i_cam][
                    not_masked[i_cam]].astype(np.float64)
                pcl_full[i_cam].points = o3d.utility.Vector3dVector(
                    world_points[i_cam])

            dist_total = np.zeros(4)
            for i_cam in range(4):
                i_cam1 = i_cam
                i_cam2 = (i_cam + 1) % 4
                pcl1 = pcl_full[i_cam1].select_by_index(
                    ind_clean_right[i_cam1]).uniform_down_sample(
                        100).select_by_index(close_points1[i_cam])
                pcl2 = pcl_full[i_cam2].select_by_index(
                    ind_clean_left[i_cam2]).uniform_down_sample(
                        100).select_by_index(close_points2[i_cam])
                dists = pcl1.compute_point_cloud_distance(pcl2)
                dist_total[i_cam] = np.mean(np.asarray(dists))

            return np.mean(dist_total)

        result = minimize(pcl_distance,
                          x0=np.zeros(12),
                          method='Powell',
                          options={'disp': True})
        print(result)

        o3d.visualization.draw_geometries(
            [pcl_full[0], pcl_full[1], pcl_full[2], pcl_full[3]])
        for i_cam in range(4):
            pcl1 = pcl_full[i_cam].select_by_index(
                ind_clean_right[i_cam]).uniform_down_sample(10)
            pcl2 = pcl_full[(i_cam + 1) % 4].select_by_index(
                ind_clean_left[(i_cam + 1) % 4]).uniform_down_sample(10)
            o3d.visualization.draw_geometries([pcl1, pcl2])

        # vis_full = o3d.visualization.Visualizer()
        # vis_full.create_window(visible = True, window_name = 'full'+str(i_file))
        # for i_cam in range(N_cams):
        #     vis_full.add_geometry(pcl_full[i_cam])
        # for i, e in enumerate(pcl_gt):
        #     if e != 0:
        #         vis_full.add_geometry(e)
        # ctr = vis_full.get_view_control()
        # ctr.set_lookat(lookat_vector)
        # ctr.set_front(front_vector)
        # ctr.set_up(up_vector)
        # ctr.set_zoom(zoom_float)
        # #vis_full.run()
        # vis_full.destroy_window()
        i_cam2 = 0
        suff = ''
        vis_only_inliers = o3d.visualization.Visualizer()
        vis_only_inliers.create_window(visible=True,
                                       window_name='inliers' + str(i_file))
        for i_cam in range(N_cams):
            vis_only_inliers.add_geometry(pcl_only_inliers[i_cam])
        for i, e in enumerate(pcl_gt):
            if e != 0:
                vis_only_inliers.add_geometry(e)
        ctr = vis_only_inliers.get_view_control()
        ctr.set_lookat(lookat_vector)
        ctr.set_front(front_vector)
        ctr.set_up(up_vector)
        ctr.set_zoom(zoom_float)
        param = o3d.io.read_pinhole_camera_parameters(
            '/home/vbelissen/Downloads/test/cameras_jsons/test' +
            str(i_cam2 + 1) + suff + '.json')
        ctr.convert_from_pinhole_camera_parameters(param)
        opt = vis_only_inliers.get_render_option()
        opt.background_color = np.asarray([0, 0, 0])
        #vis_only_inliers.update_geometry('inliers0')
        vis_only_inliers.poll_events()
        vis_only_inliers.update_renderer()
        if stop:
            vis_only_inliers.run()
        #param = vis_only_inliers.get_view_control().convert_to_pinhole_camera_parameters()
        #o3d.io.write_pinhole_camera_parameters('/home/vbelissen/Downloads/test.json', param)
        vis_only_inliers.destroy_window()
        del ctr
        del vis_only_inliers
        del opt
        n += 1

    plt.plot(values, nb_points)
    plt.show()
    plt.close()
    plt.plot(values, np.sum(nb_points, axis=1))
    plt.show()
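
# Editor's note: the sweep above only plots `nb_points` against the candidate depth
# scale factors. Assuming one wants to pick the single scale that maximizes
# cross-camera point-cloud agreement, a minimal selection step could look like the
# sketch below (an illustrative addition, not part of the original script).
import numpy as np

def best_scale(values, nb_points):
    """Return the depth scale whose point clouds agree most across the 4 camera pairs."""
    total_overlap = np.sum(nb_points, axis=1)   # total matched points per candidate scale
    return values[int(np.argmax(total_overlap))]

# e.g. print('best depth scale:', best_scale(values, nb_points))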
예제 #15
0
def infer_plot_and_save_3D_pcl(input_file, output_file, model_wrapper,
                               image_shape, half, save):
    """
    Process a single input file to produce and save visualization

    Parameters
    ----------
    input_file : str
        Image file
    output_file : str
        Output file, or folder where the output will be saved
    model_wrapper : nn.Module
        Model wrapper used for inference
    image_shape : Image shape
        Input image shape
    half: bool
        use half precision (fp16)
    save: str
        Save format (npz or png)
    """
    if not is_image(output_file):
        # If not an image, assume it's a folder and append the input name
        os.makedirs(output_file, exist_ok=True)
        output_file = os.path.join(output_file, os.path.basename(input_file))

    # change to half precision for evaluation if requested
    dtype = torch.float16 if half else None

    # Load image
    image = load_image(input_file).convert('RGB')
    # Resize and to tensor
    image = resize_image(image, image_shape)
    image = to_tensor(image).unsqueeze(0)

    # Send image to GPU if available
    if torch.cuda.is_available():
        image = image.to('cuda:{}'.format(rank()), dtype=dtype)

    # Depth inference (returns predicted inverse depth)
    pred_inv_depth = model_wrapper.depth(image)
    pred_depth = inv2depth(pred_inv_depth)

    base_folder_str = get_base_folder(input_file)
    split_type_str = get_split_type(input_file)
    seq_name_str = get_sequence_name(input_file)
    camera_str = get_camera_name(input_file)

    calib_data = {}
    calib_data[camera_str] = read_raw_calib_files_camera_valeo(
        base_folder_str, split_type_str, seq_name_str, camera_str)

    path_to_theta_lut = get_path_to_theta_lut(input_file)
    path_to_ego_mask = get_path_to_ego_mask(input_file)
    poly_coeffs, principal_point, scale_factors = get_intrinsics(
        input_file, calib_data)

    poly_coeffs = torch.from_numpy(poly_coeffs).unsqueeze(0)
    principal_point = torch.from_numpy(principal_point).unsqueeze(0)
    scale_factors = torch.from_numpy(scale_factors).unsqueeze(0)

    pose_matrix = torch.from_numpy(
        get_extrinsics_pose_matrix(input_file, calib_data)).unsqueeze(0)
    pose_tensor = Pose(pose_matrix)

    ego_mask = np.load(path_to_ego_mask)
    not_masked = ego_mask.astype(bool).reshape(-1)

    cam = CameraFisheye(path_to_theta_lut=[path_to_theta_lut],
                        path_to_ego_mask=[path_to_ego_mask],
                        poly_coeffs=poly_coeffs.float(),
                        principal_point=principal_point.float(),
                        scale_factors=scale_factors.float(),
                        Tcw=pose_tensor)
    if torch.cuda.is_available():
        cam = cam.to('cuda:{}'.format(rank()), dtype=dtype)

    world_points = cam.reconstruct(pred_depth, frame='w')
    world_points = world_points[0].cpu().numpy()
    world_points = world_points.reshape((3, -1)).transpose()

    gt_depth_file = get_depth_file(input_file)
    gt_depth = np.load(gt_depth_file)['velodyne_depth'].astype(np.float32)
    gt_depth = torch.from_numpy(gt_depth).unsqueeze(0).unsqueeze(0)
    if torch.cuda.is_available():
        gt_depth = gt_depth.to('cuda:{}'.format(rank()), dtype=dtype)

    gt_depth_3d = cam.reconstruct(gt_depth, frame='w')
    gt_depth_3d = gt_depth_3d[0].cpu().numpy()
    gt_depth_3d = gt_depth_3d.reshape((3, -1)).transpose()

    world_points = world_points[not_masked]
    gt_depth_3d = gt_depth_3d[not_masked]

    pcl = o3d.geometry.PointCloud()
    pcl.points = o3d.utility.Vector3dVector(world_points)
    img_numpy = image[0].cpu().numpy()
    img_numpy = img_numpy.reshape((3, -1)).transpose()

    img_numpy = img_numpy[not_masked]
    pcl.colors = o3d.utility.Vector3dVector(img_numpy)
    #pcl.paint_uniform_color([1.0, 0.0, 0])

    #print("Radius oulier removal")
    #cl, ind = pcl.remove_radius_outlier(nb_points=10, radius=0.5)
    #display_inlier_outlier(pcl, ind)

    remove_outliers = True
    if remove_outliers:
        cl, ind = pcl.remove_statistical_outlier(nb_neighbors=10,
                                                 std_ratio=1.3)
        #display_inlier_outlier(pcl, ind)
        inlier_cloud = pcl.select_by_index(ind)
        #inlier_cloud.paint_uniform_color([0.0, 1.0, 0])
        outlier_cloud = pcl.select_by_index(ind, invert=True)
        outlier_cloud.paint_uniform_color([0.0, 0.0, 1.0])

    pcl_gt = o3d.geometry.PointCloud()
    pcl_gt.points = o3d.utility.Vector3dVector(gt_depth_3d)
    pcl_gt.paint_uniform_color([1.0, 0.0, 0])

    if remove_outliers:
        o3d.visualization.draw_geometries(
            [inlier_cloud, pcl_gt, outlier_cloud])
        o3d.visualization.draw_geometries([inlier_cloud, pcl_gt])

    o3d.visualization.draw_geometries([pcl, pcl_gt])

    rgb = image[0].permute(1, 2, 0).detach().cpu().numpy() * 255
    # Prepare inverse depth
    viz_pred_inv_depth = viz_inv_depth(pred_inv_depth[0]) * 255
    # Concatenate both vertically
    image = np.concatenate([rgb, viz_pred_inv_depth], 0)
    # Save visualization
    print('Saving {} to {}'.format(
        pcolor(input_file, 'cyan', attrs=['bold']),
        pcolor(output_file, 'magenta', attrs=['bold'])))
    imwrite(output_file, image[:, :, ::-1])
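
# Editor's note: the snippet above only visualizes predicted vs. lidar point clouds.
# If a quantitative check is also wanted, standard depth metrics can be computed on
# pixels where the lidar depth is valid (non-zero). This helper is an illustrative
# addition, not part of the original script.
import torch

def depth_metrics(pred_depth, gt_depth):
    """Abs-rel and RMSE between predicted and ground-truth depth maps [B,1,H,W]."""
    valid = gt_depth > 0
    pred, gt = pred_depth[valid], gt_depth[valid]
    abs_rel = torch.mean(torch.abs(pred - gt) / gt)
    rmse = torch.sqrt(torch.mean((pred - gt) ** 2))
    return abs_rel.item(), rmse.item()

# e.g. abs_rel, rmse = depth_metrics(pred_depth, gt_depth)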
예제 #16
0
def infer_plot_and_save_3D_pcl(input_files, output_folder, model_wrappers,
                               image_shape, half, save, stop):
    """
    Process a single input file to produce and save visualization

    Parameters
    ----------
    input_file : list (number of cameras) of lists (number of files) of str
        Image file
    output_file : str
        Output file, or folder where the output will be saved
    model_wrapper : nn.Module
        Model wrapper used for inference
    image_shape : Image shape
        Input image shape
    half: bool
        use half precision (fp16)
    save: str
        Save format (npz or png)
    """
    N_cams = len(input_files)
    N_files = len(input_files[0])

    camera_names = []
    for i_cam in range(N_cams):
        camera_names.append(get_camera_name(input_files[i_cam][0]))

    cams = []
    not_masked = []

    cams_x = []
    cams_y = []
    cams_z = []

    alpha_mask = 0.7

    # change to half precision for evaluation if requested
    dtype = torch.float16 if half else None

    bbox = o3d.geometry.AxisAlignedBoundingBox(min_bound=(-1000, -1000, -1),
                                               max_bound=(1000, 1000, 5))

    # let's assume all images are from the same sequence (thus same cameras)
    for i_cam in range(N_cams):
        base_folder_str = get_base_folder(input_files[i_cam][0])
        split_type_str = get_split_type(input_files[i_cam][0])
        seq_name_str = get_sequence_name(input_files[i_cam][0])
        camera_str = get_camera_name(input_files[i_cam][0])

        calib_data = {}
        calib_data[camera_str] = read_raw_calib_files_camera_valeo(
            base_folder_str, split_type_str, seq_name_str, camera_str)

        cams_x.append(float(calib_data[camera_str]['extrinsics']['pos_x_m']))
        cams_y.append(float(calib_data[camera_str]['extrinsics']['pos_y_m']))
        cams_z.append(float(calib_data[camera_str]['extrinsics']['pos_z_m']))

        path_to_theta_lut = get_path_to_theta_lut(input_files[i_cam][0])
        path_to_ego_mask = get_path_to_ego_mask(input_files[i_cam][0])
        poly_coeffs, principal_point, scale_factors = get_intrinsics(
            input_files[i_cam][0], calib_data)

        poly_coeffs = torch.from_numpy(poly_coeffs).unsqueeze(0)
        principal_point = torch.from_numpy(principal_point).unsqueeze(0)
        scale_factors = torch.from_numpy(scale_factors).unsqueeze(0)
        pose_matrix = torch.from_numpy(
            get_extrinsics_pose_matrix(input_files[i_cam][0],
                                       calib_data)).unsqueeze(0)
        pose_tensor = Pose(pose_matrix)

        cams.append(
            CameraFisheye(path_to_theta_lut=[path_to_theta_lut],
                          path_to_ego_mask=[path_to_ego_mask],
                          poly_coeffs=poly_coeffs.float(),
                          principal_point=principal_point.float(),
                          scale_factors=scale_factors.float(),
                          Tcw=pose_tensor))
        if torch.cuda.is_available():
            cams[i_cam] = cams[i_cam].to('cuda:{}'.format(rank()), dtype=dtype)

        ego_mask = np.load(path_to_ego_mask)
        not_masked.append(ego_mask.astype(bool).reshape(-1))

    cams_middle = np.zeros(3)
    cams_middle[0] = (
        cams[0].Twc.mat.cpu().numpy()[0, 0, 3] +
        cams[1].Twc.mat.cpu().numpy()[0, 0, 3] +
        cams[2].Twc.mat.cpu().numpy()[0, 0, 3] +
        cams[3].Twc.mat.cpu().numpy()[0, 0, 3]
    ) / 4  #(cams_x[0] + cams_x[1] + cams_x[2] + cams_x[3]) / 4
    cams_middle[1] = (
        cams[0].Twc.mat.cpu().numpy()[0, 1, 3] +
        cams[1].Twc.mat.cpu().numpy()[0, 1, 3] +
        cams[2].Twc.mat.cpu().numpy()[0, 1, 3] +
        cams[3].Twc.mat.cpu().numpy()[0, 1, 3]
    ) / 4  #(cams_y[0] + cams_y[1] + cams_y[2] + cams_y[3]) / 4
    cams_middle[2] = (
        cams[0].Twc.mat.cpu().numpy()[0, 2, 3] +
        cams[1].Twc.mat.cpu().numpy()[0, 2, 3] +
        cams[2].Twc.mat.cpu().numpy()[0, 2, 3] +
        cams[3].Twc.mat.cpu().numpy()[0, 2, 3]
    ) / 4  #(cams_z[0] + cams_z[1] + cams_z[2] + cams_z[3]) / 4

    # create output dirs for each cam
    seq_name = get_sequence_name(input_files[0][0])
    for i_cam in range(N_cams):
        os.makedirs(os.path.join(output_folder, seq_name, 'depth',
                                 camera_names[i_cam]),
                    exist_ok=True)
        os.makedirs(os.path.join(output_folder, seq_name, 'rgb',
                                 camera_names[i_cam]),
                    exist_ok=True)

    first_pic = True
    for i_file in range(0, N_files, 10):

        load_pred_masks = False
        remove_close_points_lidar_semantic = False
        print_lidar = True

        base_0, ext_0 = os.path.splitext(
            os.path.basename(input_files[0][i_file]))
        print(base_0)

        images = []
        images_numpy = []
        predicted_masks = []
        pred_inv_depths = []
        pred_depths = []
        world_points = []
        input_depth_files = []
        has_gt_depth = []
        input_full_masks = []
        has_full_mask = []
        gt_depth = []
        gt_depth_3d = []
        pcl_full = []
        pcl_only_inliers = []
        pcl_only_outliers = []
        pcl_gt = []
        rgb = []
        viz_pred_inv_depths = []
        great_lap = []
        for i_cam in range(N_cams):
            images.append(
                load_image(input_files[i_cam][i_file]).convert('RGB'))
            images[i_cam] = resize_image(images[i_cam], image_shape)
            images[i_cam] = to_tensor(images[i_cam]).unsqueeze(0)
            if torch.cuda.is_available():
                images[i_cam] = images[i_cam].to('cuda:{}'.format(rank()),
                                                 dtype=dtype)
            if load_pred_masks:
                input_pred_mask_file = input_files[i_cam][i_file].replace(
                    'images_multiview', 'pred_mask')
                predicted_masks.append(
                    load_image(input_pred_mask_file).convert('RGB'))
                predicted_masks[i_cam] = resize_image(predicted_masks[i_cam],
                                                      image_shape)
                predicted_masks[i_cam] = to_tensor(
                    predicted_masks[i_cam]).unsqueeze(0)
                if torch.cuda.is_available():
                    predicted_masks[i_cam] = predicted_masks[i_cam].to(
                        'cuda:{}'.format(rank()), dtype=dtype)

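            # Note: a single model (model_wrappers[0]) predicts depth for all cameras here.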
            pred_inv_depths.append(model_wrappers[0].depth(images[i_cam]))
            pred_depths.append(inv2depth(pred_inv_depths[i_cam]))

        for i_cam in range(N_cams):
            print(i_cam)
            mix_depths = True
            if mix_depths:
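                # Depth mixing: channel 1 of "depths" holds this camera's own prediction,
                # channels 0 and 2 receive distances splatted from the two neighbouring cameras,
                # and the per-pixel minimum over the three channels is kept at the end.
                # (Missing values are initialized to 500 so they never win the minimum.)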
                depths = (torch.ones(1, 3, 800, 1280) * 500).cuda()
                depths[0, 1, :, :] = pred_depths[i_cam][0, 0, :, :]
                # not_masked1s = torch.zeros(3, 800, 1280).to(dtype=bool)
                # not_masked1 = torch.ones(1, 3, 800, 1280).to(dtype=bool)
                for relative in [-1, 1]:
                    path_to_ego_mask_relative = get_path_to_ego_mask(
                        input_files[(i_cam + relative) % 4][0])
                    ego_mask_relative = np.load(path_to_ego_mask_relative)
                    ego_mask_relative = torch.from_numpy(
                        ego_mask_relative.astype(bool))

                    # reconstructed 3d points from relative depth map
                    relative_points_3d = cams[(i_cam + relative) %
                                              4].reconstruct(
                                                  pred_depths[(i_cam +
                                                               relative) % 4],
                                                  frame='w')

                    # center of projection (COP) of the current camera
                    cop = np.zeros((3, 800, 1280))
                    cop[0, :, :] = cams[i_cam].Twc.mat.cpu().numpy()[0, 0, 3]
                    cop[1, :, :] = cams[i_cam].Twc.mat.cpu().numpy()[0, 1, 3]
                    cop[2, :, :] = cams[i_cam].Twc.mat.cpu().numpy()[0, 2, 3]

                    # distances of 3d points to cop of current cam
                    distances_3d = np.linalg.norm(
                        relative_points_3d[0, :, :, :].cpu().numpy() - cop,
                        axis=0)
                    distances_3d = torch.from_numpy(distances_3d).unsqueeze(
                        0).cuda().float()

                    # neighbour points projected onto the current cam (normalized coordinates in (-1, 1)); note that X and Y are swapped
                    projected_points_2d = cams[i_cam].project(
                        relative_points_3d, frame='w')
                    projected_points_2d[:, :, :,
                                        [0, 1]] = projected_points_2d[:, :, :,
                                                                      [1, 0]]

                    # applying ego mask of relative cam
                    projected_points_2d[:, ~ego_mask_relative, :] = 2

                    # looking for indices of inbounds pixels
                    x_ok = (projected_points_2d[0, :, :, 0] >
                            -1) * (projected_points_2d[0, :, :, 0] < 1)
                    y_ok = (projected_points_2d[0, :, :, 1] >
                            -1) * (projected_points_2d[0, :, :, 1] < 1)
                    xy_ok = x_ok * y_ok
                    xy_ok_id = xy_ok.nonzero(as_tuple=False)

                    # xy values of these indices (in (-1, 1))
                    xy_ok_X = xy_ok_id[:, 0]
                    xy_ok_Y = xy_ok_id[:, 1]

                    # xy values in pixels
                    projected_points_2d_ints = (projected_points_2d + 1) / 2
                    projected_points_2d_ints[0, :, :, 0] = torch.round(
                        projected_points_2d_ints[0, :, :, 0] * 799)
                    projected_points_2d_ints[0, :, :, 1] = torch.round(
                        projected_points_2d_ints[0, :, :, 1] * 1279)
                    projected_points_2d_ints = projected_points_2d_ints.to(
                        dtype=int)

                    # splat the distances (to this camera's COP) of the neighbour's 3D points
                    # into channel 1 + relative, at their projected pixel locations
                    depths[0, 1 + relative,
                           projected_points_2d_ints[0, xy_ok_X, xy_ok_Y, 0],
                           projected_points_2d_ints[0, xy_ok_X, xy_ok_Y,
                                                    1]] = distances_3d[0,
                                                                       xy_ok_X,
                                                                       xy_ok_Y]

                    interpolation = False
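                    # Optional hole filling of the splatted neighbour depth map with
                    # scipy's LinearNDInterpolator (disabled by default).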
                    if interpolation:

                        def fillMissingValues(
                            target_for_interp,
                            copy=True,
                            interpolator=scipy.interpolate.LinearNDInterpolator
                        ):
                            import cv2, scipy, numpy as np

                            if copy:
                                target_for_interp = target_for_interp.copy()

                            def getPixelsForInterp(img):
                                """
                                Calculates a mask of pixels neighboring invalid values -
                                   to use for interpolation.
                                """
                                # mask invalid pixels
                                invalid_mask = np.isnan(img) + (img == 0)
                                kernel = cv2.getStructuringElement(
                                    cv2.MORPH_ELLIPSE, (3, 3))

                                # dilate to mark borders around invalid regions
                                dilated_mask = cv2.dilate(
                                    invalid_mask.astype('uint8'),
                                    kernel,
                                    borderType=cv2.BORDER_CONSTANT,
                                    borderValue=int(0))

                                # pixelwise "and" with valid pixel mask (~invalid_mask)
                                masked_for_interp = dilated_mask * ~invalid_mask
                                return masked_for_interp.astype(
                                    'bool'), invalid_mask

                            # Mask pixels for interpolation
                            mask_for_interp, invalid_mask = getPixelsForInterp(
                                target_for_interp)

                            # Interpolate only holes, only using these pixels
                            points = np.argwhere(mask_for_interp)
                            values = target_for_interp[mask_for_interp]
                            interp = interpolator(points, values)

                            target_for_interp[invalid_mask] = interp(
                                np.argwhere(invalid_mask))
                            return target_for_interp

                        dd = depths[0, 1 + relative, :, :].cpu().numpy()
                        dd[dd == 500] = np.nan

                        dd = fillMissingValues(dd,
                                               copy=True,
                                               interpolator=scipy.interpolate.
                                               LinearNDInterpolator)

                        dd[np.isnan(dd)] = 500
                        dd[dd == 0] = 500

                        depths[0, 1 + relative, :, :] = torch.from_numpy(
                            dd).unsqueeze(0).unsqueeze(0).cuda()

                depths[depths == 0] = 500
                #depths[depths == np.nan] = 500
                pred_depths[i_cam] = depths.min(dim=1, keepdim=True)[0]

            world_points.append(cams[i_cam].reconstruct(pred_depths[i_cam],
                                                        frame='w'))

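            # Keep only pixels with a small depth Laplacian (i.e. away from depth
            # discontinuities) when building the colored point cloud.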
            pred_depth_copy = pred_depths[i_cam].squeeze(0).squeeze(
                0).cpu().numpy()
            pred_depth_copy = np.uint8(pred_depth_copy)
            lap = np.uint8(
                np.absolute(cv2.Laplacian(pred_depth_copy, cv2.CV_64F,
                                          ksize=3)))
            great_lap.append(lap < 4)
            great_lap[i_cam] = great_lap[i_cam].reshape(-1)
            images_numpy.append(images[i_cam][0].cpu().numpy())
            images_numpy[i_cam] = images_numpy[i_cam].reshape(
                (3, -1)).transpose()
            images_numpy[i_cam] = images_numpy[i_cam][not_masked[i_cam] *
                                                      great_lap[i_cam]]
            if load_pred_masks:
                predicted_masks[i_cam] = predicted_masks[i_cam][0].cpu().numpy(
                )
                predicted_masks[i_cam] = predicted_masks[i_cam].reshape(
                    (3, -1)).transpose()
                predicted_masks[i_cam] = predicted_masks[i_cam][
                    not_masked[i_cam] * great_lap[i_cam]]

        for i_cam in range(N_cams):
            world_points[i_cam] = world_points[i_cam][0].cpu().numpy()
            world_points[i_cam] = world_points[i_cam].reshape(
                (3, -1)).transpose()
            world_points[i_cam] = world_points[i_cam][not_masked[i_cam] *
                                                      great_lap[i_cam]]
            cam_name = camera_names[i_cam]
            cam_int = cam_name.split('_')[-1]
            input_depth_files.append(get_depth_file(
                input_files[i_cam][i_file]))
            has_gt_depth.append(os.path.exists(input_depth_files[i_cam]))
            if has_gt_depth[i_cam]:
                gt_depth.append(
                    np.load(input_depth_files[i_cam])['velodyne_depth'].astype(
                        np.float32))
                gt_depth[i_cam] = torch.from_numpy(
                    gt_depth[i_cam]).unsqueeze(0).unsqueeze(0)
                if torch.cuda.is_available():
                    gt_depth[i_cam] = gt_depth[i_cam].to('cuda:{}'.format(
                        rank()),
                                                         dtype=dtype)
                gt_depth_3d.append(cams[i_cam].reconstruct(gt_depth[i_cam],
                                                           frame='w'))
                gt_depth_3d[i_cam] = gt_depth_3d[i_cam][0].cpu().numpy()
                gt_depth_3d[i_cam] = gt_depth_3d[i_cam].reshape(
                    (3, -1)).transpose()
                #gt_depth_3d[i_cam] = gt_depth_3d[i_cam][not_masked[i_cam]]
            else:
                gt_depth.append(0)
                gt_depth_3d.append(0)
            input_full_masks.append(
                get_full_mask_file(input_files[i_cam][i_file]))
            has_full_mask.append(os.path.exists(input_full_masks[i_cam]))

            pcl_full.append(o3d.geometry.PointCloud())
            pcl_full[i_cam].points = o3d.utility.Vector3dVector(
                world_points[i_cam])
            pcl_full[i_cam].colors = o3d.utility.Vector3dVector(
                images_numpy[i_cam])

            pcl = pcl_full[i_cam]  # .select_by_index(ind)
            points_tmp = np.asarray(pcl.points)
            colors_tmp = images_numpy[i_cam]  # np.asarray(pcl.colors)
            # filter out points below the ground plane (z < -1 m) and high points
            # (z > 1.5 m) with sky- or vegetation-like colors
            mask_below = points_tmp[:, 2] < -1.0
            mask_height = points_tmp[:, 2] > 1.5  # * (abs(points_tmp[:, 0]) < 10) * (abs(points_tmp[:, 1]) < 3)
            mask_colors_blue = np.sum(
                np.abs(colors_tmp - np.array([0.6, 0.8, 1])),
                axis=1) < 0.6  # sky blue
            mask_colors_blue2 = np.sum(
                np.abs(colors_tmp - np.array([0.8, 1, 1])),
                axis=1) < 0.6  # sky blue
            mask_colors_green = np.sum(
                np.abs(colors_tmp - np.array([0.2, 1, 0.4])), axis=1) < 0.8
            mask_colors_green2 = np.sum(
                np.abs(colors_tmp - np.array([0, 0.5, 0.15])), axis=1) < 0.2
            mask_below = 1 - mask_below
            mask = 1 - mask_height * mask_colors_blue
            mask_bis = 1 - mask_height * mask_colors_blue2
            mask2 = 1 - mask_height * mask_colors_green
            mask3 = 1 - mask_height * mask_colors_green2
            mask = mask * mask_bis * mask2 * mask3 * mask_below

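            # If predicted segmentation masks are loaded, blend them into the point colors,
            # keeping the original RGB where the mask is black or gray (unlabeled/background).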
            if load_pred_masks:
                black_pixels = np.logical_or(
                    np.sum(np.abs(predicted_masks[i_cam] * 255 -
                                  np.array([0, 0, 0])),
                           axis=1) < 15,
                    np.sum(np.abs(predicted_masks[i_cam] * 255 -
                                  np.array([127, 127, 127])),
                           axis=1) < 20)
                #background_pixels = np.sum(np.abs(predicted_masks[i_cam]*255 - np.array([127, 127, 127])), axis=1) < 20
                ind_black_pixels = np.where(black_pixels)[0]
                #ind_background_pixels = np.where(background_pixels)[0]
                color_vector = alpha_mask * predicted_masks[i_cam] + (
                    1 - alpha_mask) * images_numpy[i_cam]
                color_vector[ind_black_pixels] = images_numpy[i_cam][
                    ind_black_pixels]
                #color_vector[ind_background_pixels] = images_numpy[i_cam][ind_background_pixels]
                pcl_full[i_cam].colors = o3d.utility.Vector3dVector(
                    color_vector)

            # if has_full_mask[i_cam]:
            #     full_mask = np.load(input_full_masks[i_cam])
            #     mask_colors = label_colors[correspondence[full_mask]].reshape((-1, 3))#.transpose()
            #     mask_colors = mask_colors[not_masked[i_cam]*great_lap[i_cam]]
            #     pcl_full[i_cam].colors = o3d.utility.Vector3dVector(alpha_mask * mask_colors + (1-alpha_mask) * images_numpy[i_cam])

            pcl = pcl_full[i_cam]  # .select_by_index(ind)

            pcl = pcl.select_by_index(np.where(mask)[0])
            cl, ind = pcl.remove_statistical_outlier(nb_neighbors=7,
                                                     std_ratio=1.2)
            pcl = pcl.select_by_index(ind)
            pcl = pcl.voxel_down_sample(voxel_size=0.02)
            #if has_full_mask[i_cam]:
            #    pcl.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.2, max_nn=15))
            pcl_only_inliers.append(
                pcl)  #pcl_full[i_cam].select_by_index(ind)[mask])
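            # Build the ground-truth lidar point cloud, colored by inverse distance to the
            # rig center (cams_middle) with the 'plasma' colormap.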
            if has_gt_depth[i_cam]:
                pcl_gt.append(o3d.geometry.PointCloud())
                pcl_gt[i_cam].points = o3d.utility.Vector3dVector(
                    gt_depth_3d[i_cam])
                gt_inv_depth = 1 / (np.linalg.norm(
                    gt_depth_3d[i_cam] - cams_middle, axis=1) + 1e-6)
                cm = get_cmap('plasma')
                normalizer = .35  #np.percentile(gt_inv_depth, 95)
                gt_inv_depth /= (normalizer + 1e-6)
                pcl_gt[i_cam].colors = o3d.utility.Vector3dVector(
                    cm(np.clip(gt_inv_depth, 0., 1.0))[:, :3])
            else:
                pcl_gt.append(0)

        threshold = 0.5
        threshold2 = 0.1
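        # Optional deduplication: where a neighbouring camera has a semantic mask, or where
        # lidar ground truth exists, points closer than the thresholds above are downsampled
        # to avoid doubled surfaces in the merged cloud.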
        if remove_close_points_lidar_semantic:
            for i_cam in range(4):
                if has_full_mask[i_cam]:
                    for relative in [-1, 1]:
                        if not has_full_mask[(i_cam + relative) % 4]:
                            dists = pcl_only_inliers[
                                (i_cam + relative) %
                                4].compute_point_cloud_distance(
                                    pcl_only_inliers[i_cam])
                            p1 = pcl_only_inliers[
                                (i_cam + relative) % 4].select_by_index(
                                    np.where(np.asarray(dists) > threshold)[0])
                            p2 = pcl_only_inliers[
                                (i_cam + relative) % 4].select_by_index(
                                    np.where(np.asarray(dists) > threshold)[0],
                                    invert=True).uniform_down_sample(
                                        15
                                    )  #.voxel_down_sample(voxel_size=0.5)
                            pcl_only_inliers[(i_cam + relative) % 4] = p1 + p2
                if has_gt_depth[i_cam]:
                    if has_full_mask[i_cam]:
                        down = 15
                    else:
                        down = 30
                    dists = pcl_only_inliers[
                        i_cam].compute_point_cloud_distance(pcl_gt[i_cam])
                    p1 = pcl_only_inliers[i_cam].select_by_index(
                        np.where(np.asarray(dists) > threshold2)[0])
                    p2 = pcl_only_inliers[i_cam].select_by_index(
                        np.where(np.asarray(dists) > threshold2)[0],
                        invert=True).uniform_down_sample(
                            down)  #.voxel_down_sample(voxel_size=0.5)
                    pcl_only_inliers[i_cam] = p1 + p2

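        # For the first processed frame only (first_pic), render the merged cloud from 120
        # precomputed viewpoints (pinhole camera JSON files) and save one PNG per viewpoint.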
        if first_pic:
            for i_cam_n in range(120):
                vis_only_inliers = o3d.visualization.Visualizer()
                vis_only_inliers.create_window(visible=True,
                                               window_name='inliers' +
                                               str(i_file))
                for i_cam in range(N_cams):
                    vis_only_inliers.add_geometry(pcl_only_inliers[i_cam])
                for i, e in enumerate(pcl_gt):
                    if e != 0:
                        vis_only_inliers.add_geometry(e)
                ctr = vis_only_inliers.get_view_control()
                ctr.set_lookat(lookat_vector)
                ctr.set_front(front_vector)
                ctr.set_up(up_vector)
                ctr.set_zoom(zoom_float)
                param = o3d.io.read_pinhole_camera_parameters(
                    '/home/vbelissen/Downloads/test/cameras_jsons/sequence/test1_'
                    + str(i_cam_n) + 'v3.json')
                ctr.convert_from_pinhole_camera_parameters(param)
                opt = vis_only_inliers.get_render_option()
                opt.background_color = np.asarray([0, 0, 0])
                opt.point_size = 3.0
                #opt.light_on = False
                #vis_only_inliers.update_geometry('inliers0')
                vis_only_inliers.poll_events()
                vis_only_inliers.update_renderer()
                if stop:
                    vis_only_inliers.run()
                    pcd1 = pcl_only_inliers[0] + pcl_only_inliers[
                        1] + pcl_only_inliers[2] + pcl_only_inliers[3]
                    for i_cam3 in range(4):
                        if has_gt_depth[i_cam3]:
                            pcd1 += pcl_gt[i_cam3]
                    #o3d.io.write_point_cloud(os.path.join(output_folder, seq_name, 'open3d', base_0 + '.pcd'), pcd1)
                param = vis_only_inliers.get_view_control(
                ).convert_to_pinhole_camera_parameters()
                o3d.io.write_pinhole_camera_parameters(
                    '/home/vbelissen/Downloads/test.json', param)
                image = vis_only_inliers.capture_screen_float_buffer(False)
                plt.imsave(os.path.join(output_folder, seq_name, 'pcl', base_0,
                                        str(i_cam_n) + '.png'),
                           np.asarray(image),
                           dpi=1)
                vis_only_inliers.destroy_window()
                del ctr
                del vis_only_inliers
                del opt
            first_pic = False

        i_cam2 = 0
        #for i_cam2 in range(4):
        #for suff in ['', 'bis', 'ter']:
        suff = ''
        vis_only_inliers = o3d.visualization.Visualizer()
        vis_only_inliers.create_window(visible=True,
                                       window_name='inliers' + str(i_file))
        for i_cam in range(N_cams):
            vis_only_inliers.add_geometry(pcl_only_inliers[i_cam])
        if print_lidar:
            for i, e in enumerate(pcl_gt):
                if e != 0:
                    vis_only_inliers.add_geometry(e)
        ctr = vis_only_inliers.get_view_control()
        ctr.set_lookat(lookat_vector)
        ctr.set_front(front_vector)
        ctr.set_up(up_vector)
        ctr.set_zoom(zoom_float)
        param = o3d.io.read_pinhole_camera_parameters(
            '/home/vbelissen/Downloads/test/cameras_jsons/sequence/test1_' +
            str(119) + 'v3.json')
        ctr.convert_from_pinhole_camera_parameters(param)
        opt = vis_only_inliers.get_render_option()
        opt.background_color = np.asarray([0, 0, 0])
        opt.point_size = 3.0
        #opt.light_on = False
        #vis_only_inliers.update_geometry('inliers0')
        vis_only_inliers.poll_events()
        vis_only_inliers.update_renderer()
        if stop:
            vis_only_inliers.run()
            pcd1 = pcl_only_inliers[0] + pcl_only_inliers[
                1] + pcl_only_inliers[2] + pcl_only_inliers[3]
            for i_cam3 in range(4):
                if has_gt_depth[i_cam3]:
                    pcd1 += pcl_gt[i_cam3]
            if i_cam2 == 0 and suff == '':
                o3d.io.write_point_cloud(
                    os.path.join(output_folder, seq_name, 'open3d',
                                 base_0 + '.pcd'), pcd1)
        #param = vis_only_inliers.get_view_control().convert_to_pinhole_camera_parameters()
        #o3d.io.write_pinhole_camera_parameters('/home/vbelissen/Downloads/test.json', param)
        image = vis_only_inliers.capture_screen_float_buffer(False)
        plt.imsave(os.path.join(
            output_folder, seq_name, 'pcl', 'normal',
            str(i_cam2) + suff,
            base_0 + '_normal_' + str(i_cam2) + suff + '.png'),
                   np.asarray(image),
                   dpi=1)
        vis_only_inliers.destroy_window()
        del ctr
        del vis_only_inliers
        del opt

        for i_cam in range(N_cams):
            rgb.append(
                images[i_cam][0].permute(1, 2, 0).detach().cpu().numpy() * 255)
            viz_pred_inv_depths.append(
                viz_inv_depth(pred_inv_depths[i_cam][0], normalizer=0.8) * 255)
            viz_pred_inv_depths[i_cam][not_masked[i_cam].reshape(image_shape)
                                       == 0] = 0
            concat = np.concatenate([rgb[i_cam], viz_pred_inv_depths[i_cam]],
                                    0)
            # Save visualization
            output_file1 = os.path.join(
                output_folder, seq_name, 'depth', camera_names[i_cam],
                os.path.basename(input_files[i_cam][i_file]))
            output_file2 = os.path.join(
                output_folder, seq_name, 'rgb', camera_names[i_cam],
                os.path.basename(input_files[i_cam][i_file]))
            imwrite(output_file1, viz_pred_inv_depths[i_cam][:, :, ::-1])
            # if has_full_mask[i_cam]:
            #     full_mask = np.load(input_full_masks[i_cam])
            #     mask_colors = label_colors[correspondence[full_mask]]
            #     imwrite(output_file2, (1-alpha_mask) * rgb[i_cam][:, :, ::-1] + alpha_mask * mask_colors[:, :, ::-1]*255)
            # else:
            imwrite(output_file2, rgb[i_cam][:, :, ::-1])
예제 #17
0
def infer_optimal_calib(input_files, model_wrappers, image_shape):
    """
    Process a list of input files to infer correction in extrinsic calibration.
    Files should all correspond to the same car.
    Number of cameras is assumed to be 4 or 5.

    Parameters
    ----------
    input_files : list (number of cameras) of lists (number of files) of str
        Image files
    model_wrappers : nn.Module
        Model wrappers used for inference
    image_shape : Image shape
        Input image shape
    """
    N_files = len(input_files[0])
    N_cams = len(input_files)
    image_area = image_shape[0] * image_shape[1]

    camera_context_pairs = CAMERA_CONTEXT_PAIRS[N_cams]

    # Rotation will be optimized if not all cams are frozen
    optimize_rotation = (args.frozen_cams_rot != [i for i in range(N_cams)])

    # Translation will be optimized if not all cams are frozen
    optimize_translation = (args.frozen_cams_trans !=
                            [i for i in range(N_cams)])

    calib_data = {}
    for i_cam in range(N_cams):
        base_folder_str = get_base_folder(input_files[i_cam][0])
        split_type_str = get_split_type(input_files[i_cam][0])
        seq_name_str = get_sequence_name(input_files[i_cam][0])
        camera_str = get_camera_name(input_files[i_cam][0])
        calib_data[camera_str] = read_raw_calib_files_camera_valeo_with_suffix(
            base_folder_str, split_type_str, seq_name_str, camera_str,
            args.calibrations_suffix)

    cams = []
    cams_untouched = []
    not_masked = []

    # Assume all images are from the same sequence (thus same cameras)
    for i_cam in range(N_cams):
        path_to_ego_mask = get_path_to_ego_mask(input_files[i_cam][0])
        poly_coeffs, principal_point, scale_factors, K, k, p = get_full_intrinsics(
            input_files[i_cam][0], calib_data)

        poly_coeffs_untouched = torch.from_numpy(poly_coeffs).unsqueeze(0)
        principal_point_untouched = torch.from_numpy(
            principal_point).unsqueeze(0)
        scale_factors_untouched = torch.from_numpy(scale_factors).unsqueeze(0)
        K_untouched = torch.from_numpy(K).unsqueeze(0)
        k_untouched = torch.from_numpy(k).unsqueeze(0)
        p_untouched = torch.from_numpy(p).unsqueeze(0)
        pose_matrix_untouched = torch.from_numpy(
            get_extrinsics_pose_matrix(input_files[i_cam][0],
                                       calib_data)).unsqueeze(0)
        pose_tensor_untouched = Pose(pose_matrix_untouched)
        camera_type_untouched = get_camera_type(input_files[i_cam][0],
                                                calib_data)
        camera_type_int_untouched = torch.tensor(
            [get_camera_type_int(camera_type_untouched)])

        cams.append(
            CameraMultifocal(poly_coeffs=poly_coeffs_untouched.float(),
                             principal_point=principal_point_untouched.float(),
                             scale_factors=scale_factors_untouched.float(),
                             K=K_untouched.float(),
                             k1=k_untouched[:, 0].float(),
                             k2=k_untouched[:, 1].float(),
                             k3=k_untouched[:, 2].float(),
                             p1=p_untouched[:, 0].float(),
                             p2=p_untouched[:, 1].float(),
                             camera_type=camera_type_int_untouched,
                             Tcw=pose_tensor_untouched))

        cams_untouched.append(
            CameraMultifocal(poly_coeffs=poly_coeffs_untouched.float(),
                             principal_point=principal_point_untouched.float(),
                             scale_factors=scale_factors_untouched.float(),
                             K=K_untouched.float(),
                             k1=k_untouched[:, 0].float(),
                             k2=k_untouched[:, 1].float(),
                             k3=k_untouched[:, 2].float(),
                             p1=p_untouched[:, 0].float(),
                             p2=p_untouched[:, 1].float(),
                             camera_type=camera_type_int_untouched,
                             Tcw=pose_tensor_untouched))
        if torch.cuda.is_available():
            cams[i_cam] = cams[i_cam].to('cuda:{}'.format(rank()))
            cams_untouched[i_cam] = cams_untouched[i_cam].to('cuda:{}'.format(
                rank()))

        ego_mask = np.load(path_to_ego_mask)
        not_masked.append(
            torch.from_numpy(ego_mask.astype(float)).cuda().float())

    # Learning variables
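    # (one 3-vector per camera for extra translation in meters and one for extra rotation
    # in degrees, optimized by gradient descent to correct the nominal extrinsics)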
    extra_trans_m = [
        torch.autograd.Variable(torch.zeros(3).cuda(), requires_grad=True)
        for _ in range(N_cams)
    ]
    extra_rot_deg = [
        torch.autograd.Variable(torch.zeros(3).cuda(), requires_grad=True)
        for _ in range(N_cams)
    ]

    # Constraints: translation
    frozen_cams_trans = args.frozen_cams_trans
    if frozen_cams_trans is not None:
        for i_cam in frozen_cams_trans:
            extra_trans_m[i_cam].requires_grad = False
    # Constraints: rotation
    frozen_cams_rot = args.frozen_cams_rot
    if frozen_cams_rot is not None:
        for i_cam in frozen_cams_rot:
            extra_rot_deg[i_cam].requires_grad = False

    # Parameters from argument parser
    save_pictures = args.save_pictures
    n_epochs = args.n_epochs
    learning_rate = args.lr
    step_size = args.scheduler_step_size
    gamma = args.scheduler_gamma

    # Table of loss
    loss_tab = np.zeros(n_epochs)

    # Table of extra rotation values
    extra_rot_values_tab = np.zeros((N_cams * 3, N_files * n_epochs))

    # Table of extra translation values
    extra_trans_values_tab = np.zeros((N_cams * 3, N_files * n_epochs))

    # Optimizer
    optimizer = optim.Adam(extra_trans_m + extra_rot_deg, lr=learning_rate)

    # Scheduler
    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          step_size=step_size,
                                          gamma=gamma)

    # Regularization weights
    regul_weight_trans = torch.tensor(args.regul_weight_trans).cuda()
    regul_weight_rot = torch.tensor(args.regul_weight_rot).cuda()
    regul_weight_overlap = torch.tensor(args.regul_weight_overlap).cuda()

    # Loop on the number of epochs
    count = 0
    for epoch in range(n_epochs):
        print('Epoch ' + str(epoch) + '/' + str(n_epochs))

        # Initialize loss
        loss_sum = 0

        # Loop on the number of files
        for i_file in range(N_files):

            print('')
            # Filename for camera 0
            base_0, ext_0 = os.path.splitext(
                os.path.basename(input_files[0][i_file]))
            print(base_0)

            # Initialize list of tensors: images, predicted inverse depths and predicted depths
            images, pred_inv_depths, pred_depths = [], [], []
            input_depth_files, has_gt_depth, gt_depth, gt_inv_depth = [], [], [], []
            nb_gt_depths = 0

            # Invalidate the cached Twc poses (they are recomputed after Tcw is updated below)
            CameraMultifocal.Twc.fget.cache_clear()

            # Loop on cams and predict depth
            for i_cam in range(N_cams):
                images.append(
                    load_image(input_files[i_cam][i_file]).convert('RGB'))
                images[i_cam] = resize_image(images[i_cam], image_shape)
                images[i_cam] = to_tensor(images[i_cam]).unsqueeze(0)
                if torch.cuda.is_available():
                    images[i_cam] = images[i_cam].to('cuda:{}'.format(rank()))
                with torch.no_grad():
                    pred_inv_depths.append(model_wrappers[i_cam].depth(
                        images[i_cam]))
                    pred_depths.append(inv2depth(pred_inv_depths[i_cam]))

                if args.use_lidar:
                    input_depth_files.append(
                        get_depth_file(input_files[i_cam][i_file],
                                       args.depth_suffix))
                    has_gt_depth.append(
                        os.path.exists(input_depth_files[i_cam]))
                    if has_gt_depth[i_cam]:
                        nb_gt_depths += 1
                        gt_depth.append(
                            np.load(input_depth_files[i_cam])
                            ['velodyne_depth'].astype(np.float32))
                        gt_depth[i_cam] = torch.from_numpy(
                            gt_depth[i_cam]).unsqueeze(0).unsqueeze(0)
                        gt_inv_depth.append(depth2inv(gt_depth[i_cam]))
                        if torch.cuda.is_available():
                            gt_depth[i_cam] = gt_depth[i_cam].to(
                                'cuda:{}'.format(rank()))
                            gt_inv_depth[i_cam] = gt_inv_depth[i_cam].to(
                                'cuda:{}'.format(rank()))
                    else:
                        gt_depth.append(0)
                        gt_inv_depth.append(0)

                # Apply correction on cams
                pose_matrix = get_extrinsics_pose_matrix_extra_trans_rot_torch(
                    input_files[i_cam][i_file], calib_data,
                    extra_trans_m[i_cam], extra_rot_deg[i_cam]).unsqueeze(0)
                pose_tensor = Pose(pose_matrix).to('cuda:{}'.format(rank()))
                cams[i_cam].Tcw = pose_tensor

            # Define a loss function between 2 images
            def photo_loss_2imgs(i_cam1, i_cam2, save_pictures):
                # Computes the photometric loss between 2 images of adjacent cameras
                # It reconstructs each image from the adjacent one, applying correction in rotation and translation

                # Reconstruct 3D points for each cam
                world_points1 = cams[i_cam1].reconstruct(pred_depths[i_cam1],
                                                         frame='w')
                world_points2 = cams[i_cam2].reconstruct(pred_depths[i_cam2],
                                                         frame='w')

                # Get coordinates of projected points on other cam
                ref_coords1to2 = cams[i_cam2].project(world_points1, frame='w')
                ref_coords2to1 = cams[i_cam1].project(world_points2, frame='w')

                # Reconstruct each image from the adjacent camera
                reconstructedImg2to1 = funct.grid_sample(images[i_cam2] *
                                                         not_masked[i_cam2],
                                                         ref_coords1to2,
                                                         mode='bilinear',
                                                         padding_mode='zeros',
                                                         align_corners=True)
                reconstructedImg1to2 = funct.grid_sample(images[i_cam1] *
                                                         not_masked[i_cam1],
                                                         ref_coords2to1,
                                                         mode='bilinear',
                                                         padding_mode='zeros',
                                                         align_corners=True)
                # Save pictures if requested
                if save_pictures:
                    # Save original files if first epoch
                    if epoch == 0:
                        cv2.imwrite(
                            args.save_folder + '/cam_' + str(i_cam1) +
                            '_file_' + str(i_file) + '_orig.png',
                            (images[i_cam1][0].permute(1, 2, 0)
                             )[:, :, [2, 1, 0]].detach().cpu().numpy() * 255)
                        cv2.imwrite(
                            args.save_folder + '/cam_' + str(i_cam2) +
                            '_file_' + str(i_file) + '_orig.png',
                            (images[i_cam2][0].permute(1, 2, 0)
                             )[:, :, [2, 1, 0]].detach().cpu().numpy() * 255)
                    # Save reconstructed images
                    cv2.imwrite(
                        args.save_folder + '/epoch_' + str(epoch) + '_file_' +
                        str(i_file) + '_cam_' + str(i_cam1) + '_recons_from_' +
                        str(i_cam2) + '.png',
                        ((reconstructedImg2to1 *
                          not_masked[i_cam1])[0].permute(1, 2, 0)
                         )[:, :, [2, 1, 0]].detach().cpu().numpy() * 255)
                    cv2.imwrite(
                        args.save_folder + '/epoch_' + str(epoch) + '_file_' +
                        str(i_file) + '_cam_' + str(i_cam2) + '_recons_from_' +
                        str(i_cam1) + '.png',
                        ((reconstructedImg1to2 *
                          not_masked[i_cam2])[0].permute(1, 2, 0)
                         )[:, :, [2, 1, 0]].detach().cpu().numpy() * 255)

                # L1 loss
                l1_loss_1 = torch.abs(images[i_cam1] * not_masked[i_cam1] -
                                      reconstructedImg2to1 *
                                      not_masked[i_cam1])
                l1_loss_2 = torch.abs(images[i_cam2] * not_masked[i_cam2] -
                                      reconstructedImg1to2 *
                                      not_masked[i_cam2])

                # SSIM loss
                ssim_loss_weight = 0.85
                ssim_loss_1 = SSIM(images[i_cam1] * not_masked[i_cam1],
                                   reconstructedImg2to1 * not_masked[i_cam1],
                                   C1=1e-4,
                                   C2=9e-4,
                                   kernel_size=3)
                ssim_loss_2 = SSIM(images[i_cam2] * not_masked[i_cam2],
                                   reconstructedImg1to2 * not_masked[i_cam2],
                                   C1=1e-4,
                                   C2=9e-4,
                                   kernel_size=3)

                ssim_loss_1 = torch.clamp((1. - ssim_loss_1) / 2., 0., 1.)
                ssim_loss_2 = torch.clamp((1. - ssim_loss_2) / 2., 0., 1.)

                # Photometric loss: alpha * ssim + (1 - alpha) * l1
                photometric_loss_1 = ssim_loss_weight * ssim_loss_1.mean(
                    1, True) + (1 - ssim_loss_weight) * l1_loss_1.mean(
                        1, True)
                photometric_loss_2 = ssim_loss_weight * ssim_loss_2.mean(
                    1, True) + (1 - ssim_loss_weight) * l1_loss_2.mean(
                        1, True)

                # Compute the number of valid pixels
                mask1 = (reconstructedImg2to1 * not_masked[i_cam1]).sum(
                    axis=1, keepdim=True) != 0
                s1 = mask1.sum().float()
                mask2 = (reconstructedImg1to2 * not_masked[i_cam2]).sum(
                    axis=1, keepdim=True) != 0
                s2 = mask2.sum().float()

                # Compute the photometric losses weighed by the number of valid pixels
                loss_1 = (photometric_loss_1 *
                          mask1).sum() / s1 if s1 > 0 else 0
                loss_2 = (photometric_loss_2 *
                          mask2).sum() / s2 if s2 > 0 else 0

                # The final loss can be regularized to encourage a similar overlap between images
                if s1 > 0 and s2 > 0:
                    return loss_1 + loss_2 + regul_weight_overlap * image_area * (
                        1 / s1 + 1 / s2)
                else:
                    return 0.

            def lidar_loss(i_cam1, save_pictures):
                if args.use_lidar and has_gt_depth[i_cam1]:
                    mask_zeros_lidar = (
                        gt_depth[i_cam1][0, 0, :, :] == 0).detach()

                    # Ground truth sparse depth maps were generated using the untouched camera extrinsics
                    world_points_gt_oldCalib = cams_untouched[
                        i_cam1].reconstruct(gt_depth[i_cam1], frame='w')
                    world_points_gt_oldCalib[0, 0, mask_zeros_lidar] = 0.

                    # Get coordinates of projected points on new cam
                    ref_coords = cams[i_cam1].project(world_points_gt_oldCalib,
                                                      frame='w')
                    ref_coords[0, mask_zeros_lidar, :] = 0.

                    # Reconstruct projected lidar from the new camera
                    reprojected_gt_inv_depth = funct.grid_sample(
                        gt_inv_depth[i_cam1],
                        ref_coords,
                        mode='nearest',
                        padding_mode='zeros',
                        align_corners=True)
                    reprojected_gt_inv_depth[0, 0, mask_zeros_lidar] = 0.

                    mask_reprojected = (reprojected_gt_inv_depth > 0.).detach()
                    if save_pictures:
                        mask_reprojected_numpy = mask_reprojected[
                            0, 0, :, :].cpu().numpy()
                        u = np.where(mask_reprojected_numpy)[0]
                        v = np.where(mask_reprojected_numpy)[1]
                        n_lidar = u.size
                        reprojected_gt_depth_numpy = inv2depth(
                            reprojected_gt_inv_depth)[
                                0, 0, :, :].detach().cpu().numpy()

                        im = (images[i_cam1][0].permute(
                            1, 2, 0))[:, :,
                                      [2, 1, 0]].detach().cpu().numpy() * 255
                        dmax = 100.
                        for i_l in range(n_lidar):
                            d = reprojected_gt_depth_numpy[u[i_l], v[i_l]]
                            s = int((8 / d)) + 1
                            im[u[i_l] - s:u[i_l] + s, v[i_l] - s:v[i_l] + s,
                               0] = np.clip(
                                   np.power(d / dmax, .7) * 255, 10, 245)
                            im[u[i_l] - s:u[i_l] + s, v[i_l] - s:v[i_l] + s,
                               1] = np.clip(
                                   np.power((dmax - d) / dmax, 4.0) * 255, 10,
                                   245)
                            im[u[i_l] - s:u[i_l] + s, v[i_l] - s:v[i_l] + s,
                               2] = np.clip(
                                   np.power(np.abs(2 * (d - .5 * dmax) / dmax),
                                            3.0) * 255, 10, 245)

                        cv2.imwrite(
                            args.save_folder + '/epoch_' + str(epoch) +
                            '_file_' + str(i_file) + '_cam_' + str(i_cam1) +
                            '_lidar.png', im)

                    if mask_reprojected.sum() > 0:
                        return l1_lidar_loss(
                            pred_inv_depths[i_cam1] * not_masked[i_cam1],
                            reprojected_gt_inv_depth * not_masked[i_cam1])
                    else:
                        return 0.
                else:
                    return 0.

            if nb_gt_depths > 0:
                final_lidar_weight = (N_cams /
                                      nb_gt_depths) * args.lidar_weight
            else:
                final_lidar_weight = 0.

            # The final loss consists of summing the photometric loss of all pairs of adjacent cameras
            # and is regularized to prevent weights from exploding
            photo_loss = 1.0 * sum([
                photo_loss_2imgs(p[0], p[1], save_pictures)
                for p in camera_context_pairs
            ])
            regul_rot_loss = regul_weight_rot * sum(
                [(extra_rot_deg[i]**2).sum() for i in range(N_cams)])
            regul_trans_loss = regul_weight_trans * sum(
                [(extra_trans_m[i]**2).sum() for i in range(N_cams)])
            lidar_gt_loss = final_lidar_weight * sum(
                [lidar_loss(i, save_pictures) for i in range(N_cams)])

            loss = photo_loss + regul_rot_loss + regul_trans_loss + lidar_gt_loss

            with torch.no_grad():
                extra_rot_deg_before = []
                extra_trans_m_before = []
                for i_cam in range(N_cams):
                    extra_rot_deg_before.append(extra_rot_deg[i_cam].clone())
                    extra_trans_m_before.append(extra_trans_m[i_cam].clone())

            # Optimization steps
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            with torch.no_grad():
                extra_rot_deg_after = []
                extra_trans_m_after = []
                for i_cam in range(N_cams):
                    extra_rot_deg_after.append(extra_rot_deg[i_cam].clone())
                    extra_trans_m_after.append(extra_trans_m[i_cam].clone())

                rot_change_file = 0.
                trans_change_file = 0.

                for i_cam in range(N_cams):
                    rot_change_file += torch.abs(
                        extra_rot_deg_after[i_cam] -
                        extra_rot_deg_before[i_cam]).mean().item()
                    trans_change_file += torch.abs(
                        extra_trans_m_after[i_cam] -
                        extra_trans_m_before[i_cam]).mean().item()

                rot_change_file /= N_cams
                trans_change_file /= N_cams

                print('Average rotation change (deg.): ' +
                      "{:.4f}".format(rot_change_file))
                print('Average translation change (m.): ' +
                      "{:.4f}".format(trans_change_file))

            # Save correction values and print loss
            with torch.no_grad():
                loss_sum += loss.item()
                for i_cam in range(N_cams):
                    for j in range(3):
                        if optimize_rotation:
                            extra_rot_values_tab[
                                3 * i_cam + j,
                                count] = extra_rot_deg[i_cam][j].item()
                        if optimize_translation:
                            extra_trans_values_tab[
                                3 * i_cam + j,
                                count] = extra_trans_m[i_cam][j].item()
                print('Loss: ' + "{:.3f}".format(loss.item()) \
                      + ' (photometric: ' + "{:.3f}".format(photo_loss.item()) \
                      + ', rotation reg.: ' + "{:.4f}".format(regul_rot_loss.item()) \
                      + ', translation reg.: ' + "{:.4f}".format(regul_trans_loss.item())
                      + ', lidar: ' + "{:.3f}".format(lidar_gt_loss) +')')
                if nb_gt_depths > 0:
                    print('Number of ground truth lidar maps: ' +
                          str(nb_gt_depths))

            count += 1

        # Update scheduler
        print('Epoch:', epoch, 'LR:', scheduler.get_lr())
        scheduler.step()
        with torch.no_grad():
            print('End of epoch')
            if optimize_translation:
                print('New translation correction values: ')
                print(extra_trans_m)
            if optimize_rotation:
                print('New rotation correction values: ')
                print(extra_rot_deg)
            print('Average rotation change in epoch:')

        loss_tab[epoch] = loss_sum / N_files

    # Plot/save loss if requested
    plt.figure()
    plt.plot(loss_tab)
    if args.show_plots:
        plt.show()
    if args.save_plots:
        plt.savefig(
            os.path.join(args.save_folder,
                         get_sequence_name(input_files[0][0]) + '_loss.png'))

    # Plot/save correction values if requested
    if optimize_rotation:
        plt.figure()
        for j in range(N_cams * 3):
            plt.plot(extra_rot_values_tab[j])
        if args.show_plots:
            plt.show()
        if args.save_plots:
            plt.savefig(
                os.path.join(
                    args.save_folder,
                    get_sequence_name(input_files[0][0]) + '_extra_rot.png'))

    if optimize_translation:
        plt.figure()
        for j in range(N_cams * 3):
            plt.plot(extra_trans_values_tab[j])
        if args.show_plots:
            plt.show()
        if args.save_plots:
            plt.savefig(
                os.path.join(
                    args.save_folder,
                    get_sequence_name(input_files[0][0]) + '_extra_trans.png'))

    # Save correction values table if requested
    if args.save_rot_tab:
        np.save(
            os.path.join(args.save_folder,
                         get_sequence_name(input_files[0][0]) +
                         '_rot_tab.npy'), extra_rot_values_tab)
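
# Hedged sketch (not part of the original script): the optimization pattern used in
# infer_optimal_calib, reduced to a self-contained example. All names and values below
# are illustrative placeholders; the real loss combines photometric, lidar and
# regularization terms computed from the camera images.
def _sketch_calib_optimization(n_cams=4, n_steps=5, lr=1e-2):
    import torch
    from torch import optim

    # One 3-vector of extra translation (m) and one of extra rotation (deg) per camera
    extra_trans_m = [torch.zeros(3, requires_grad=True) for _ in range(n_cams)]
    extra_rot_deg = [torch.zeros(3, requires_grad=True) for _ in range(n_cams)]

    optimizer = optim.Adam(extra_trans_m + extra_rot_deg, lr=lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.5)

    target = torch.full((3,), 0.1)  # placeholder for the unknown calibration error
    for _ in range(n_steps):
        # Placeholder loss standing in for photo_loss + regul_rot_loss + regul_trans_loss + lidar_gt_loss
        loss = sum(((t - target) ** 2).sum() for t in extra_trans_m + extra_rot_deg)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

    return extra_trans_m, extra_rot_deg

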
def infer_plot_and_save_3D_pcl(input_files, output_folder, model_wrappers, image_shape, half, save, stop):
    """
    Process a list of input files to produce and save 3D point cloud visualizations.

    Parameters
    ----------
    input_files : list (number of cameras) of lists (number of files) of str
        Image files
    output_folder : str
        Folder where the outputs will be saved
    model_wrappers : nn.Module
        Model wrappers used for inference
    image_shape : Image shape
        Input image shape
    half : bool
        Use half precision (fp16)
    save : str
        Save format (npz or png)
    stop : bool
        If True, pause on the interactive 3D visualizer
    """
    N_cams = len(input_files)
    N_files = len(input_files[0])

    camera_names = []
    for i_cam in range(N_cams):
        camera_names.append(get_camera_name(input_files[i_cam][0]))

    cams = []
    not_masked = []

    cams_x = []
    cams_y = []

    # change to half precision for evaluation if requested
    dtype = torch.float16 if half else None

    bbox = o3d.geometry.AxisAlignedBoundingBox(min_bound=(-1000, -1000, -1), max_bound=(1000, 1000, 5))

    # let's assume all images are from the same sequence (thus same cameras)
    for i_cam in range(N_cams):
        base_folder_str = get_base_folder(input_files[i_cam][0])
        split_type_str  = get_split_type(input_files[i_cam][0])
        seq_name_str    = get_sequence_name(input_files[i_cam][0])
        camera_str      = get_camera_name(input_files[i_cam][0])

        calib_data = {}
        calib_data[camera_str] = read_raw_calib_files_camera_valeo(base_folder_str, split_type_str, seq_name_str, camera_str)

        cams_x.append(float(calib_data[camera_str]['extrinsics']['pos_x_m']))
        cams_y.append(float(calib_data[camera_str]['extrinsics']['pos_y_m']))

        path_to_theta_lut = get_path_to_theta_lut(input_files[i_cam][0])
        path_to_ego_mask = get_path_to_ego_mask(input_files[i_cam][0])
        poly_coeffs, principal_point, scale_factors = get_intrinsics(input_files[i_cam][0], calib_data)

        poly_coeffs = torch.from_numpy(poly_coeffs).unsqueeze(0)
        principal_point = torch.from_numpy(principal_point).unsqueeze(0)
        scale_factors = torch.from_numpy(scale_factors).unsqueeze(0)
        pose_matrix = torch.from_numpy(get_extrinsics_pose_matrix(input_files[i_cam][0], calib_data)).unsqueeze(0)
        pose_tensor = Pose(pose_matrix)

        cams.append(CameraFisheye(path_to_theta_lut=[path_to_theta_lut],
                             path_to_ego_mask=[path_to_ego_mask],
                             poly_coeffs=poly_coeffs.float(),
                             principal_point=principal_point.float(),
                             scale_factors=scale_factors.float(),
                             Tcw=pose_tensor))
        if torch.cuda.is_available():
            cams[i_cam] = cams[i_cam].to('cuda:{}'.format(rank()), dtype=dtype)

        ego_mask = np.load(path_to_ego_mask)
        not_masked.append(ego_mask.astype(bool).reshape(-1))


    # create output dirs for each cam
    seq_name = get_sequence_name(input_files[0][0])
    for i_cam in range(N_cams):
        os.makedirs(os.path.join(output_folder, seq_name, 'depth', camera_names[i_cam]), exist_ok=True)
        os.makedirs(os.path.join(output_folder, seq_name, 'rgb', camera_names[i_cam]), exist_ok=True)


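    # Scan a global depth scale factor K and, for each value, measure how consistently the
    # point clouds of adjacent cameras overlap (number of close points and mean distance).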
    i_file = 0
    values = np.arange(1.0, 15.0, 0.5)
    N_values = values.size
    nb_points = np.zeros((N_values, 4))
    n = 0
    for K in values:
        print(K)

        base_0, ext_0 = os.path.splitext(os.path.basename(input_files[0][i_file]))
        print(base_0)

        images = []
        images_numpy = []
        pred_inv_depths = []
        pred_depths = []
        world_points = []
        input_depth_files = []
        has_gt_depth = []
        gt_depth = []
        gt_depth_3d = []
        pcl_full = []
        pcl_only_inliers = []
        pcl_only_outliers = []
        pcl_gt = []
        rgb = []
        viz_pred_inv_depths = []
        for i_cam in range(N_cams):
            images.append(load_image(input_files[i_cam][i_file]).convert('RGB'))
            images[i_cam] = resize_image(images[i_cam], image_shape)
            images[i_cam] = to_tensor(images[i_cam]).unsqueeze(0)
            if torch.cuda.is_available():
                images[i_cam] = images[i_cam].to('cuda:{}'.format(rank()), dtype=dtype)
            images_numpy.append(images[i_cam][0].cpu().numpy())
            images_numpy[i_cam] = images_numpy[i_cam].reshape((3, -1)).transpose()
            images_numpy[i_cam] = images_numpy[i_cam][not_masked[i_cam]]
            pred_inv_depths.append(model_wrappers[i_cam].depth(images[i_cam]))
            pred_depths.append(inv2depth(pred_inv_depths[i_cam]))
            world_points.append(cams[i_cam].reconstruct(K*pred_depths[i_cam], frame='w'))
            world_points[i_cam] = world_points[i_cam][0].cpu().numpy()
            world_points[i_cam] = world_points[i_cam].reshape((3, -1)).transpose()
            world_points[i_cam] = world_points[i_cam][not_masked[i_cam]]
            cam_name = camera_names[i_cam]
            cam_int = cam_name.split('_')[-1]
            input_depth_files.append(get_depth_file(input_files[i_cam][i_file]))
            has_gt_depth.append(os.path.exists(input_depth_files[i_cam]))
            if has_gt_depth[i_cam]:
                gt_depth.append(np.load(input_depth_files[i_cam])['velodyne_depth'].astype(np.float32))
                gt_depth[i_cam] = torch.from_numpy(gt_depth[i_cam]).unsqueeze(0).unsqueeze(0)
                if torch.cuda.is_available():
                    gt_depth[i_cam] = gt_depth[i_cam].to('cuda:{}'.format(rank()), dtype=dtype)
                gt_depth_3d.append(cams[i_cam].reconstruct(gt_depth[i_cam], frame='w'))
                gt_depth_3d[i_cam] = gt_depth_3d[i_cam][0].cpu().numpy()
                gt_depth_3d[i_cam] = gt_depth_3d[i_cam].reshape((3, -1)).transpose()
                #gt_depth_3d[i_cam] = gt_depth_3d[i_cam][not_masked[i_cam]]
            else:
                gt_depth.append(0)
                gt_depth_3d.append(0)
            pcl_full.append(o3d.geometry.PointCloud())
            pcl_full[i_cam].points = o3d.utility.Vector3dVector(world_points[i_cam])
            pcl_full[i_cam].colors = o3d.utility.Vector3dVector(images_numpy[i_cam])


            pcl = pcl_full[i_cam]#.select_by_index(ind)
            points_tmp = np.asarray(pcl.points)
            colors_tmp = np.asarray(pcl.colors)
            # remove high points (z > 2.5 m) with sky- or vegetation-like colors
            mask_height = points_tmp[:, 2] > 2.5  # * (abs(points_tmp[:, 0]) < 10) * (abs(points_tmp[:, 1]) < 3)
            mask_colors_blue = np.sum(np.abs(colors_tmp - np.array([0.6, 0.8, 1])), axis=1) < 0.6  # sky blue
            mask_colors_green = np.sum(np.abs(colors_tmp - np.array([0.2, 1, 0.4])), axis=1) < 0.8
            mask_colors_green2 = np.sum(np.abs(colors_tmp - np.array([0, 0.5, 0.15])), axis=1) < 0.2
            mask = 1 - mask_height * mask_colors_blue
            mask2 = 1 - mask_height * mask_colors_green
            mask3 = 1 - mask_height * mask_colors_green2
            #maskY = points_tmp[:, 1] > 1
            mask = mask * mask2 * mask3  # *maskY
            pcl = pcl.select_by_index(np.where(mask)[0])
            cl, ind = pcl.remove_statistical_outlier(nb_neighbors=7, std_ratio=1.4)
            pcl = pcl.select_by_index(ind)
            pcl_only_inliers.append(pcl)#pcl_full[i_cam].select_by_index(ind)[mask])
            #pcl_only_outliers.append(pcl_full[i_cam].select_by_index(ind, invert=True))
            #pcl_only_outliers[i_cam].paint_uniform_color([0.0, 0.0, 1.0])
            if has_gt_depth[i_cam]:
                pcl_gt.append(o3d.geometry.PointCloud())
                pcl_gt[i_cam].points = o3d.utility.Vector3dVector(gt_depth_3d[i_cam])
                gt_inv_depth = 1 / (np.linalg.norm(gt_depth_3d[i_cam], axis=1) + 1e-6)
                cm = get_cmap('plasma')
                normalizer = .35#np.percentile(gt_inv_depth, 95)
                gt_inv_depth /= (normalizer + 1e-6)
                #print(cm(np.clip(gt_inv_depth, 0., 1.0)).shape)  # [:, :3]

                pcl_gt[i_cam].colors = o3d.utility.Vector3dVector(cm(np.clip(gt_inv_depth, 0., 1.0))[:, :3])
            else:
                pcl_gt.append(0)
        #o3d.visualization.draw_geometries(pcl_full + [e for i, e in enumerate(pcl_gt) if e != 0])
        color_cam = True
        if color_cam:
            for i_cam in range(4):
                if i_cam == 0:
                    pcl_only_inliers[i_cam].paint_uniform_color([1.0, 0.0, 0.0])#.colors = o3d.utility.Vector3dVector(images_numpy[i_cam])
                elif i_cam == 1:
                    pcl_only_inliers[i_cam].paint_uniform_color([0.0, 1.0, 0.0])
                elif i_cam == 2:
                    pcl_only_inliers[i_cam].paint_uniform_color([0.0, 0.0, 1.0])
                elif i_cam == 3:
                    pcl_only_inliers[i_cam].paint_uniform_color([0.3, 0.4, 0.3])
        dist_total = np.zeros(4)
        for i_cam in range(4):
            pcl1 = pcl_only_inliers[i_cam % 4].uniform_down_sample(10)
            pcl2 = pcl_only_inliers[(i_cam+1) % 4].uniform_down_sample(10)
            points_tmp1 = np.asarray(pcl1.points)
            points_tmp2 = np.asarray(pcl2.points)
            if i_cam == 3 or i_cam == 0:  # comparison between cams 3 and 0, or 0 and 1
                maskX1 = points_tmp1[:, 0] > cams_x[0]
                maskX2 = points_tmp2[:, 0] > cams_x[0]
            if i_cam == 1 or i_cam == 2:  # comparison between cams 1 and 2, or 2 and 3
                maskX1 = points_tmp1[:, 0] < cams_x[2]
                maskX2 = points_tmp2[:, 0] < cams_x[2]
            if i_cam == 2 or i_cam == 3:  # comparison between cams 2 and 3, or 3 and 0
                maskY1 = points_tmp1[:, 1] > cams_y[3]
                maskY2 = points_tmp2[:, 1] > cams_y[3]
            if i_cam == 0 or i_cam == 1:  # comparison between cams 0 and 1, or 1 and 2
                maskY1 = points_tmp1[:, 1] < cams_y[1]
                maskY2 = points_tmp2[:, 1] < cams_y[1]
            mask_far1 = np.sqrt(np.square(points_tmp1[:, 0] - (cams_x[0] + cams_x[2])/2) + np.square(points_tmp1[:, 1] - (cams_y[1] + cams_y[3])/2)) < 5
            mask_far2 = np.sqrt(np.square(points_tmp2[:, 0] - (cams_x[0] + cams_x[2]) / 2) + np.square(points_tmp2[:, 1] - (cams_y[1] + cams_y[3]) / 2)) < 5
            pcl1 = pcl1.select_by_index(np.where(maskX1 * maskY1 * mask_far1)[0])
            pcl2 = pcl2.select_by_index(np.where(maskX2 * maskY2 * mask_far2)[0])
            o3d.visualization.draw_geometries([pcl1, pcl2])
            dists = pcl1.compute_point_cloud_distance(pcl2)
            nb_points[n, i_cam] = np.sum(np.asarray(dists)<0.5)
            print(nb_points[n, i_cam])
            dists = np.mean(np.asarray(dists))
            print(dists)
            dist_total[i_cam] = dists
        print(np.mean(dist_total))

        # vis_full = o3d.visualization.Visualizer()
        # vis_full.create_window(visible = True, window_name = 'full'+str(i_file))
        # for i_cam in range(N_cams):
        #     vis_full.add_geometry(pcl_full[i_cam])
        # for i, e in enumerate(pcl_gt):
        #     if e != 0:
        #         vis_full.add_geometry(e)
        # ctr = vis_full.get_view_control()
        # ctr.set_lookat(lookat_vector)
        # ctr.set_front(front_vector)
        # ctr.set_up(up_vector)
        # ctr.set_zoom(zoom_float)
        # #vis_full.run()
        # vis_full.destroy_window()
        i_cam2 = 0
        suff = ''
        vis_only_inliers = o3d.visualization.Visualizer()
        vis_only_inliers.create_window(visible = True, window_name = 'inliers'+str(i_file))
        for i_cam in range(N_cams):
            vis_only_inliers.add_geometry(pcl_only_inliers[i_cam])
        for i, e in enumerate(pcl_gt):
            if e != 0:
                vis_only_inliers.add_geometry(e)
        ctr = vis_only_inliers.get_view_control()
        ctr.set_lookat(lookat_vector)
        ctr.set_front(front_vector)
        ctr.set_up(up_vector)
        ctr.set_zoom(zoom_float)
        param = o3d.io.read_pinhole_camera_parameters('/home/vbelissen/Downloads/test/cameras_jsons/test'+str(i_cam2+1)+suff+'.json')
        ctr.convert_from_pinhole_camera_parameters(param)
        opt = vis_only_inliers.get_render_option()
        opt.background_color = np.asarray([0, 0, 0])
        #vis_only_inliers.update_geometry('inliers0')
        vis_only_inliers.poll_events()
        vis_only_inliers.update_renderer()
        if stop:
            vis_only_inliers.run()
        #param = vis_only_inliers.get_view_control().convert_to_pinhole_camera_parameters()
        #o3d.io.write_pinhole_camera_parameters('/home/vbelissen/Downloads/test.json', param)
        vis_only_inliers.destroy_window()
        del ctr
        del vis_only_inliers
        del opt
        n += 1

    plt.plot(values, nb_points)
    plt.show()
    plt.close()
    plt.plot(values, np.sum(nb_points,axis=1))
    plt.show()
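For reference, the cross-camera consistency check above downsamples each predicted point cloud, restricts it to the region where the fields of view of two adjacent cameras overlap (using the camera positions cams_x / cams_y and a 5 m radius around the rig center), and then measures agreement with Open3D's point-to-point distance; the number of points closer than 0.5 m to the neighboring cloud is the score that gets plotted. Below is a minimal, self-contained sketch of that comparison for one pair of already-cropped clouds; overlap_agreement is a hypothetical helper name, not part of the original code.

import numpy as np
import open3d as o3d

def overlap_agreement(pcl_a, pcl_b, every_k=10, dist_thresh=0.5):
    """Mean nearest-neighbor distance and count of 'agreeing' points between two clouds."""
    # Downsample both clouds to keep the distance computation cheap
    pcl_a = pcl_a.uniform_down_sample(every_k)
    pcl_b = pcl_b.uniform_down_sample(every_k)
    # For each point of A, distance to the closest point of B
    dists = np.asarray(pcl_a.compute_point_cloud_distance(pcl_b))
    n_close = int(np.sum(dists < dist_thresh))
    return float(dists.mean()), n_close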
예제 #19
0
            def photo_loss_2imgs(i_cam1, i_cam2, extra_trans_list, extra_rot_list, save_pictures):
                # Computes the photometric loss between 2 images of adjacent cameras
                # It reconstructs each image from the adjacent one, applying correction in rotation and translation

                # Apply correction on two cams
                for i, i_cam in enumerate([i_cam1, i_cam2]):
                    pose_matrix = get_extrinsics_pose_matrix_extra_trans_rot_torch(input_files[i_cam][i_file],
                                                                                   calib_data,
                                                                                   extra_trans_list[i],
                                                                                   extra_rot_list[i]).unsqueeze(0)
                    pose_tensor = Pose(pose_matrix).to('cuda:{}'.format(rank()))
                    CameraMultifocal.Twc.fget.cache_clear()
                    cams[i_cam].Tcw = pose_tensor

                # Reconstruct 3D points for each cam
                world_points1 = cams[i_cam1].reconstruct(pred_depths[i_cam1], frame='w')
                world_points2 = cams[i_cam2].reconstruct(pred_depths[i_cam2], frame='w')

                # Get coordinates of projected points on other cam
                ref_coords1to2 = cams[i_cam2].project(world_points1, frame='w')
                ref_coords2to1 = cams[i_cam1].project(world_points2, frame='w')

                # Reconstruct each image from the adjacent camera
                reconstructedImg2to1 = funct.grid_sample(images[i_cam2]*not_masked[i_cam2],
                                                         ref_coords1to2,
                                                         mode='bilinear', padding_mode='zeros', align_corners=True)
                reconstructedImg1to2 = funct.grid_sample(images[i_cam1]*not_masked[i_cam1],
                                                         ref_coords2to1,
                                                         mode='bilinear', padding_mode='zeros', align_corners=True)
                # Save pictures if requested
                if save_pictures:
                    # Save original files if first epoch
                    if epoch == 0:
                        cv2.imwrite(args.save_folder + '/cam_' + str(i_cam1) + '_file_' + str(i_file) + '_orig.png',
                                    (images[i_cam1][0].permute(1, 2, 0))[:,:,[2,1,0]].detach().cpu().numpy() * 255)
                        cv2.imwrite(args.save_folder + '/cam_' + str(i_cam2) + '_file_' + str(i_file) + '_orig.png',
                                    (images[i_cam2][0].permute(1, 2, 0))[:,:,[2,1,0]].detach().cpu().numpy() * 255)
                    # Save reconstructed images
                    cv2.imwrite(args.save_folder + '/epoch_' + str(epoch) + '_file_' + str(i_file) + '_cam_' + str(i_cam1) + '_recons_from_' + str(i_cam2) + '.png',
                                ((reconstructedImg2to1*not_masked[i_cam1])[0].permute(1, 2, 0))[:,:,[2,1,0]].detach().cpu().numpy() * 255)
                    cv2.imwrite(args.save_folder + '/epoch_' + str(epoch) + '_file_' + str(i_file) + '_cam_' + str(i_cam2) + '_recons_from_' + str(i_cam1) + '.png',
                                ((reconstructedImg1to2*not_masked[i_cam2])[0].permute(1, 2, 0))[:,:,[2,1,0]].detach().cpu().numpy() * 255)

                # L1 loss
                l1_loss_1 = torch.abs(images[i_cam1]*not_masked[i_cam1] - reconstructedImg2to1*not_masked[i_cam1])
                l1_loss_2 = torch.abs(images[i_cam2]*not_masked[i_cam2] - reconstructedImg1to2*not_masked[i_cam2])

                # SSIM loss
                ssim_loss_weight = 0.85
                ssim_loss_1 = SSIM(images[i_cam1]*not_masked[i_cam1],
                                   reconstructedImg2to1*not_masked[i_cam1],
                                   C1=1e-4, C2=9e-4, kernel_size=3)
                ssim_loss_2 = SSIM(images[i_cam2]*not_masked[i_cam2],
                                   reconstructedImg1to2*not_masked[i_cam2],
                                   C1=1e-4, C2=9e-4, kernel_size=3)

                ssim_loss_1 = torch.clamp((1. - ssim_loss_1) / 2., 0., 1.)
                ssim_loss_2 = torch.clamp((1. - ssim_loss_2) / 2., 0., 1.)

                # Photometric loss: alpha * ssim + (1 - alpha) * l1
                photometric_loss_1 = ssim_loss_weight * ssim_loss_1.mean(1, True) + (1 - ssim_loss_weight) * l1_loss_1.mean(1, True)
                photometric_loss_2 = ssim_loss_weight * ssim_loss_2.mean(1, True) + (1 - ssim_loss_weight) * l1_loss_2.mean(1, True)

                # Compute the number of valid pixels
                mask1 = (reconstructedImg2to1 * not_masked[i_cam1]).sum(axis=1, keepdim=True) != 0
                s1 = mask1.sum().float()
                mask2 = (reconstructedImg1to2 * not_masked[i_cam2]).sum(axis=1, keepdim=True) != 0
                s2 = mask2.sum().float()

                # Compute the photometric losses weighted by the number of valid pixels
                loss_1 = (photometric_loss_1 * mask1).sum() / s1 if s1 > 0 else 0
                loss_2 = (photometric_loss_2 * mask2).sum() / s2 if s2 > 0 else 0

                # The final loss can be regularized to encourage a similar overlap between images
                if s1 > 0 and s2 > 0:
                    return loss_1 + loss_2 + regul_weight_overlap * image_area * (1/s1 + 1/s2)
                else:
                    return 0.
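The per-pixel photometric term used by photo_loss_2imgs above is the usual weighted mix alpha * (1 - SSIM) / 2 + (1 - alpha) * L1, averaged over valid (non-masked, non-empty) pixels. The sketch below reproduces that combination; simple_ssim is a minimal average-pooling SSIM written here as a stand-in, not necessarily the SSIM implementation imported by the snippet.

import torch
import torch.nn.functional as F

def simple_ssim(x, y, C1=1e-4, C2=9e-4, kernel_size=3):
    """Minimal SSIM map computed with average pooling (stand-in for the project's SSIM)."""
    pool = lambda t: F.avg_pool2d(t, kernel_size, stride=1, padding=kernel_size // 2)
    mu_x, mu_y = pool(x), pool(y)
    sigma_x = pool(x * x) - mu_x ** 2
    sigma_y = pool(y * y) - mu_y ** 2
    sigma_xy = pool(x * y) - mu_x * mu_y
    num = (2 * mu_x * mu_y + C1) * (2 * sigma_xy + C2)
    den = (mu_x ** 2 + mu_y ** 2 + C1) * (sigma_x + sigma_y + C2)
    return num / den

def photometric_loss(img, img_rec, valid_mask, alpha=0.85):
    """alpha * SSIM-dissimilarity + (1 - alpha) * L1, averaged over valid pixels."""
    l1 = torch.abs(img - img_rec).mean(1, True)
    ssim = torch.clamp((1.0 - simple_ssim(img, img_rec)) / 2.0, 0.0, 1.0).mean(1, True)
    per_pixel = alpha * ssim + (1.0 - alpha) * l1
    n_valid = valid_mask.sum().float()
    return (per_pixel * valid_mask).sum() / n_valid if n_valid > 0 else img.new_zeros(())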
예제 #20
0
def infer_optimal_calib(input_files, model_wrappers, image_shape):
    """
    Process a list of input files to infer a correction to the extrinsic calibration.
    Files should all correspond to the same car.
    The number of cameras is assumed to be 4.

    Parameters
    ----------
    input_files : list (number of cameras) of lists (number of files) of str
        Image files
    model_wrappers : nn.Module
        Model wrappers used for inference
    image_shape : tuple
        Input image shape (H, W)
    """
    N_files = len(input_files[0])
    N_cams = len(input_files)
    image_area = image_shape[0] * image_shape[1]

    camera_context_pairs = CAMERA_CONTEXT_PAIRS[N_cams]

    calib_data = {}
    for i_cam in range(N_cams):
        base_folder_str = get_base_folder(input_files[i_cam][0])
        split_type_str  = get_split_type(input_files[i_cam][0])
        seq_name_str    = get_sequence_name(input_files[i_cam][0])
        camera_str      = get_camera_name(input_files[i_cam][0])
        calib_data[camera_str] = read_raw_calib_files_camera_valeo(base_folder_str, split_type_str, seq_name_str, camera_str)

    cams = []
    not_masked = []

    # Assume all images are from the same sequence (thus same cameras)
    for i_cam in range(N_cams):
        path_to_ego_mask = get_path_to_ego_mask(input_files[i_cam][0])
        poly_coeffs, principal_point, scale_factors, K, k, p = get_full_intrinsics(input_files[i_cam][0], calib_data)

        poly_coeffs = torch.from_numpy(poly_coeffs).unsqueeze(0)
        principal_point = torch.from_numpy(principal_point).unsqueeze(0)
        scale_factors = torch.from_numpy(scale_factors).unsqueeze(0)
        K = torch.from_numpy(K).unsqueeze(0)
        k = torch.from_numpy(k).unsqueeze(0)
        p = torch.from_numpy(p).unsqueeze(0)
        pose_matrix = torch.from_numpy(get_extrinsics_pose_matrix(input_files[i_cam][0], calib_data)).unsqueeze(0)
        pose_tensor = Pose(pose_matrix)
        camera_type = get_camera_type(input_files[i_cam][0], calib_data)
        camera_type_int = torch.tensor([get_camera_type_int(camera_type)])

        cams.append(CameraMultifocal(poly_coeffs=poly_coeffs.float(),
                                     principal_point=principal_point.float(),
                                     scale_factors=scale_factors.float(),
                                     K=K.float(),
                                     k1=k[:, 0].float(),
                                     k2=k[:, 1].float(),
                                     k3=k[:, 2].float(),
                                     p1=p[:, 0].float(),
                                     p2=p[:, 1].float(),
                                     camera_type=camera_type_int,
                                     Tcw=pose_tensor))
        if torch.cuda.is_available():
            cams[i_cam] = cams[i_cam].to('cuda:{}'.format(rank()))

        ego_mask = np.load(path_to_ego_mask)
        not_masked.append(torch.from_numpy(ego_mask.astype(float)).cuda().float())

    # Learning variables
    extra_trans_m = [torch.autograd.Variable(torch.zeros(3).cuda(), requires_grad=True) for _ in range(N_cams)]
    extra_rot_deg = [torch.autograd.Variable(torch.zeros(3).cuda(), requires_grad=True) for _ in range(N_cams)]

    # Constraints: translation
    frozen_cams_trans = args.frozen_cams_trans
    if frozen_cams_trans is not None:
        for i_cam in frozen_cams_trans:
            extra_trans_m[i_cam].requires_grad = False
    # Constraints: rotation
    frozen_cams_rot = args.frozen_cams_rot
    if frozen_cams_rot is not None:
        for i_cam in frozen_cams_rot:
            extra_rot_deg[i_cam].requires_grad = False

    # Parameters from argument parser
    save_pictures = args.save_pictures
    n_epochs = args.n_epochs
    learning_rate = args.lr
    step_size = args.scheduler_step_size
    gamma = args.scheduler_gamma

    # Table of loss
    loss_tab = np.zeros(n_epochs)

    # Table of extra rotation values
    extra_rot_values_tab = np.zeros((N_cams * 3, N_files * n_epochs))

    # Optimizer
    optimizer = optim.Adam(extra_trans_m + extra_rot_deg, lr=learning_rate)

    # Scheduler
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

    # Regularization weights
    regul_weight_trans = torch.tensor(args.regul_weight_trans).cuda()
    regul_weight_rot = torch.tensor(args.regul_weight_rot).cuda()
    regul_weight_overlap = torch.tensor(args.regul_weight_overlap).cuda()

    # Loop on the number of epochs
    count = 0
    for epoch in range(n_epochs):
        print('Epoch ' + str(epoch) + '/' + str(n_epochs))

        # Initialize loss
        loss_sum = 0

        # Loop on the number of files
        for i_file in range(N_files):

            # Filename for camera 0
            base_0, ext_0 = os.path.splitext(os.path.basename(input_files[0][i_file]))
            print(base_0)

            # Initialize list of tensors: images, predicted inverse depths and predicted depths
            images          = []
            pred_inv_depths = []
            pred_depths     = []
            # Loop on cams and predict depth
            for i_cam in range(N_cams):
                images.append(load_image(input_files[i_cam][i_file]).convert('RGB'))
                images[i_cam] = resize_image(images[i_cam], image_shape)
                images[i_cam] = to_tensor(images[i_cam]).unsqueeze(0)
                if torch.cuda.is_available():
                    images[i_cam] = images[i_cam].to('cuda:{}'.format(rank()))
                with torch.no_grad():
                    pred_inv_depths.append(model_wrappers[i_cam].depth(images[i_cam]))
                    pred_depths.append(inv2depth(pred_inv_depths[i_cam]))

            # Define a loss function between 2 images
            def photo_loss_2imgs(i_cam1, i_cam2, extra_trans_list, extra_rot_list, save_pictures):
                # Computes the photometric loss between 2 images of adjacent cameras
                # It reconstructs each image from the adjacent one, applying correction in rotation and translation

                # Apply correction on two cams
                for i, i_cam in enumerate([i_cam1, i_cam2]):
                    pose_matrix = get_extrinsics_pose_matrix_extra_trans_rot_torch(input_files[i_cam][i_file],
                                                                                   calib_data,
                                                                                   extra_trans_list[i],
                                                                                   extra_rot_list[i]).unsqueeze(0)
                    pose_tensor = Pose(pose_matrix).to('cuda:{}'.format(rank()))
                    CameraMultifocal.Twc.fget.cache_clear()
                    cams[i_cam].Tcw = pose_tensor

                # Reconstruct 3D points for each cam
                world_points1 = cams[i_cam1].reconstruct(pred_depths[i_cam1], frame='w')
                world_points2 = cams[i_cam2].reconstruct(pred_depths[i_cam2], frame='w')

                # Get coordinates of projected points on other cam
                ref_coords1to2 = cams[i_cam2].project(world_points1, frame='w')
                ref_coords2to1 = cams[i_cam1].project(world_points2, frame='w')

                # Reconstruct each image from the adjacent camera
                reconstructedImg2to1 = funct.grid_sample(images[i_cam2]*not_masked[i_cam2],
                                                         ref_coords1to2,
                                                         mode='bilinear', padding_mode='zeros', align_corners=True)
                reconstructedImg1to2 = funct.grid_sample(images[i_cam1]*not_masked[i_cam1],
                                                         ref_coords2to1,
                                                         mode='bilinear', padding_mode='zeros', align_corners=True)
                # Save pictures if requested
                if save_pictures:
                    # Save original files if first epoch
                    if epoch == 0:
                        cv2.imwrite(args.save_folder + '/cam_' + str(i_cam1) + '_file_' + str(i_file) + '_orig.png',
                                    (images[i_cam1][0].permute(1, 2, 0))[:,:,[2,1,0]].detach().cpu().numpy() * 255)
                        cv2.imwrite(args.save_folder + '/cam_' + str(i_cam2) + '_file_' + str(i_file) + '_orig.png',
                                    (images[i_cam2][0].permute(1, 2, 0))[:,:,[2,1,0]].detach().cpu().numpy() * 255)
                    # Save reconstructed images
                    cv2.imwrite(args.save_folder + '/epoch_' + str(epoch) + '_file_' + str(i_file) + '_cam_' + str(i_cam1) + '_recons_from_' + str(i_cam2) + '.png',
                                ((reconstructedImg2to1*not_masked[i_cam1])[0].permute(1, 2, 0))[:,:,[2,1,0]].detach().cpu().numpy() * 255)
                    cv2.imwrite(args.save_folder + '/epoch_' + str(epoch) + '_file_' + str(i_file) + '_cam_' + str(i_cam2) + '_recons_from_' + str(i_cam1) + '.png',
                                ((reconstructedImg1to2*not_masked[i_cam2])[0].permute(1, 2, 0))[:,:,[2,1,0]].detach().cpu().numpy() * 255)

                # L1 loss
                l1_loss_1 = torch.abs(images[i_cam1]*not_masked[i_cam1] - reconstructedImg2to1*not_masked[i_cam1])
                l1_loss_2 = torch.abs(images[i_cam2]*not_masked[i_cam2] - reconstructedImg1to2*not_masked[i_cam2])

                # SSIM loss
                ssim_loss_weight = 0.85
                ssim_loss_1 = SSIM(images[i_cam1]*not_masked[i_cam1],
                                   reconstructedImg2to1*not_masked[i_cam1],
                                   C1=1e-4, C2=9e-4, kernel_size=3)
                ssim_loss_2 = SSIM(images[i_cam2]*not_masked[i_cam2],
                                   reconstructedImg1to2*not_masked[i_cam2],
                                   C1=1e-4, C2=9e-4, kernel_size=3)

                ssim_loss_1 = torch.clamp((1. - ssim_loss_1) / 2., 0., 1.)
                ssim_loss_2 = torch.clamp((1. - ssim_loss_2) / 2., 0., 1.)

                # Photometric loss: alpha * ssim + (1 - alpha) * l1
                photometric_loss_1 = ssim_loss_weight * ssim_loss_1.mean(1, True) + (1 - ssim_loss_weight) * l1_loss_1.mean(1, True)
                photometric_loss_2 = ssim_loss_weight * ssim_loss_2.mean(1, True) + (1 - ssim_loss_weight) * l1_loss_2.mean(1, True)

                # Compute the number of valid pixels
                mask1 = (reconstructedImg2to1 * not_masked[i_cam1]).sum(axis=1, keepdim=True) != 0
                s1 = mask1.sum().float()
                mask2 = (reconstructedImg1to2 * not_masked[i_cam2]).sum(axis=1, keepdim=True) != 0
                s2 = mask2.sum().float()

                # Compute the photometric losses weighted by the number of valid pixels
                loss_1 = (photometric_loss_1 * mask1).sum() / s1 if s1 > 0 else 0
                loss_2 = (photometric_loss_2 * mask2).sum() / s2 if s2 > 0 else 0

                # The final loss can be regularized to encourage a similar overlap between images
                if s1 > 0 and s2 > 0:
                    return loss_1 + loss_2 + regul_weight_overlap * image_area * (1/s1 + 1/s2)
                else:
                    return 0.

            # The final loss consists of summing the photometric loss of all pairs of adjacent cameras
            # and is regularized to prevent weights from exploding
            loss = sum([photo_loss_2imgs(i, (i + 1) % 4,
                                         [extra_trans_m[i], extra_trans_m[(i + 1) % 4]],
                                         [extra_rot_deg[i], extra_rot_deg[(i + 1) % 4]],
                                         save_pictures)
                        for i in range(4)]) \
                   + regul_weight_rot * sum([(extra_rot_deg[i] ** 2).sum() for i in range(4)]) \
                   + regul_weight_trans * sum([(extra_trans_m[i] ** 2).sum() for i in range(4)])

            # Optimization steps
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Save correction values and print loss
            with torch.no_grad():
                loss_sum += loss.item()
                for i_cam in range(N_cams):
                    for j in range(3):
                        extra_rot_values_tab[3*i_cam + j, count] = extra_rot_deg[i_cam][j].item()
                print('Loss:')
                print(loss)

            count += 1

        # Update scheduler
        print('Epoch:', epoch, 'LR:', scheduler.get_lr())
        scheduler.step()
        with torch.no_grad():
            print('End of epoch')
            print(extra_trans_m)
            print(extra_rot_deg)

        loss_tab[epoch] = loss_sum/N_files

    # Plot/save loss if requested
    plt.figure()
    plt.plot(loss_tab)
    if args.show_plots:
        plt.show()
    if args.save_plots:
        plt.savefig(os.path.join(args.save_folder, get_sequence_name(input_files[0][0]) + '_loss.png'))

    # Plot/save correction values if requested
    plt.figure()
    for j in range(N_cams * 3):
        plt.plot(extra_rot_values_tab[j])
    if args.show_plots:
        plt.show()
    if args.save_plots:
        plt.savefig(os.path.join(args.save_folder, get_sequence_name(input_files[0][0]) + '_extra_rot.png'))

    # Save correction values table if requested
    if args.save_rot_tab:
        np.save(os.path.join(args.save_folder, get_sequence_name(input_files[0][0]) + '_rot_tab.npy'),
                extra_rot_values_tab)
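Stripped of the image warping, the calibration refinement above is a standard PyTorch optimization loop over a few correction parameters (3 translation and 3 rotation values per camera) with Adam, a StepLR scheduler, L2 regularization pulling the corrections toward zero, and the option to freeze some cameras. The sketch below shows only that pattern; pairwise_photometric_loss is a placeholder standing in for photo_loss_2imgs, and the hyperparameters are illustrative.

import torch
import torch.optim as optim

n_cams, n_epochs = 4, 100
extra_trans_m = [torch.zeros(3, requires_grad=True) for _ in range(n_cams)]
extra_rot_deg = [torch.zeros(3, requires_grad=True) for _ in range(n_cams)]

# Freeze selected cameras (e.g. keep camera 0 fixed as the reference)
for i_cam in [0]:
    extra_trans_m[i_cam].requires_grad = False
    extra_rot_deg[i_cam].requires_grad = False

optimizer = optim.Adam(extra_trans_m + extra_rot_deg, lr=1e-3)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
regul_rot, regul_trans = 1e-3, 1e-3

def pairwise_photometric_loss(i, j, trans_i, rot_i, trans_j, rot_j):
    # Placeholder: in the snippet this is photo_loss_2imgs, which warps each
    # image into the adjacent camera and compares it photometrically.
    return ((trans_i - trans_j) ** 2).sum() + ((rot_i - rot_j) ** 2).sum()

for epoch in range(n_epochs):
    loss = sum(pairwise_photometric_loss(i, (i + 1) % n_cams,
                                         extra_trans_m[i], extra_rot_deg[i],
                                         extra_trans_m[(i + 1) % n_cams],
                                         extra_rot_deg[(i + 1) % n_cams])
               for i in range(n_cams)) \
           + regul_rot * sum((r ** 2).sum() for r in extra_rot_deg) \
           + regul_trans * sum((t ** 2).sum() for t in extra_trans_m)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()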
예제 #21
0
def infer_plot_and_save_3D_pcl(input_files, output_folder, model_wrappers,
                               image_shape, half, save, stop):
    """
    Process a list of input files to produce and save visualizations

    Parameters
    ----------
    input_files : list (number of cameras) of lists (number of files) of str
        Image files
    output_folder : str
        Folder where the outputs will be saved
    model_wrappers : nn.Module
        Model wrappers used for inference
    image_shape : tuple
        Input image shape (H, W)
    half : bool
        Use half precision (fp16)
    save : str
        Save format (npz or png)
    stop : bool
        If True, run the interactive Open3D viewer and save the merged point cloud for each rendered view
    """
    N_cams = len(input_files)
    N_files = len(input_files[0])

    camera_names = []
    for i_cam in range(N_cams):
        camera_names.append(get_camera_name(input_files[i_cam][0]))

    cams = []
    not_masked = []

    # change to half precision for evaluation if requested
    dtype = torch.float16 if half else None

    bbox = o3d.geometry.AxisAlignedBoundingBox(min_bound=(-1000, -1000, -1),
                                               max_bound=(1000, 1000, 5))

    # let's assume all images are from the same sequence (thus same cameras)
    for i_cam in range(N_cams):
        base_folder_str = get_base_folder(input_files[i_cam][0])
        split_type_str = get_split_type(input_files[i_cam][0])
        seq_name_str = get_sequence_name(input_files[i_cam][0])
        camera_str = get_camera_name(input_files[i_cam][0])

        calib_data = {}
        calib_data[camera_str] = read_raw_calib_files_camera_valeo(
            base_folder_str, split_type_str, seq_name_str, camera_str)

        path_to_theta_lut = get_path_to_theta_lut(input_files[i_cam][0])
        path_to_ego_mask = get_path_to_ego_mask(input_files[i_cam][0])
        poly_coeffs, principal_point, scale_factors = get_intrinsics(
            input_files[i_cam][0], calib_data)

        poly_coeffs = torch.from_numpy(poly_coeffs).unsqueeze(0)
        principal_point = torch.from_numpy(principal_point).unsqueeze(0)
        scale_factors = torch.from_numpy(scale_factors).unsqueeze(0)
        pose_matrix = torch.from_numpy(
            get_extrinsics_pose_matrix(input_files[i_cam][0],
                                       calib_data)).unsqueeze(0)
        pose_tensor = Pose(pose_matrix)

        cams.append(
            CameraFisheye(path_to_theta_lut=[path_to_theta_lut],
                          path_to_ego_mask=[path_to_ego_mask],
                          poly_coeffs=poly_coeffs.float(),
                          principal_point=principal_point.float(),
                          scale_factors=scale_factors.float(),
                          Tcw=pose_tensor))
        if torch.cuda.is_available():
            cams[i_cam] = cams[i_cam].to('cuda:{}'.format(rank()), dtype=dtype)

        ego_mask = np.load(path_to_ego_mask)
        not_masked.append(ego_mask.astype(bool).reshape(-1))

    # create output dirs for each cam
    seq_name = get_sequence_name(input_files[0][0])
    for i_cam in range(N_cams):
        os.makedirs(os.path.join(output_folder, seq_name, 'depth',
                                 camera_names[i_cam]),
                    exist_ok=True)
        os.makedirs(os.path.join(output_folder, seq_name, 'rgb',
                                 camera_names[i_cam]),
                    exist_ok=True)

    for i_file in range(0, N_files, 25):

        base_0, ext_0 = os.path.splitext(
            os.path.basename(input_files[0][i_file]))
        print(base_0)

        images = []
        images_numpy = []
        pred_inv_depths = []
        pred_depths = []
        world_points = []
        input_depth_files = []
        has_gt_depth = []
        gt_depth = []
        gt_depth_3d = []
        pcl_full = []
        pcl_only_inliers = []
        pcl_only_outliers = []
        pcl_gt = []
        rgb = []
        viz_pred_inv_depths = []
        for i_cam in range(N_cams):
            images.append(
                load_image(input_files[i_cam][i_file]).convert('RGB'))
            images[i_cam] = resize_image(images[i_cam], image_shape)
            images[i_cam] = to_tensor(images[i_cam]).unsqueeze(0)
            if torch.cuda.is_available():
                images[i_cam] = images[i_cam].to('cuda:{}'.format(rank()),
                                                 dtype=dtype)
            images_numpy.append(images[i_cam][0].cpu().numpy())
            images_numpy[i_cam] = images_numpy[i_cam].reshape(
                (3, -1)).transpose()
            images_numpy[i_cam] = images_numpy[i_cam][not_masked[i_cam]]
            pred_inv_depths.append(model_wrappers[i_cam].depth(images[i_cam]))
            pred_depths.append(inv2depth(pred_inv_depths[i_cam]))
            world_points.append(cams[i_cam].reconstruct(pred_depths[i_cam],
                                                        frame='w'))
            world_points[i_cam] = world_points[i_cam][0].cpu().numpy()
            world_points[i_cam] = world_points[i_cam].reshape(
                (3, -1)).transpose()
            world_points[i_cam] = world_points[i_cam][not_masked[i_cam]]
            cam_name = camera_names[i_cam]
            cam_int = cam_name.split('_')[-1]
            input_depth_files.append(get_depth_file(
                input_files[i_cam][i_file]))
            has_gt_depth.append(os.path.exists(input_depth_files[i_cam]))
            if has_gt_depth[i_cam]:
                gt_depth.append(
                    np.load(input_depth_files[i_cam])['velodyne_depth'].astype(
                        np.float32))
                gt_depth[i_cam] = torch.from_numpy(
                    gt_depth[i_cam]).unsqueeze(0).unsqueeze(0)
                if torch.cuda.is_available():
                    gt_depth[i_cam] = gt_depth[i_cam].to('cuda:{}'.format(
                        rank()),
                                                         dtype=dtype)
                gt_depth_3d.append(cams[i_cam].reconstruct(gt_depth[i_cam],
                                                           frame='w'))
                gt_depth_3d[i_cam] = gt_depth_3d[i_cam][0].cpu().numpy()
                gt_depth_3d[i_cam] = gt_depth_3d[i_cam].reshape(
                    (3, -1)).transpose()
                #gt_depth_3d[i_cam] = gt_depth_3d[i_cam][not_masked[i_cam]]
            else:
                gt_depth.append(0)
                gt_depth_3d.append(0)
            pcl_full.append(o3d.geometry.PointCloud())
            pcl_full[i_cam].points = o3d.utility.Vector3dVector(
                world_points[i_cam])
            pcl_full[i_cam].colors = o3d.utility.Vector3dVector(
                images_numpy[i_cam])

            pcl = pcl_full[i_cam]  #.select_by_index(ind)
            points_tmp = np.asarray(pcl.points)
            colors_tmp = np.asarray(pcl.colors)
            # Remove high points whose color looks like sky or vegetation
            mask_height = points_tmp[:,
                                     2] > 2.5  # * (abs(points_tmp[:, 0]) < 10) * (abs(points_tmp[:, 1]) < 3)
            mask_colors_blue = np.sum(
                np.abs(colors_tmp - np.array([0.6, 0.8, 1])),
                axis=1) < 0.6  # sky blue
            mask_colors_green = np.sum(
                np.abs(colors_tmp - np.array([0.2, 1, 0.4])), axis=1) < 0.8
            mask_colors_green2 = np.sum(
                np.abs(colors_tmp - np.array([0, 0.5, 0.15])), axis=1) < 0.2
            mask = 1 - mask_height * mask_colors_blue
            mask2 = 1 - mask_height * mask_colors_green
            mask3 = 1 - mask_height * mask_colors_green2
            mask = mask * mask2 * mask3
            pcl = pcl.select_by_index(np.where(mask)[0])
            cl, ind = pcl.remove_statistical_outlier(nb_neighbors=7,
                                                     std_ratio=1.4)
            pcl = pcl.select_by_index(ind)
            pcl_only_inliers.append(
                pcl)  #pcl_full[i_cam].select_by_index(ind)[mask])
            #pcl_only_outliers.append(pcl_full[i_cam].select_by_index(ind, invert=True))
            #pcl_only_outliers[i_cam].paint_uniform_color([0.0, 0.0, 1.0])
            if has_gt_depth[i_cam]:
                pcl_gt.append(o3d.geometry.PointCloud())
                pcl_gt[i_cam].points = o3d.utility.Vector3dVector(
                    gt_depth_3d[i_cam])
                gt_inv_depth = 1 / (
                    np.linalg.norm(gt_depth_3d[i_cam], axis=1) + 1e-6)
                cm = get_cmap('plasma')
                normalizer = .35  #np.percentile(gt_inv_depth, 95)
                gt_inv_depth /= (normalizer + 1e-6)
                #print(cm(np.clip(gt_inv_depth, 0., 1.0)).shape)  # [:, :3]

                pcl_gt[i_cam].colors = o3d.utility.Vector3dVector(
                    cm(np.clip(gt_inv_depth, 0., 1.0))[:, :3])
            else:
                pcl_gt.append(0)
        #o3d.visualization.draw_geometries(pcl_full + [e for i, e in enumerate(pcl_gt) if e != 0])

        # vis_full = o3d.visualization.Visualizer()
        # vis_full.create_window(visible = True, window_name = 'full'+str(i_file))
        # for i_cam in range(N_cams):
        #     vis_full.add_geometry(pcl_full[i_cam])
        # for i, e in enumerate(pcl_gt):
        #     if e != 0:
        #         vis_full.add_geometry(e)
        # ctr = vis_full.get_view_control()
        # ctr.set_lookat(lookat_vector)
        # ctr.set_front(front_vector)
        # ctr.set_up(up_vector)
        # ctr.set_zoom(zoom_float)
        # #vis_full.run()
        # vis_full.destroy_window()
        N = 480
        for i_cam_n in range(N):
            angle_str = str(int(360 / N * i_cam_n))
            vis_only_inliers = o3d.visualization.Visualizer()
            vis_only_inliers.create_window(visible=True,
                                           window_name='inliers' + str(i_file))
            for i_cam in range(N_cams):
                vis_only_inliers.add_geometry(pcl_only_inliers[i_cam])
            for i, e in enumerate(pcl_gt):
                if e != 0:
                    vis_only_inliers.add_geometry(e)
            ctr = vis_only_inliers.get_view_control()
            ctr.set_lookat(lookat_vector)
            ctr.set_front(front_vector)
            ctr.set_up(up_vector)
            ctr.set_zoom(zoom_float)
            param = o3d.io.read_pinhole_camera_parameters(
                '/home/vbelissen/Downloads/test/cameras_jsons/sequence/test1_'
                + str(i_cam_n) + 'stop.json')
            ctr.convert_from_pinhole_camera_parameters(param)
            opt = vis_only_inliers.get_render_option()
            opt.background_color = np.asarray([0, 0, 0])
            #vis_only_inliers.update_geometry('inliers0')
            vis_only_inliers.poll_events()
            vis_only_inliers.update_renderer()
            if stop:
                vis_only_inliers.run()
                pcd1 = pcl_only_inliers[0] + pcl_only_inliers[
                    1] + pcl_only_inliers[2] + pcl_only_inliers[3]
                for i_cam3 in range(4):
                    if has_gt_depth[i_cam3]:
                        pcd1 += pcl_gt[i_cam3]
                o3d.io.write_point_cloud(
                    os.path.join(output_folder, seq_name, 'open3d',
                                 base_0 + str(i_cam_n) + '.pcd'), pcd1)
            #param = vis_only_inliers.get_view_control().convert_to_pinhole_camera_parameters()
            #o3d.io.write_pinhole_camera_parameters('/home/vbelissen/Downloads/test.json', param)
            image = vis_only_inliers.capture_screen_float_buffer(False)
            plt.imsave(os.path.join(output_folder, seq_name, 'pcl', 'normal',
                                    str(i_cam_n) + '_normal' + '.png'),
                       np.asarray(image),
                       dpi=1)
            vis_only_inliers.destroy_window()
            del ctr
            del vis_only_inliers
            del opt

        #del ctr
        #del vis_full
        #del vis_only_inliers
        #del vis_inliers_outliers
        #del vis_inliers_cropped

        for i_cam in range(N_cams):
            rgb.append(
                images[i_cam][0].permute(1, 2, 0).detach().cpu().numpy() * 255)
            viz_pred_inv_depths.append(
                viz_inv_depth(pred_inv_depths[i_cam][0], normalizer=0.8) * 255)
            viz_pred_inv_depths[i_cam][not_masked[i_cam].reshape(image_shape)
                                       == 0] = 0
            concat = np.concatenate([rgb[i_cam], viz_pred_inv_depths[i_cam]],
                                    0)
            # Save visualization
            output_file1 = os.path.join(
                output_folder, seq_name, 'depth', camera_names[i_cam],
                os.path.basename(input_files[i_cam][i_file]))
            output_file2 = os.path.join(
                output_folder, seq_name, 'rgb', camera_names[i_cam],
                os.path.basename(input_files[i_cam][i_file]))
            imwrite(output_file1, viz_pred_inv_depths[i_cam][:, :, ::-1])
            imwrite(output_file2, rgb[i_cam][:, :, ::-1])
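As in the earlier snippets, each per-camera cloud is cleaned by dropping high points whose color matches sky or vegetation and then running a statistical outlier filter. Below is a self-contained version of that cleanup, assuming (N, 3) arrays of world points and RGB colors in [0, 1]; clean_point_cloud is a hypothetical helper name and the thresholds are the scene-dependent values used above.

import numpy as np
import open3d as o3d

def clean_point_cloud(points, colors, height_thresh=2.5):
    """Drop high sky/vegetation-colored points, then remove statistical outliers."""
    pcl = o3d.geometry.PointCloud()
    pcl.points = o3d.utility.Vector3dVector(points)   # (N, 3) world coordinates
    pcl.colors = o3d.utility.Vector3dVector(colors)   # (N, 3) RGB in [0, 1]

    # Points above the height threshold...
    high = points[:, 2] > height_thresh
    # ...whose color is close to sky blue or one of two shades of green
    near = lambda ref, tol: np.sum(np.abs(colors - np.array(ref)), axis=1) < tol
    sky_or_veg = near([0.6, 0.8, 1.0], 0.6) | near([0.2, 1.0, 0.4], 0.8) | near([0.0, 0.5, 0.15], 0.2)

    pcl = pcl.select_by_index(np.where(~(high & sky_or_veg))[0])
    # Remove isolated points with a statistical outlier filter
    _, ind = pcl.remove_statistical_outlier(nb_neighbors=7, std_ratio=1.4)
    return pcl.select_by_index(ind)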
    def project_fisheye(self, X, mask, frame='w'):
        """
        Projects 3D points onto the image plane

        Parameters
        ----------
        X : torch.Tensor [B,3,H,W]
            3D points to be projected
        mask : torch.Tensor
            Boolean mask selecting the batch elements (cameras) to project
        frame : str
            Reference frame: 'c' for camera and 'w' for world

        Returns
        -------
        points : torch.Tensor [B,H,W,2]
            2D projected points that are within the image boundaries
        """
        B, C, H, W = X[mask].shape
        assert C == 3

        # World to camera:
        if frame == 'c':
            Xc = X[mask].view(B, 3, -1) # [B, 3, HW]
        elif frame == 'w':
            Xc = (Pose(self.Tcw.mat[mask]) @ X[mask]).view(B, 3, -1) # [B, 3, HW]
        else:
            raise ValueError('Unknown reference frame {}'.format(frame))

        c1 = self.poly_coeffs[mask, 0].unsqueeze(1)
        c2 = self.poly_coeffs[mask, 1].unsqueeze(1)
        c3 = self.poly_coeffs[mask, 2].unsqueeze(1)
        c4 = self.poly_coeffs[mask, 3].unsqueeze(1)

        # Project 3D points onto the camera image plane
        X = Xc[:, 0] # [B, HW]
        Y = Xc[:, 1] # [B, HW]
        Z = Xc[:, 2] # [B, HW]
        phi = torch.atan2(Y, X) # [B, HW]
        rc = torch.sqrt(torch.pow(X, 2) + torch.pow(Y, 2)) # [B, HW]
        theta_1 = math.pi / 2 - torch.atan2(Z, rc) # [B, HW]
        theta_2 = torch.pow(theta_1, 2) # [B, HW]
        theta_3 = torch.pow(theta_1, 3) # [B, HW]
        theta_4 = torch.pow(theta_1, 4) # [B, HW]

        rho = c1 * theta_1 + c2 * theta_2 + c3 * theta_3 + c4 * theta_4 # [B, HW]
        rho = rho * ((X != 0) | (Y != 0) | (Z != 0))
        u = rho * torch.cos(phi) / self.scale_factors[mask, 0].unsqueeze(1) + self.principal_point[mask, 0].unsqueeze(1) # [B, HW]
        v = rho * torch.sin(phi) / self.scale_factors[mask, 1].unsqueeze(1) + self.principal_point[mask, 1].unsqueeze(1)  # [B, HW]
        if torch.isnan(u).sum() > 0:
            print(str(torch.isnan(u).sum().item()) + ' nans in projected fisheye')
        if torch.isnan(v).sum() > 0:
            print(str(torch.isnan(v).sum().item()) + ' nans in projected fisheye')

        # Normalize points
        Xnorm = 2 * u / (W - 1)# - 1.
        Ynorm = 2 * v / (H - 1)# - 1.

        # Clamp out-of-bounds pixels
        Xmask = ((Xnorm > 1) + (Xnorm < -1)).detach()
        Xnorm[Xmask] = 2.
        Ymask = ((Ynorm > 1) + (Ynorm < -1)).detach()
        Ynorm[Ymask] = 2.

        mask_out = (theta_1 * 180 * 2 / np.pi > 190.0).detach()
        Xnorm[mask_out] = 2.
        Ynorm[mask_out] = 2.

        # Return pixel coordinates
        return torch.stack([Xnorm, Ynorm], dim=-1).view(B, H, W, 2)
    def project_distorted(self, X, mask, frame='w'):
        """
        Projects 3D points onto the image plane

        Parameters
        ----------
        X : torch.Tensor [B,3,H,W]
            3D points to be projected
        mask : torch.Tensor
            Boolean mask selecting the batch elements (cameras) to project
        frame : str
            Reference frame: 'c' for camera and 'w' for world

        Returns
        -------
        points : torch.Tensor [B,H,W,2]
            2D projected points that are within the image boundaries
        """
        B, C, H, W = X[mask].shape
        assert C == 3

        # Project 3D points onto the camera image plane
        if frame == 'c':
            Xc = X[mask].view(B, 3, -1)
        elif frame == 'w':
            Xc = (Pose(self.Tcw.mat[mask]) @ X[mask]).view(B, 3, -1)
        else:
            raise ValueError('Unknown reference frame {}'.format(frame))

        # Z-Normalize points
        Z  = Xc[:, 2].clamp(min=1e-5)
        Xn = Xc[:, 0] / Z
        Yn = Xc[:, 1] / Z

        veryFarMask = ((Xn > 5) + (Xn < -5) + (Yn > 5) + (Yn < -5)).detach()
        Xn[veryFarMask] = 0.
        Yn[veryFarMask] = 0.

        r2 = Xn.pow(2) + Yn.pow(2)
        r4 = r2.pow(2)
        r6 = r2 * r4

        # Distorted normalized points
        rad_dist = (1 + self.k1[mask].view(B,1) * r2 + self.k2[mask].view(B,1) * r4 + self.k3[mask].view(B,1) * r6)
        Xd = Xn * rad_dist + 2 * self.p1[mask].view(B,1) * Xn * Yn + self.p2[mask].view(B,1) * (r2 + 2 * Xn.pow(2))
        Yd = Yn * rad_dist + 2 * self.p2[mask].view(B,1) * Xn * Yn + self.p1[mask].view(B,1) * (r2 + 2 * Yn.pow(2))

        # Final projection
        u = self.fx[mask].view(B,1) * Xd + self.cx[mask].view(B,1)
        v = self.fy[mask].view(B,1) * Yd + self.cy[mask].view(B,1)

        # normalized coordinates
        uNorm = 2 * u / (W - 1) - 1.
        vNorm = 2 * v / (H - 1) - 1.

        # Clamp out-of-bounds pixels
        Xmask = ((uNorm > 1) + (uNorm < -1)).detach()
        uNorm[Xmask] = 2.
        Ymask = ((vNorm > 1) + (vNorm < -1)).detach()
        vNorm[Ymask] = 2.

        # Clamp very far out-of-bounds pixels
        uNorm[veryFarMask] = 2.
        vNorm[veryFarMask] = 2.

        # Return pixel coordinates
        return torch.stack([uNorm, vNorm], dim=-1).view(B, H, W, 2).float()
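The fisheye model in project_fisheye above reduces to a polynomial in the incidence angle: for a camera-frame point (X, Y, Z), phi = atan2(Y, X), theta = pi/2 - atan2(Z, sqrt(X^2 + Y^2)), rho = c1*theta + c2*theta^2 + c3*theta^3 + c4*theta^4, and the pixel is (rho*cos(phi)/ax + cx, rho*sin(phi)/ay + cy). A minimal NumPy sketch of that same mapping for a single camera; the coefficients in the example call are made up for illustration, not taken from any real calibration.

import numpy as np

def project_fisheye_np(points_cam, poly_coeffs, principal_point, scale_factors):
    """Polynomial fisheye projection of camera-frame points (N, 3) to pixel coordinates (N, 2)."""
    c1, c2, c3, c4 = poly_coeffs
    cx, cy = principal_point
    ax, ay = scale_factors

    X, Y, Z = points_cam[:, 0], points_cam[:, 1], points_cam[:, 2]
    phi = np.arctan2(Y, X)                      # azimuth in the image plane
    rc = np.sqrt(X ** 2 + Y ** 2)
    theta = np.pi / 2 - np.arctan2(Z, rc)       # angle from the optical axis
    rho = c1 * theta + c2 * theta ** 2 + c3 * theta ** 3 + c4 * theta ** 4

    u = rho * np.cos(phi) / ax + cx
    v = rho * np.sin(phi) / ay + cy
    return np.stack([u, v], axis=-1)

# Illustrative call with made-up coefficients
pts = np.array([[1.0, 0.5, 2.0], [0.0, 0.0, 1.0]])
uv = project_fisheye_np(pts, poly_coeffs=(300.0, 10.0, -5.0, 1.0),
                        principal_point=(640.0, 400.0), scale_factors=(1.0, 1.0))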
예제 #24
0
def infer_plot_and_save_3D_pcl(input_files, output_folder, model_wrappers, image_shape, half, save, stop):
    """
    Process a list of input files to produce and save visualizations

    Parameters
    ----------
    input_files : list (number of cameras) of lists (number of files) of str
        Image files
    output_folder : str
        Folder where the outputs will be saved
    model_wrappers : nn.Module
        Model wrappers used for inference
    image_shape : tuple
        Input image shape (H, W)
    half : bool
        Use half precision (fp16)
    save : str
        Save format (npz or png)
    stop : bool
        If True, run the interactive Open3D viewer for each rendered view
    """
    N_cams = len(input_files)
    N_files = len(input_files[0])

    camera_names = []
    for i_cam in range(N_cams):
        camera_names.append(get_camera_name(input_files[i_cam][0]))

    cams = []
    not_masked = []

    cams_x = []
    cams_y = []
    cams_z = []

    alpha_mask = 0.5

    # change to half precision for evaluation if requested
    dtype = torch.float16 if half else None

    bbox = o3d.geometry.AxisAlignedBoundingBox(min_bound=(-1000, -1000, -1), max_bound=(1000, 1000, 5))

    # let's assume all images are from the same sequence (thus same cameras)
    for i_cam in range(N_cams):
        base_folder_str = get_base_folder(input_files[i_cam][0])
        split_type_str  = get_split_type(input_files[i_cam][0])
        seq_name_str    = get_sequence_name(input_files[i_cam][0])
        camera_str      = get_camera_name(input_files[i_cam][0])

        calib_data = {}
        calib_data[camera_str] = read_raw_calib_files_camera_valeo(base_folder_str, split_type_str, seq_name_str, camera_str)

        cams_x.append(float(calib_data[camera_str]['extrinsics']['pos_x_m']))
        cams_y.append(float(calib_data[camera_str]['extrinsics']['pos_y_m']))
        cams_z.append(float(calib_data[camera_str]['extrinsics']['pos_z_m']))

        path_to_theta_lut = get_path_to_theta_lut(input_files[i_cam][0])
        path_to_ego_mask = get_path_to_ego_mask(input_files[i_cam][0])
        poly_coeffs, principal_point, scale_factors = get_intrinsics(input_files[i_cam][0], calib_data)

        poly_coeffs = torch.from_numpy(poly_coeffs).unsqueeze(0)
        principal_point = torch.from_numpy(principal_point).unsqueeze(0)
        scale_factors = torch.from_numpy(scale_factors).unsqueeze(0)
        pose_matrix = torch.from_numpy(get_extrinsics_pose_matrix(input_files[i_cam][0], calib_data)).unsqueeze(0)
        pose_tensor = Pose(pose_matrix)

        cams.append(CameraFisheye(path_to_theta_lut=[path_to_theta_lut],
                             path_to_ego_mask=[path_to_ego_mask],
                             poly_coeffs=poly_coeffs.float(),
                             principal_point=principal_point.float(),
                             scale_factors=scale_factors.float(),
                             Tcw=pose_tensor))
        if torch.cuda.is_available():
            cams[i_cam] = cams[i_cam].to('cuda:{}'.format(rank()), dtype=dtype)

        ego_mask = np.load(path_to_ego_mask)
        not_masked.append(ego_mask.astype(bool).reshape(-1))

    cams_middle = np.zeros(3)
    cams_middle[0] = (cams_x[0] + cams_x[1] + cams_x[2] + cams_x[3]) / 4
    cams_middle[1] = (cams_y[0] + cams_y[1] + cams_y[2] + cams_y[3]) / 4
    cams_middle[2] = (cams_z[0] + cams_z[1] + cams_z[2] + cams_z[3]) / 4

    # create output dirs for each cam
    seq_name = get_sequence_name(input_files[0][0])
    for i_cam in range(N_cams):
        os.makedirs(os.path.join(output_folder, seq_name, 'depth', camera_names[i_cam]), exist_ok=True)
        os.makedirs(os.path.join(output_folder, seq_name, 'rgb', camera_names[i_cam]), exist_ok=True)



    for i_file in range(0, N_files):

        base_0, ext_0 = os.path.splitext(os.path.basename(input_files[0][i_file]))
        print(base_0)
        os.makedirs(os.path.join(output_folder, seq_name, 'pcl', base_0), exist_ok=True)

        images = []
        images_numpy = []
        pred_inv_depths = []
        pred_depths = []
        world_points = []
        input_depth_files = []
        has_gt_depth = []
        input_full_masks = []
        has_full_mask = []
        gt_depth = []
        gt_depth_3d = []
        pcl_full = []
        pcl_only_inliers = []
        pcl_only_outliers = []
        pcl_gt = []
        rgb = []
        viz_pred_inv_depths = []
        for i_cam in range(N_cams):
            images.append(load_image(input_files[i_cam][i_file]).convert('RGB'))
            images[i_cam] = resize_image(images[i_cam], image_shape)
            images[i_cam] = to_tensor(images[i_cam]).unsqueeze(0)
            if torch.cuda.is_available():
                images[i_cam] = images[i_cam].to('cuda:{}'.format(rank()), dtype=dtype)

            pred_inv_depths.append(model_wrappers[i_cam].depth(images[i_cam]))
            pred_depths.append(inv2depth(pred_inv_depths[i_cam]))
            pred_depth_copy = pred_depths[i_cam].squeeze(0).squeeze(0).cpu().numpy()
            pred_depth_copy = np.uint8(pred_depth_copy)
            lap = np.uint8(np.absolute(cv2.Laplacian(pred_depth_copy,cv2.CV_64F,ksize=3)))
            great_lap = lap < 4
            great_lap = great_lap.reshape(-1)
            images_numpy.append(images[i_cam][0].cpu().numpy())
            images_numpy[i_cam] = images_numpy[i_cam].reshape((3, -1)).transpose()
            images_numpy[i_cam] = images_numpy[i_cam][not_masked[i_cam]*great_lap]

            world_points.append(cams[i_cam].reconstruct(pred_depths[i_cam], frame='w'))
            world_points[i_cam] = world_points[i_cam][0].cpu().numpy()
            world_points[i_cam] = world_points[i_cam].reshape((3, -1)).transpose()
            world_points[i_cam] = world_points[i_cam][not_masked[i_cam]*great_lap]
            cam_name = camera_names[i_cam]
            cam_int = cam_name.split('_')[-1]
            input_depth_files.append(get_depth_file(input_files[i_cam][i_file]))
            has_gt_depth.append(os.path.exists(input_depth_files[i_cam]))
            if has_gt_depth[i_cam]:
                gt_depth.append(np.load(input_depth_files[i_cam])['velodyne_depth'].astype(np.float32))
                gt_depth[i_cam] = torch.from_numpy(gt_depth[i_cam]).unsqueeze(0).unsqueeze(0)
                if torch.cuda.is_available():
                    gt_depth[i_cam] = gt_depth[i_cam].to('cuda:{}'.format(rank()), dtype=dtype)
                gt_depth_3d.append(cams[i_cam].reconstruct(gt_depth[i_cam], frame='w'))
                gt_depth_3d[i_cam] = gt_depth_3d[i_cam][0].cpu().numpy()
                gt_depth_3d[i_cam] = gt_depth_3d[i_cam].reshape((3, -1)).transpose()
                #gt_depth_3d[i_cam] = gt_depth_3d[i_cam][not_masked[i_cam]]
            else:
                gt_depth.append(0)
                gt_depth_3d.append(0)
            input_full_masks.append(get_full_mask_file(input_files[i_cam][i_file]))
            has_full_mask.append(os.path.exists(input_full_masks[i_cam]))

            pcl_full.append(o3d.geometry.PointCloud())
            pcl_full[i_cam].points = o3d.utility.Vector3dVector(world_points[i_cam])
            if has_full_mask[i_cam]:
                full_mask = np.load(input_full_masks[i_cam])
                mask_colors = label_colors[correspondence[full_mask]].reshape((-1, 3))#.transpose()
                mask_colors = mask_colors[not_masked[i_cam]*great_lap]
                pcl_full[i_cam].colors = o3d.utility.Vector3dVector(alpha_mask * mask_colors + (1-alpha_mask) * images_numpy[i_cam])
            else:
                pcl_full[i_cam].colors = o3d.utility.Vector3dVector(images_numpy[i_cam])

            pcl = pcl_full[i_cam]#.select_by_index(ind)
            points_tmp = np.asarray(pcl.points)
            colors_tmp = images_numpy[i_cam]#np.asarray(pcl.colors)
            # Remove high points whose color looks like sky or vegetation
            mask_height = points_tmp[:, 2] > 1.5  # * (abs(points_tmp[:, 0]) < 10) * (abs(points_tmp[:, 1]) < 3)
            mask_colors_blue = np.sum(np.abs(colors_tmp - np.array([0.6, 0.8, 1])), axis=1) < 0.6  # sky blue
            mask_colors_green = np.sum(np.abs(colors_tmp - np.array([0.2, 1, 0.4])), axis=1) < 0.8
            mask_colors_green2 = np.sum(np.abs(colors_tmp - np.array([0, 0.5, 0.15])), axis=1) < 0.2
            mask = 1 - mask_height * mask_colors_blue
            mask2 = 1 - mask_height * mask_colors_green
            mask3 = 1 - mask_height * mask_colors_green2
            mask = mask * mask2 * mask3
            pcl = pcl.select_by_index(np.where(mask)[0])
            cl, ind = pcl.remove_statistical_outlier(nb_neighbors=7, std_ratio=1.2)
            pcl = pcl.select_by_index(ind)
            pcl = pcl.voxel_down_sample(voxel_size=0.02)
            #if has_full_mask[i_cam]:
            #    pcl.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.2, max_nn=15))
            pcl_only_inliers.append(pcl)#pcl_full[i_cam].select_by_index(ind)[mask])
            #pcl_only_outliers.append(pcl_full[i_cam].select_by_index(ind, invert=True))
            #pcl_only_outliers[i_cam].paint_uniform_color([0.0, 0.0, 1.0])
            if has_gt_depth[i_cam]:
                pcl_gt.append(o3d.geometry.PointCloud())
                pcl_gt[i_cam].points = o3d.utility.Vector3dVector(gt_depth_3d[i_cam])
                gt_inv_depth = 1 / (np.linalg.norm(gt_depth_3d[i_cam] - cams_middle, axis=1) + 1e-6)
                cm = get_cmap('plasma')
                normalizer = .35#np.percentile(gt_inv_depth, 95)
                gt_inv_depth /= (normalizer + 1e-6)
                #print(cm(np.clip(gt_inv_depth, 0., 1.0)).shape)  # [:, :3]

                pcl_gt[i_cam].colors = o3d.utility.Vector3dVector(cm(np.clip(gt_inv_depth, 0., 1.0))[:, :3])
            else:
                pcl_gt.append(0)

        # threshold = 0.5
        # threshold2 = 0.1
        # for i_cam in range(4):
        #     if has_full_mask[i_cam]:
        #         for relative in [-1, 1]:
        #             if not has_full_mask[(i_cam + relative) % 4]:
        #                 dists = pcl_only_inliers[(i_cam + relative) % 4].compute_point_cloud_distance(pcl_only_inliers[i_cam])
        #                 p1 = pcl_only_inliers[(i_cam + relative) % 4].select_by_index(np.where(np.asarray(dists) > threshold)[0])
        #                 p2 = pcl_only_inliers[(i_cam + relative) % 4].select_by_index(np.where(np.asarray(dists) > threshold)[0], invert=True).uniform_down_sample(15)#.voxel_down_sample(voxel_size=0.5)
        #                 pcl_only_inliers[(i_cam + relative) % 4] = p1 + p2
        #     if has_gt_depth[i_cam]:
        #         if has_full_mask[i_cam]:
        #             down = 15
        #         else:
        #             down = 30
        #         dists = pcl_only_inliers[i_cam].compute_point_cloud_distance(pcl_gt[i_cam])
        #         p1 = pcl_only_inliers[i_cam].select_by_index(np.where(np.asarray(dists) > threshold2)[0])
        #         p2 = pcl_only_inliers[i_cam].select_by_index(np.where(np.asarray(dists) > threshold2)[0], invert=True).uniform_down_sample(down)#.voxel_down_sample(voxel_size=0.5)
        #         pcl_only_inliers[i_cam] = p1 + p2

        for i_cam_n in range(120):
            vis_only_inliers = o3d.visualization.Visualizer()
            vis_only_inliers.create_window(visible = True, window_name = 'inliers'+str(i_file))
            for i_cam in range(N_cams):
                vis_only_inliers.add_geometry(pcl_only_inliers[i_cam])
            for i, e in enumerate(pcl_gt):
                if e != 0:
                    vis_only_inliers.add_geometry(e)
            ctr = vis_only_inliers.get_view_control()
            ctr.set_lookat(lookat_vector)
            ctr.set_front(front_vector)
            ctr.set_up(up_vector)
            ctr.set_zoom(zoom_float)
            param = o3d.io.read_pinhole_camera_parameters('/home/vbelissen/Downloads/test/cameras_jsons/sequence/test1_'+str(i_cam_n)+'v3rear.json')
            ctr.convert_from_pinhole_camera_parameters(param)
            opt = vis_only_inliers.get_render_option()
            opt.background_color = np.asarray([0, 0, 0])
            opt.point_size = 3.0
            #opt.light_on = False
            #vis_only_inliers.update_geometry('inliers0')
            vis_only_inliers.poll_events()
            vis_only_inliers.update_renderer()
            if stop:
                vis_only_inliers.run()
                pcd1 = pcl_only_inliers[0]+pcl_only_inliers[1]+pcl_only_inliers[2]+pcl_only_inliers[3]
                for i_cam3 in range(4):
                    if has_gt_depth[i_cam3]:
                        pcd1 += pcl_gt[i_cam3]
                #o3d.io.write_point_cloud(os.path.join(output_folder, seq_name, 'open3d', base_0 + '.pcd'), pcd1)
            #param = vis_only_inliers.get_view_control().convert_to_pinhole_camera_parameters()
            #o3d.io.write_pinhole_camera_parameters('/home/vbelissen/Downloads/test.json', param)
            image = vis_only_inliers.capture_screen_float_buffer(False)
            plt.imsave(os.path.join(output_folder, seq_name, 'pcl',  base_0,  str(i_cam_n) + '.png'),
                       np.asarray(image), dpi=1)
            vis_only_inliers.destroy_window()
            del ctr
            del vis_only_inliers
            del opt

        # vis_inliers_outliers = o3d.visualization.Visualizer()
        # vis_inliers_outliers.create_window(visible = True, window_name = 'inout'+str(i_file))
        # for i_cam in range(N_cams):
        #     vis_inliers_outliers.add_geometry(pcl_only_inliers[i_cam])
        #     vis_inliers_outliers.add_geometry(pcl_only_outliers[i_cam])
        # for i, e in enumerate(pcl_gt):
        #     if e != 0:
        #         vis_inliers_outliers.add_geometry(e)
        # ctr = vis_inliers_outliers.get_view_control()
        # ctr.set_lookat(lookat_vector)
        # ctr.set_front(front_vector)
        # ctr.set_up(up_vector)
        # ctr.set_zoom(zoom_float)
        # #vis_inliers_outliers.run()
        # vis_inliers_outliers.destroy_window()
        # for i_cam2 in range(4):
        #     for suff in ['', 'bis', 'ter']:
        #         vis_inliers_cropped = o3d.visualization.Visualizer()
        #         vis_inliers_cropped.create_window(visible = True, window_name = 'incrop'+str(i_file))
        #         for i_cam in range(N_cams):
        #             vis_inliers_cropped.add_geometry(pcl_only_inliers[i_cam].crop(bbox))
        #         for i, e in enumerate(pcl_gt):
        #             if e != 0:
        #                 vis_inliers_cropped.add_geometry(e)
        #         ctr = vis_inliers_cropped.get_view_control()
        #         ctr.set_lookat(lookat_vector)
        #         ctr.set_front(front_vector)
        #         ctr.set_up(up_vector)
        #         ctr.set_zoom(zoom_float)
        #         param = o3d.io.read_pinhole_camera_parameters(
        #             '/home/vbelissen/Downloads/test/cameras_jsons/test' + str(i_cam2 + 1) + suff + '.json')
        #         ctr.convert_from_pinhole_camera_parameters(param)
        #         opt = vis_inliers_cropped.get_render_option()
        #         opt.background_color = np.asarray([0, 0, 0])
        #         vis_inliers_cropped.poll_events()
        #         vis_inliers_cropped.update_renderer()
        #         #vis_inliers_cropped.run()
        #         image = vis_inliers_cropped.capture_screen_float_buffer(False)
        #         plt.imsave(os.path.join(output_folder, seq_name, 'pcl', 'cropped',  str(i_cam2) + suff, base_0 + '_cropped_' + str(i_cam2) + suff + '.png'),
        #                    np.asarray(image), dpi=1)
        #         vis_inliers_cropped.destroy_window()
        #         del ctr
        #         del opt
        #         del vis_inliers_cropped

        #del ctr
        #del vis_full
        #del vis_only_inliers
        #del vis_inliers_outliers
        #del vis_inliers_cropped

        for i_cam in range(N_cams):
            rgb.append(images[i_cam][0].permute(1, 2, 0).detach().cpu().numpy() * 255)
            viz_pred_inv_depths.append(viz_inv_depth(pred_inv_depths[i_cam][0], normalizer=0.8) * 255)
            viz_pred_inv_depths[i_cam][not_masked[i_cam].reshape(image_shape) == 0] = 0
            concat = np.concatenate([rgb[i_cam], viz_pred_inv_depths[i_cam]], 0)
            # Save visualization
            output_file1 = os.path.join(output_folder, seq_name, 'depth', camera_names[i_cam], os.path.basename(input_files[i_cam][i_file]))
            output_file2 = os.path.join(output_folder, seq_name, 'rgb', camera_names[i_cam], os.path.basename(input_files[i_cam][i_file]))
            imwrite(output_file1, viz_pred_inv_depths[i_cam][:, :, ::-1])
            if has_full_mask[i_cam]:
                full_mask = np.load(input_full_masks[i_cam])
                mask_colors = label_colors[correspondence[full_mask]]
                imwrite(output_file2, (1-alpha_mask) * rgb[i_cam][:, :, ::-1] + alpha_mask * mask_colors[:, :, ::-1]*255)
            else:
                imwrite(output_file2, rgb[i_cam][:, :, ::-1])
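The Open3D screenshot loop above drives a Visualizer by hand: add geometries, set the view control, tune render options, do one poll/update pass, grab the frame buffer, and destroy the window. A minimal reusable sketch of that pipeline, using only the Open3D calls already exercised above (the helper name render_pointclouds_to_png and its argument list are illustrative, not part of the original script):

import os

import matplotlib.pyplot as plt
import numpy as np
import open3d as o3d


def render_pointclouds_to_png(geometries, out_path,
                              lookat, front, up, zoom,
                              point_size=3.0, background=(0, 0, 0)):
    """Render a list of Open3D geometries to a PNG without blocking (sketch)."""
    vis = o3d.visualization.Visualizer()
    vis.create_window(visible=True)
    for g in geometries:
        vis.add_geometry(g)
    # Virtual camera, set the same way as in the loop above
    ctr = vis.get_view_control()
    ctr.set_lookat(lookat)
    ctr.set_front(front)
    ctr.set_up(up)
    ctr.set_zoom(zoom)
    # Render options: dark background, larger points
    opt = vis.get_render_option()
    opt.background_color = np.asarray(background)
    opt.point_size = point_size
    # One render pass, then grab the frame buffer and save it
    vis.poll_events()
    vis.update_renderer()
    image = vis.capture_screen_float_buffer(False)
    os.makedirs(os.path.dirname(out_path) or '.', exist_ok=True)
    plt.imsave(out_path, np.asarray(image))
    vis.destroy_window()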
예제 #25
0
def infer_plot_and_save_3D_pcl(input_file1, input_file2, input_file3,
                               input_file4, output_file1, output_file2,
                               output_file3, output_file4, model_wrapper1,
                               model_wrapper2, model_wrapper3, model_wrapper4,
                               hasGTdepth1, hasGTdepth2, hasGTdepth3,
                               hasGTdepth4, image_shape, half, save):
    """
    Process a single input file to produce and save visualization

    Parameters
    ----------
    input_file : str
        Image file
    output_file : str
        Output file, or folder where the output will be saved
    model_wrapper : nn.Module
        Model wrapper used for inference
    image_shape : Image shape
        Input image shape
    half: bool
        use half precision (fp16)
    save: str
        Save format (npz or png)
    """
    if not is_image(output_file1):
        # If not an image, assume it's a folder and append the input name
        os.makedirs(output_file1, exist_ok=True)
        output_file1 = os.path.join(output_file1,
                                    os.path.basename(input_file1))
    if not is_image(output_file2):
        # If not an image, assume it's a folder and append the input name
        os.makedirs(output_file2, exist_ok=True)
        output_file2 = os.path.join(output_file2,
                                    os.path.basename(input_file2))
    if not is_image(output_file3):
        # If not an image, assume it's a folder and append the input name
        os.makedirs(output_file3, exist_ok=True)
        output_file3 = os.path.join(output_file3,
                                    os.path.basename(input_file3))
    if not is_image(output_file4):
        # If not an image, assume it's a folder and append the input name
        os.makedirs(output_file4, exist_ok=True)
        output_file4 = os.path.join(output_file4,
                                    os.path.basename(input_file4))

    # change to half precision for evaluation if requested
    dtype = torch.float16 if half else None

    # Load image
    image1 = load_image(input_file1).convert('RGB')
    image2 = load_image(input_file2).convert('RGB')
    image3 = load_image(input_file3).convert('RGB')
    image4 = load_image(input_file4).convert('RGB')
    # Resize and to tensor
    image1 = resize_image(image1, image_shape)
    image2 = resize_image(image2, image_shape)
    image3 = resize_image(image3, image_shape)
    image4 = resize_image(image4, image_shape)
    image1 = to_tensor(image1).unsqueeze(0)
    image2 = to_tensor(image2).unsqueeze(0)
    image3 = to_tensor(image3).unsqueeze(0)
    image4 = to_tensor(image4).unsqueeze(0)

    # Send image to GPU if available
    if torch.cuda.is_available():
        image1 = image1.to('cuda:{}'.format(rank()), dtype=dtype)
        image2 = image2.to('cuda:{}'.format(rank()), dtype=dtype)
        image3 = image3.to('cuda:{}'.format(rank()), dtype=dtype)
        image4 = image4.to('cuda:{}'.format(rank()), dtype=dtype)

    # Depth inference (returns predicted inverse depth)
    pred_inv_depth1 = model_wrapper1.depth(image1)
    pred_inv_depth2 = model_wrapper2.depth(image2)
    pred_inv_depth3 = model_wrapper3.depth(image3)
    pred_inv_depth4 = model_wrapper4.depth(image4)
    pred_depth1 = inv2depth(pred_inv_depth1)
    pred_depth2 = inv2depth(pred_inv_depth2)
    pred_depth3 = inv2depth(pred_inv_depth3)
    pred_depth4 = inv2depth(pred_inv_depth4)

    base_folder_str1 = get_base_folder(input_file1)
    split_type_str1 = get_split_type(input_file1)
    seq_name_str1 = get_sequence_name(input_file1)
    camera_str1 = get_camera_name(input_file1)

    base_folder_str2 = get_base_folder(input_file2)
    split_type_str2 = get_split_type(input_file2)
    seq_name_str2 = get_sequence_name(input_file2)
    camera_str2 = get_camera_name(input_file2)

    base_folder_str3 = get_base_folder(input_file3)
    split_type_str3 = get_split_type(input_file3)
    seq_name_str3 = get_sequence_name(input_file3)
    camera_str3 = get_camera_name(input_file3)

    base_folder_str4 = get_base_folder(input_file4)
    split_type_str4 = get_split_type(input_file4)
    seq_name_str4 = get_sequence_name(input_file4)
    camera_str4 = get_camera_name(input_file4)

    calib_data1 = {}
    calib_data2 = {}
    calib_data3 = {}
    calib_data4 = {}
    calib_data1[camera_str1] = read_raw_calib_files_camera_valeo(
        base_folder_str1, split_type_str1, seq_name_str1, camera_str1)
    calib_data2[camera_str2] = read_raw_calib_files_camera_valeo(
        base_folder_str2, split_type_str2, seq_name_str2, camera_str2)
    calib_data3[camera_str3] = read_raw_calib_files_camera_valeo(
        base_folder_str3, split_type_str3, seq_name_str3, camera_str3)
    calib_data4[camera_str4] = read_raw_calib_files_camera_valeo(
        base_folder_str4, split_type_str4, seq_name_str4, camera_str4)

    path_to_theta_lut1 = get_path_to_theta_lut(input_file1)
    path_to_ego_mask1 = get_path_to_ego_mask(input_file1)
    poly_coeffs1, principal_point1, scale_factors1 = get_intrinsics(
        input_file1, calib_data1)
    path_to_theta_lut2 = get_path_to_theta_lut(input_file2)
    path_to_ego_mask2 = get_path_to_ego_mask(input_file2)
    poly_coeffs2, principal_point2, scale_factors2 = get_intrinsics(
        input_file2, calib_data2)
    path_to_theta_lut3 = get_path_to_theta_lut(input_file3)
    path_to_ego_mask3 = get_path_to_ego_mask(input_file3)
    poly_coeffs3, principal_point3, scale_factors3 = get_intrinsics(
        input_file3, calib_data3)
    path_to_theta_lut4 = get_path_to_theta_lut(input_file4)
    path_to_ego_mask4 = get_path_to_ego_mask(input_file4)
    poly_coeffs4, principal_point4, scale_factors4 = get_intrinsics(
        input_file4, calib_data4)

    poly_coeffs1 = torch.from_numpy(poly_coeffs1).unsqueeze(0)
    principal_point1 = torch.from_numpy(principal_point1).unsqueeze(0)
    scale_factors1 = torch.from_numpy(scale_factors1).unsqueeze(0)
    poly_coeffs2 = torch.from_numpy(poly_coeffs2).unsqueeze(0)
    principal_point2 = torch.from_numpy(principal_point2).unsqueeze(0)
    scale_factors2 = torch.from_numpy(scale_factors2).unsqueeze(0)
    poly_coeffs3 = torch.from_numpy(poly_coeffs3).unsqueeze(0)
    principal_point3 = torch.from_numpy(principal_point3).unsqueeze(0)
    scale_factors3 = torch.from_numpy(scale_factors3).unsqueeze(0)
    poly_coeffs4 = torch.from_numpy(poly_coeffs4).unsqueeze(0)
    principal_point4 = torch.from_numpy(principal_point4).unsqueeze(0)
    scale_factors4 = torch.from_numpy(scale_factors4).unsqueeze(0)

    pose_matrix1 = torch.from_numpy(
        get_extrinsics_pose_matrix(input_file1, calib_data1)).unsqueeze(0)
    pose_matrix2 = torch.from_numpy(
        get_extrinsics_pose_matrix(input_file2, calib_data2)).unsqueeze(0)
    pose_matrix3 = torch.from_numpy(
        get_extrinsics_pose_matrix(input_file3, calib_data3)).unsqueeze(0)
    pose_matrix4 = torch.from_numpy(
        get_extrinsics_pose_matrix(input_file4, calib_data4)).unsqueeze(0)
    pose_tensor1 = Pose(pose_matrix1)
    pose_tensor2 = Pose(pose_matrix2)
    pose_tensor3 = Pose(pose_matrix3)
    pose_tensor4 = Pose(pose_matrix4)

    ego_mask1 = np.load(path_to_ego_mask1)
    ego_mask2 = np.load(path_to_ego_mask2)
    ego_mask3 = np.load(path_to_ego_mask3)
    ego_mask4 = np.load(path_to_ego_mask4)
    not_masked1 = ego_mask1.astype(bool).reshape(-1)
    not_masked2 = ego_mask2.astype(bool).reshape(-1)
    not_masked3 = ego_mask3.astype(bool).reshape(-1)
    not_masked4 = ego_mask4.astype(bool).reshape(-1)

    cam1 = CameraFisheye(path_to_theta_lut=[path_to_theta_lut1],
                         path_to_ego_mask=[path_to_ego_mask1],
                         poly_coeffs=poly_coeffs1.float(),
                         principal_point=principal_point1.float(),
                         scale_factors=scale_factors1.float(),
                         Tcw=pose_tensor1)
    cam2 = CameraFisheye(path_to_theta_lut=[path_to_theta_lut2],
                         path_to_ego_mask=[path_to_ego_mask2],
                         poly_coeffs=poly_coeffs2.float(),
                         principal_point=principal_point2.float(),
                         scale_factors=scale_factors2.float(),
                         Tcw=pose_tensor2)
    cam3 = CameraFisheye(path_to_theta_lut=[path_to_theta_lut3],
                         path_to_ego_mask=[path_to_ego_mask3],
                         poly_coeffs=poly_coeffs3.float(),
                         principal_point=principal_point3.float(),
                         scale_factors=scale_factors3.float(),
                         Tcw=pose_tensor3)
    cam4 = CameraFisheye(path_to_theta_lut=[path_to_theta_lut4],
                         path_to_ego_mask=[path_to_ego_mask4],
                         poly_coeffs=poly_coeffs4.float(),
                         principal_point=principal_point4.float(),
                         scale_factors=scale_factors4.float(),
                         Tcw=pose_tensor4)
    if torch.cuda.is_available():
        cam1 = cam1.to('cuda:{}'.format(rank()), dtype=dtype)
        cam2 = cam2.to('cuda:{}'.format(rank()), dtype=dtype)
        cam3 = cam3.to('cuda:{}'.format(rank()), dtype=dtype)
        cam4 = cam4.to('cuda:{}'.format(rank()), dtype=dtype)

    world_points1 = cam1.reconstruct(pred_depth1, frame='w')
    world_points1 = world_points1[0].cpu().numpy()
    world_points1 = world_points1.reshape((3, -1)).transpose()
    world_points2 = cam2.reconstruct(pred_depth2, frame='w')
    world_points2 = world_points2[0].cpu().numpy()
    world_points2 = world_points2.reshape((3, -1)).transpose()
    world_points3 = cam3.reconstruct(pred_depth3, frame='w')
    world_points3 = world_points3[0].cpu().numpy()
    world_points3 = world_points3.reshape((3, -1)).transpose()
    world_points4 = cam4.reconstruct(pred_depth4, frame='w')
    world_points4 = world_points4[0].cpu().numpy()
    world_points4 = world_points4.reshape((3, -1)).transpose()

    if hasGTdepth1:
        gt_depth_file1 = get_depth_file(input_file1)
        gt_depth1 = np.load(gt_depth_file1)['velodyne_depth'].astype(
            np.float32)
        gt_depth1 = torch.from_numpy(gt_depth1).unsqueeze(0).unsqueeze(0)
        if torch.cuda.is_available():
            gt_depth1 = gt_depth1.to('cuda:{}'.format(rank()), dtype=dtype)

        gt_depth_3d1 = cam1.reconstruct(gt_depth1, frame='w')
        gt_depth_3d1 = gt_depth_3d1[0].cpu().numpy()
        gt_depth_3d1 = gt_depth_3d1.reshape((3, -1)).transpose()
    if hasGTdepth2:
        gt_depth_file2 = get_depth_file(input_file2)
        gt_depth2 = np.load(gt_depth_file2)['velodyne_depth'].astype(
            np.float32)
        gt_depth2 = torch.from_numpy(gt_depth2).unsqueeze(0).unsqueeze(0)
        if torch.cuda.is_available():
            gt_depth2 = gt_depth2.to('cuda:{}'.format(rank()), dtype=dtype)

        gt_depth_3d2 = cam2.reconstruct(gt_depth2, frame='w')
        gt_depth_3d2 = gt_depth_3d2[0].cpu().numpy()
        gt_depth_3d2 = gt_depth_3d2.reshape((3, -1)).transpose()
    if hasGTdepth3:
        gt_depth_file3 = get_depth_file(input_file3)
        gt_depth3 = np.load(gt_depth_file3)['velodyne_depth'].astype(
            np.float32)
        gt_depth3 = torch.from_numpy(gt_depth3).unsqueeze(0).unsqueeze(0)
        if torch.cuda.is_available():
            gt_depth3 = gt_depth3.to('cuda:{}'.format(rank()), dtype=dtype)

        gt_depth_3d3 = cam3.reconstruct(gt_depth3, frame='w')
        gt_depth_3d3 = gt_depth_3d3[0].cpu().numpy()
        gt_depth_3d3 = gt_depth_3d3.reshape((3, -1)).transpose()
    if hasGTdepth4:
        gt_depth_file4 = get_depth_file(input_file4)
        gt_depth4 = np.load(gt_depth_file4)['velodyne_depth'].astype(
            np.float32)
        gt_depth4 = torch.from_numpy(gt_depth4).unsqueeze(0).unsqueeze(0)
        if torch.cuda.is_available():
            gt_depth4 = gt_depth4.to('cuda:{}'.format(rank()), dtype=dtype)

        gt_depth_3d4 = cam4.reconstruct(gt_depth4, frame='w')
        gt_depth_3d4 = gt_depth_3d4[0].cpu().numpy()
        gt_depth_3d4 = gt_depth_3d4.reshape((3, -1)).transpose()

    world_points1 = world_points1[not_masked1]
    world_points2 = world_points2[not_masked2]
    world_points3 = world_points3[not_masked3]
    world_points4 = world_points4[not_masked4]
    if hasGTdepth1:
        gt_depth_3d1 = gt_depth_3d1[not_masked1]
    if hasGTdepth2:
        gt_depth_3d2 = gt_depth_3d2[not_masked2]
    if hasGTdepth3:
        gt_depth_3d3 = gt_depth_3d3[not_masked3]
    if hasGTdepth4:
        gt_depth_3d4 = gt_depth_3d4[not_masked4]

    pcl1 = o3d.geometry.PointCloud()
    pcl1.points = o3d.utility.Vector3dVector(world_points1)
    img_numpy1 = image1[0].cpu().numpy()
    img_numpy1 = img_numpy1.reshape((3, -1)).transpose()
    pcl2 = o3d.geometry.PointCloud()
    pcl2.points = o3d.utility.Vector3dVector(world_points2)
    img_numpy2 = image2[0].cpu().numpy()
    img_numpy2 = img_numpy2.reshape((3, -1)).transpose()
    pcl3 = o3d.geometry.PointCloud()
    pcl3.points = o3d.utility.Vector3dVector(world_points3)
    img_numpy3 = image3[0].cpu().numpy()
    img_numpy3 = img_numpy3.reshape((3, -1)).transpose()
    pcl4 = o3d.geometry.PointCloud()
    pcl4.points = o3d.utility.Vector3dVector(world_points4)
    img_numpy4 = image4[0].cpu().numpy()
    img_numpy4 = img_numpy4.reshape((3, -1)).transpose()

    img_numpy1 = img_numpy1[not_masked1]
    pcl1.colors = o3d.utility.Vector3dVector(img_numpy1)
    img_numpy2 = img_numpy2[not_masked2]
    pcl2.colors = o3d.utility.Vector3dVector(img_numpy2)
    img_numpy3 = img_numpy3[not_masked3]
    pcl3.colors = o3d.utility.Vector3dVector(img_numpy3)
    img_numpy4 = img_numpy4[not_masked4]
    pcl4.colors = o3d.utility.Vector3dVector(img_numpy4)
    #pcl.paint_uniform_color([1.0, 0.0, 0])

    #print("Radius oulier removal")
    #cl, ind = pcl.remove_radius_outlier(nb_points=10, radius=0.5)
    #display_inlier_outlier(pcl, ind)

    remove_outliers = True
    if remove_outliers:
        cl1, ind1 = pcl1.remove_statistical_outlier(nb_neighbors=10,
                                                    std_ratio=1.3)
        inlier_cloud1 = pcl1.select_by_index(ind1)
        outlier_cloud1 = pcl1.select_by_index(ind1, invert=True)
        outlier_cloud1.paint_uniform_color([0.0, 0.0, 1.0])
        cl2, ind2 = pcl2.remove_statistical_outlier(nb_neighbors=10,
                                                    std_ratio=1.3)
        inlier_cloud2 = pcl2.select_by_index(ind2)
        outlier_cloud2 = pcl2.select_by_index(ind2, invert=True)
        outlier_cloud2.paint_uniform_color([0.0, 0.0, 1.0])
        cl3, ind3 = pcl3.remove_statistical_outlier(nb_neighbors=10,
                                                    std_ratio=1.3)
        inlier_cloud3 = pcl3.select_by_index(ind3)
        outlier_cloud3 = pcl3.select_by_index(ind3, invert=True)
        outlier_cloud3.paint_uniform_color([0.0, 0.0, 1.0])
        cl4, ind4 = pcl4.remove_statistical_outlier(nb_neighbors=10,
                                                    std_ratio=1.3)
        inlier_cloud4 = pcl4.select_by_index(ind4)
        outlier_cloud4 = pcl4.select_by_index(ind4, invert=True)
        outlier_cloud4.paint_uniform_color([0.0, 0.0, 1.0])

    if hasGTdepth1:
        pcl_gt1 = o3d.geometry.PointCloud()
        pcl_gt1.points = o3d.utility.Vector3dVector(gt_depth_3d1)
        pcl_gt1.paint_uniform_color([1.0, 0.0, 0])
    if hasGTdepth2:
        pcl_gt2 = o3d.geometry.PointCloud()
        pcl_gt2.points = o3d.utility.Vector3dVector(gt_depth_3d2)
        pcl_gt2.paint_uniform_color([1.0, 0.0, 0])
    if hasGTdepth3:
        pcl_gt3 = o3d.geometry.PointCloud()
        pcl_gt3.points = o3d.utility.Vector3dVector(gt_depth_3d3)
        pcl_gt3.paint_uniform_color([1.0, 0.0, 0])
    if hasGTdepth4:
        pcl_gt4 = o3d.geometry.PointCloud()
        pcl_gt4.points = o3d.utility.Vector3dVector(gt_depth_3d4)
        pcl_gt4.paint_uniform_color([1.0, 0.0, 0])

    if remove_outliers:
        toPlot = [inlier_cloud1, inlier_cloud2, inlier_cloud3, inlier_cloud4]
        if hasGTdepth1:
            toPlot.append(pcl_gt1)
        if hasGTdepth2:
            toPlot.append(pcl_gt2)
        if hasGTdepth3:
            toPlot.append(pcl_gt3)
        if hasGTdepth4:
            toPlot.append(pcl_gt4)
        toPlotClear = list(toPlot)
        toPlot.append(outlier_cloud1)
        toPlot.append(outlier_cloud2)
        toPlot.append(outlier_cloud3)
        toPlot.append(outlier_cloud4)
        o3d.visualization.draw_geometries(toPlot)
        o3d.visualization.draw_geometries(toPlotClear)

    toPlot = [pcl1, pcl2, pcl3, pcl4]
    if hasGTdepth1:
        toPlot.append(pcl_gt1)
    if hasGTdepth2:
        toPlot.append(pcl_gt2)
    if hasGTdepth3:
        toPlot.append(pcl_gt3)
    if hasGTdepth4:
        toPlot.append(pcl_gt4)
    o3d.visualization.draw_geometries(toPlot)

    bbox = o3d.geometry.AxisAlignedBoundingBox(min_bound=(-1000, -1000, -1),
                                               max_bound=(1000, 1000, 5))
    toPlot = [
        pcl1.crop(bbox),
        pcl2.crop(bbox),
        pcl3.crop(bbox),
        pcl4.crop(bbox)
    ]
    if hasGTdepth1:
        toPlot.append(pcl_gt1)
    if hasGTdepth2:
        toPlot.append(pcl_gt2)
    if hasGTdepth3:
        toPlot.append(pcl_gt3)
    if hasGTdepth4:
        toPlot.append(pcl_gt4)
    o3d.visualization.draw_geometries(toPlot)

    rgb1 = image1[0].permute(1, 2, 0).detach().cpu().numpy() * 255
    rgb2 = image2[0].permute(1, 2, 0).detach().cpu().numpy() * 255
    rgb3 = image3[0].permute(1, 2, 0).detach().cpu().numpy() * 255
    rgb4 = image4[0].permute(1, 2, 0).detach().cpu().numpy() * 255
    # Prepare inverse depth
    viz_pred_inv_depth1 = viz_inv_depth(pred_inv_depth1[0]) * 255
    viz_pred_inv_depth2 = viz_inv_depth(pred_inv_depth2[0]) * 255
    viz_pred_inv_depth3 = viz_inv_depth(pred_inv_depth3[0]) * 255
    viz_pred_inv_depth4 = viz_inv_depth(pred_inv_depth4[0]) * 255
    # Concatenate both vertically
    image1 = np.concatenate([rgb1, viz_pred_inv_depth1], 0)
    image2 = np.concatenate([rgb2, viz_pred_inv_depth2], 0)
    image3 = np.concatenate([rgb3, viz_pred_inv_depth3], 0)
    image4 = np.concatenate([rgb4, viz_pred_inv_depth4], 0)
    # Save visualization
    print('Saving {} to {}'.format(
        pcolor(input_file1, 'cyan', attrs=['bold']),
        pcolor(output_file1, 'magenta', attrs=['bold'])))
    imwrite(output_file1, image1[:, :, ::-1])
    print('Saving {} to {}'.format(
        pcolor(input_file2, 'cyan', attrs=['bold']),
        pcolor(output_file2, 'magenta', attrs=['bold'])))
    imwrite(output_file2, image2[:, :, ::-1])
    print('Saving {} to {}'.format(
        pcolor(input_file3, 'cyan', attrs=['bold']),
        pcolor(output_file3, 'magenta', attrs=['bold'])))
    imwrite(output_file3, image3[:, :, ::-1])
    print('Saving {} to {}'.format(
        pcolor(input_file4, 'cyan', attrs=['bold']),
        pcolor(output_file4, 'magenta', attrs=['bold'])))
    imwrite(output_file4, image4[:, :, ::-1])
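The four per-camera branches of infer_plot_and_save_3D_pcl differ only by index. A minimal sketch of how the preprocessing could be factored into a loop, assuming the same helpers (load_image, resize_image, to_tensor, inv2depth, rank) and variables (image_shape, dtype) used in the function above:

# Hypothetical refactor of the repeated per-camera preprocessing above.
input_files = [input_file1, input_file2, input_file3, input_file4]
model_wrappers = [model_wrapper1, model_wrapper2, model_wrapper3, model_wrapper4]

images, pred_depths = [], []
for input_file, model_wrapper in zip(input_files, model_wrappers):
    image = load_image(input_file).convert('RGB')
    image = to_tensor(resize_image(image, image_shape)).unsqueeze(0)
    if torch.cuda.is_available():
        image = image.to('cuda:{}'.format(rank()), dtype=dtype)
    images.append(image)
    pred_depths.append(inv2depth(model_wrapper.depth(image)))

Iterating over paired lists keeps the per-camera handling uniform and makes index slips (such as reusing model_wrapper1 for camera 3) harder to reintroduce.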
예제 #26
0
                              np.array([[0.046296, -7.33178]])).float(),
                          scale_factors=torch.from_numpy(np.array([[1.0, 1.0]
                                                                   ])).float())

cam_left = CameraFisheye(path_to_theta_lut=[
    '/home/data/vbelissen/valeo_multiview/calibrations_theta_lut/fisheye/test/20170320_163113/20170320_163113_cam_0_1280_800.npy'
],
                         path_to_ego_mask=[''],
                         poly_coeffs=torch.from_numpy(
                             np.array([[282.85, -27.8671, 114.318,
                                        -36.6703]])).float(),
                         principal_point=torch.from_numpy(
                             np.array([[0.046296, -7.33178]])).float(),
                         scale_factors=torch.from_numpy(np.array([[1.0, 1.0]
                                                                  ])).float(),
                         Tcw=Pose(relative_pose_left_torch))

cam_front = cam_front.to(torch.device('cuda'))
cam_left = cam_left.to(torch.device('cuda'))

world_points = cam_front.reconstruct(simulated_depth, frame='w')

ref_coords_left = cam_left.project(world_points, frame='w')

warped_front_left = funct.grid_sample(left_img_torch,
                                      ref_coords_left,
                                      mode='bilinear',
                                      padding_mode='zeros',
                                      align_corners=True)

warped_front_left_PIL = torch.transpose(warped_front_left.unsqueeze(4), 1,
예제 #27
0
import cv2
import open3d as o3d

main_folder = '/home/vbelissen/test_data/valeo_data_ready2train/data/dataset_valeo_cea_2017_2018/'
seq_idx = '20170320_144339'
img_idx = '00011702'

path_to_theta_lut = [
    main_folder + 'images/fisheye/train/' + seq_idx +
    '/cam_0/theta_tensor_1280_800.npy'
]
poly_coeffs = torch.Tensor([282.85, -27.8671, 114.318, -36.6703]).unsqueeze(0)
principal_point = torch.Tensor([0.046296, -7.33178]).unsqueeze(0)
scale_factors = torch.Tensor([1., 1. / 1.00173]).unsqueeze(0)
Tcw = Pose.identity(len(poly_coeffs))
Twc = Tcw.inverse()

r = R.from_quat([1, 0, 0, 0])

depth_map_valeo = np.zeros((1, 1, 800, 1280))
depth_map_valeo[0, 0, :, :] = \
    np.load(main_folder + 'depth_maps/fisheye/train/' + seq_idx + '/velodyne_0/' + seq_idx + '_velodyne_0_' + img_idx + '.npz')['velodyne_depth']
depth_map_valeo = depth_map_valeo.astype('float32')

depth_map_valeo_tensor = torch.from_numpy(depth_map_valeo)


def reconstruct(depth, frame='w'):
    """
    Reconstructs pixel-wise 3D points from a depth map.
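The reconstruct function above lifts a depth map to pixel-wise 3D points; for the fisheye model it additionally uses the theta LUT and polynomial coefficients loaded earlier in this snippet. As a point of reference only, a minimal pinhole analogue (K is an assumed 3x3 intrinsics tensor, not part of the fisheye code above) could look like:

import torch


def reconstruct_pinhole(depth, K):
    """Lift a [B,1,H,W] depth map to [B,3,H,W] camera-frame points (pinhole sketch)."""
    B, _, H, W = depth.shape
    # Pixel grid in homogeneous coordinates, shape [3, H*W]
    ys = torch.arange(H, dtype=depth.dtype).view(H, 1).expand(H, W)
    xs = torch.arange(W, dtype=depth.dtype).view(1, W).expand(H, W)
    pix = torch.stack([xs, ys, torch.ones_like(xs)], dim=0).reshape(3, -1)
    # Back-project: X = depth * K^-1 * pixel
    rays = (torch.inverse(K) @ pix).unsqueeze(0)        # [1, 3, H*W]
    points = rays * depth.reshape(B, 1, H * W)          # broadcast over batch
    return points.reshape(B, 3, H, W)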
예제 #28
0
    def forward(self, batch, return_logs=False, progress=0.0):
        """
        Processes a batch.

        Parameters
        ----------
        batch : dict
            Input batch
        return_logs : bool
            True if logs are stored
        progress :
            Training progress percentage

        Returns
        -------
        output : dict
            Dictionary containing a "loss" scalar and different metrics and predictions
            for logging and downstream usage.
        """
        # Calculate predicted depth and pose output
        output = super().forward(batch, return_logs=return_logs)
        if not self.training:
            # If not training, no need for self-supervised loss
            return output
        else:
            # Otherwise, calculate self-supervised loss
            self_sup_output = self.self_supervised_loss(
                batch['rgb_original'],
                batch['rgb_context_original'],
                output['inv_depths'],
                output['poses'],
                batch['intrinsics'],
                return_logs=return_logs,
                progress=progress)
            if len(batch['rgb_context']
                   ) == 2 and self.pose_consistency_loss_weight != 0.:
                pose_contexts = self.compute_poses(batch['rgb_context'][0],
                                                   [batch['rgb_context'][1]])
                pose_consistency_output = self.pose_consistency_loss(
                    invert_pose(output['poses'][0].item()),
                    output['poses'][1].item(), pose_contexts[0].item())
                pose_consistency_output[
                    'loss'] *= self.pose_consistency_loss_weight
                output = merge_outputs(output, pose_consistency_output)
            if self.identity_pose_loss_weight != 0:
                # Identity pose loss: pose(I_t, I_t+1) should equal pose(I_t+1, I_t)^-1
                pose_vec_minus1_0 = self.pose_net(batch['rgb_context'][0],
                                                  batch['rgb'])
                pose_vec_minus1_0 = Pose.from_vec(pose_vec_minus1_0,
                                                  self.rotation_mode)
                identity_pose_output_minus1_0 = self.identity_pose_loss(
                    output['poses'][0].item(), invert_pose(pose_vec_minus1_0))

                pose_vec_01 = self.pose_net(batch['rgb_context'][1],
                                            batch['rgb'])
                pose_vec_01 = Pose.from_vec(pose_vec_01, self.rotation_mode)
                identity_pose_output_01 = self.identity_pose_loss(
                    output['poses'][1].item(), invert_pose(pose_vec_01))

                identity_pose_output_minus1_0[
                    'loss'] *= self.identity_pose_loss_weight
                identity_pose_output_01[
                    'loss'] *= self.identity_pose_loss_weight
                output = merge_outputs(output, identity_pose_output_minus1_0,
                                       identity_pose_output_01)
            if self.temporal_consistency_loss_weight != 0:
                # Temporal consistency: D_t = proj_on_t(D_t-1) + pose_t-1_t[3,3]
                temporal_consistency_loss = []  # for each ref image
                for j, (ref_image, pose) in enumerate(
                        zip(batch['rgb_context'], output['poses'])):
                    ref_warped_depths = warp_inv_depth(output['inv_depths'],
                                                       batch['intrinsics'],
                                                       batch['intrinsics'],
                                                       pose)
                    # add z from pose to warped depth
                    ref_warped_depths = [
                        ref_warped_depth + pose[:, 3, 3]
                        for ref_warped_depth in ref_warped_depths
                    ]

                    ref_inv_depths = self.compute_inv_depths(ref_image)
                    ref_depths = [
                        inv2depth(ref_inv_depths[i])
                        for i in range(len(ref_inv_depths))
                    ]

                    l1_loss = self.temporal_consistency_loss(
                        ref_warped_depths, ref_depths)

                    temporal_consistency_loss.append(
                        torch.stack([l.mean() for l in l1_loss]).mean())
                # A Python list has no .mean(): average the per-context losses
                temporal_consistency_loss = torch.stack(
                    temporal_consistency_loss).mean() * self.temporal_consistency_loss_weight
                output = merge_outputs(output,
                                       {'loss': temporal_consistency_loss})
            # Return loss and metrics
            return {
                'loss': self_sup_output['loss'],
                **merge_outputs(output, self_sup_output),
            }
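The identity pose term in the forward pass above enforces pose(I_t, I_t+1) = pose(I_t+1, I_t)^-1. On 4x4 transformation matrices that relation is a plain matrix inverse, as this small self-contained check illustrates (plain torch, independent of the Pose and invert_pose helpers used above):

import torch


def pose_from_rt(yaw, t):
    """Build a 4x4 pose from a rotation about z (radians) and a translation (sketch)."""
    c, s = torch.cos(yaw), torch.sin(yaw)
    T = torch.eye(4)
    T[0, 0], T[0, 1], T[1, 0], T[1, 1] = c, -s, s, c
    T[:3, 3] = t
    return T


# Pose of frame t+1 expressed in frame t, and the reverse direction
T_t_tp1 = pose_from_rt(torch.tensor(0.1), torch.tensor([0.5, 0.0, 1.0]))
T_tp1_t = torch.inverse(T_t_tp1)

# Identity-pose relation: composing the two should give the identity
assert torch.allclose(T_t_tp1 @ T_tp1_t, torch.eye(4), atol=1e-6)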
예제 #29
0
            def photo_loss_2imgs(i_cam1, i_cam2, rot_vect_list, save_pictures,
                                 rot_str):

                # Computes the photometric loss between 2 images of adjacent cameras
                # It reconstructs each image from the adjacent one, using calibration data,
                # depth prediction model and correction angles alpha, beta, gamma

                for i, i_cam in enumerate([i_cam1, i_cam2]):
                    base_folder_str = get_base_folder(
                        input_files[i_cam][i_file])
                    split_type_str = get_split_type(input_files[i_cam][i_file])
                    seq_name_str = get_sequence_name(
                        input_files[i_cam][i_file])
                    camera_str = get_camera_name(input_files[i_cam][i_file])
                    calib_data = {}
                    calib_data[camera_str] = read_raw_calib_files_camera_valeo(
                        base_folder_str, split_type_str, seq_name_str,
                        camera_str)
                    pose_matrix = get_extrinsics_pose_matrix_extra_rot_torch(
                        input_files[i_cam][i_file], calib_data,
                        rot_vect_list[i]).unsqueeze(0)
                    pose_tensor = Pose(pose_matrix).to('cuda:{}'.format(
                        rank()),
                                                       dtype=dtype)
                    CameraFisheye.Twc.fget.cache_clear()
                    cams[i_cam].Tcw = pose_tensor

                world_points1 = cams[i_cam1].reconstruct(pred_depths[i_cam1],
                                                         frame='w')
                world_points2 = cams[i_cam2].reconstruct(pred_depths[i_cam2],
                                                         frame='w')

                #depth1_wrt_cam2 = torch.norm(cams[i_cam2].Tcw @ world_points1, dim=1, keepdim=True)
                #depth2_wrt_cam1 = torch.norm(cams[i_cam1].Tcw @ world_points2, dim=1, keepdim=True)

                ref_coords1to2 = cams[i_cam2].project(world_points1, frame='w')
                ref_coords2to1 = cams[i_cam1].project(world_points2, frame='w')

                reconstructedImg2to1 = funct.grid_sample(images[i_cam2] *
                                                         not_masked[i_cam2],
                                                         ref_coords1to2,
                                                         mode='bilinear',
                                                         padding_mode='zeros',
                                                         align_corners=True)
                reconstructedImg1to2 = funct.grid_sample(images[i_cam1] *
                                                         not_masked[i_cam1],
                                                         ref_coords2to1,
                                                         mode='bilinear',
                                                         padding_mode='zeros',
                                                         align_corners=True)

                #print(reconstructedImg2to1)
                # z = reconstructedImg2to1.sum()
                # z.backward()

                if save_pictures:
                    if epoch == 0:
                        cv2.imwrite(
                            '/home/vbelissen/Downloads/cam_' + str(i_cam1) +
                            '_' + str(i_file) + '_orig.png',
                            torch.transpose(
                                (images[i_cam1][0, :, :, :]
                                 ).unsqueeze(0).unsqueeze(4), 1,
                                4).squeeze().detach().cpu().numpy() * 255)
                        cv2.imwrite(
                            '/home/vbelissen/Downloads/cam_' + str(i_cam2) +
                            '_' + str(i_file) + '_orig.png',
                            torch.transpose(
                                (images[i_cam2][0, :, :, :]
                                 ).unsqueeze(0).unsqueeze(4), 1,
                                4).squeeze().detach().cpu().numpy() * 255)
                    cv2.imwrite(
                        '/home/vbelissen/Downloads/cam_' + str(i_cam1) + '_' +
                        str(i_file) + '_recons_from_' + str(i_cam2) + '_rot' +
                        rot_str + '.png',
                        torch.transpose(
                            ((reconstructedImg2to1 * not_masked[i_cam1]
                              )[0, :, :, :]).unsqueeze(0).unsqueeze(4), 1,
                            4).squeeze().detach().cpu().numpy() * 255)
                    cv2.imwrite(
                        '/home/vbelissen/Downloads/cam_' + str(i_cam2) + '_' +
                        str(i_file) + '_recons_from_' + str(i_cam1) + '_rot' +
                        rot_str + '.png',
                        torch.transpose(
                            ((reconstructedImg1to2 * not_masked[i_cam2]
                              )[0, :, :, :]).unsqueeze(0).unsqueeze(4), 1,
                            4).squeeze().detach().cpu().numpy() * 255)

                #reconstructedDepth2to1 = funct.grid_sample(pred_depths[i_cam2], ref_coords1to2, mode='bilinear', padding_mode='zeros', align_corners=True)
                #reconstructedDepth1to2 = funct.grid_sample(pred_depths[i_cam1], ref_coords2to1, mode='bilinear', padding_mode='zeros', align_corners=True)

                #reconstructedEgo2to1 = funct.grid_sample(not_masked[i_cam2], ref_coords1to2, mode='bilinear', padding_mode='zeros', align_corners=True)
                #reconstructedEgo1to2 = funct.grid_sample(not_masked[i_cam1], ref_coords2to1, mode='bilinear', padding_mode='zeros', align_corners=True)

                l1_loss_1 = torch.abs(images[i_cam1] * not_masked[i_cam1] -
                                      reconstructedImg2to1 *
                                      not_masked[i_cam1])
                l1_loss_2 = torch.abs(images[i_cam2] * not_masked[i_cam2] -
                                      reconstructedImg1to2 *
                                      not_masked[i_cam2])

                # SSIM loss
                ssim_loss_weight = 0.85
                ssim_loss_1 = SSIM(images[i_cam1] * not_masked[i_cam1],
                                   reconstructedImg2to1 * not_masked[i_cam1],
                                   C1=1e-4,
                                   C2=9e-4,
                                   kernel_size=3)
                ssim_loss_2 = SSIM(images[i_cam2] * not_masked[i_cam2],
                                   reconstructedImg1to2 * not_masked[i_cam2],
                                   C1=1e-4,
                                   C2=9e-4,
                                   kernel_size=3)

                ssim_loss_1 = torch.clamp((1. - ssim_loss_1) / 2., 0., 1.)
                ssim_loss_2 = torch.clamp((1. - ssim_loss_2) / 2., 0., 1.)

                # Weighted Sum: alpha * ssim + (1 - alpha) * l1
                photometric_loss_1 = ssim_loss_weight * ssim_loss_1.mean(
                    1, True) + (1 - ssim_loss_weight) * l1_loss_1.mean(
                        1, True)
                photometric_loss_2 = ssim_loss_weight * ssim_loss_2.mean(
                    1, True) + (1 - ssim_loss_weight) * l1_loss_2.mean(
                        1, True)

                mask1 = photometric_loss_1 != 0
                s1 = mask1.sum()
                loss_1 = (photometric_loss_1 *
                          mask1).sum() / s1 if s1 > 0 else 0

                mask2 = photometric_loss_2 != 0
                s2 = mask2.sum()
                loss_2 = (photometric_loss_2 *
                          mask2).sum() / s2 if s2 > 0 else 0

                #valid1 = ...
                #valid2 = ...

                return loss_1 + loss_2
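The photometric term above is the usual weighted mix alpha * SSIM-dissimilarity + (1 - alpha) * L1, averaged only over non-zero (non-masked) pixels. A minimal self-contained sketch of that combination, assuming ssim_map is a per-pixel similarity map with the same meaning as the SSIM call above:

import torch


def masked_photometric_loss(img, recon, mask, ssim_map, alpha=0.85):
    """alpha * SSIM-dissimilarity + (1 - alpha) * L1, averaged over non-zero pixels (sketch)."""
    # Per-pixel L1 on masked images, averaged over channels
    l1 = torch.abs(img * mask - recon * mask).mean(1, True)
    # SSIM similarity in [-1, 1] mapped to a dissimilarity in [0, 1]
    dssim = torch.clamp((1.0 - ssim_map) / 2.0, 0.0, 1.0).mean(1, True)
    photometric = alpha * dssim + (1.0 - alpha) * l1
    # Average only over pixels that carry signal (masked pixels are zero)
    valid = photometric != 0
    n = valid.sum()
    return (photometric * valid).sum() / n if n > 0 else photometric.sum() * 0.0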
예제 #30
0
    def warp_ref_image_tensor(self, inv_depths, ref_image, ref_tensor,
                              path_to_theta_lut, path_to_ego_mask, poly_coeffs,
                              principal_point, scale_factors,
                              ref_path_to_theta_lut, ref_path_to_ego_mask,
                              ref_poly_coeffs, ref_principal_point,
                              ref_scale_factors, same_timestamp_as_origin,
                              pose_matrix_context, pose):
        """
        Warps a reference image to produce a reconstruction of the original one.

        Parameters
        ----------
        inv_depths : list of torch.Tensor [B,1,H,W]
            Inverse depth maps of the original image, one per scale
        ref_image : torch.Tensor [B,3,H,W]
            Reference RGB image
        ref_tensor : torch.Tensor
            Reference tensor warped alongside the image (with nearest sampling)
        path_to_theta_lut, path_to_ego_mask, poly_coeffs, principal_point, scale_factors
            Fisheye intrinsics of the original camera
        ref_path_to_theta_lut, ref_path_to_ego_mask, ref_poly_coeffs, ref_principal_point, ref_scale_factors
            Fisheye intrinsics of the reference camera
        same_timestamp_as_origin : torch.Tensor [B]
            Per-sample flag; when True, the context pose matrix is composed with `pose`
        pose_matrix_context : torch.Tensor [B,4,4]
            Pose matrix of the context camera, used when the timestamps match
        pose : Pose
            Original -> Reference camera transformation

        Returns
        -------
        ref_warped : list of torch.Tensor [B,3,H,W]
            Warped reference images (reconstructing the original one), one per scale
        ref_tensors_warped : list of torch.Tensor
            Warped reference tensors, one per scale
        """
        B, _, H, W = ref_image.shape
        device = ref_image.get_device()
        # Generate cameras for all scales
        cams, ref_cams = [], []
        pose_matrix = torch.zeros(B, 4, 4)
        for b in range(B):
            if same_timestamp_as_origin[b]:
                pose_matrix[
                    b, :3,
                    3] = pose.mat[b, :3, :3] @ pose_matrix_context[b, :3, 3]
                pose_matrix[b, 3, 3] = 1
                pose_matrix[b, :3, :3] = pose.mat[
                    b, :3, :3] @ pose_matrix_context[b, :3, :3]
            else:
                pose_matrix[b, :, :] = pose.mat[b, :, :]
        # pose_matrix = torch.zeros(B, 4, 4)
        # for b in range(B):
        #     if not same_timestamp_as_origin[b]:
        #         pose_matrix[b, :, :] = pose.mat[b, :, :]
        #     else:
        #         pose_matrix[b, :, :] = pose_matrix_context[b, :, :]
        pose_matrix = Pose(pose_matrix)
        for i in range(self.n):
            _, _, DH, DW = inv_depths[i].shape
            scale_factor = DW / float(W)
            cams.append(
                CameraFisheye(path_to_theta_lut=path_to_theta_lut,
                              path_to_ego_mask=path_to_ego_mask,
                              poly_coeffs=poly_coeffs.float(),
                              principal_point=principal_point.float(),
                              scale_factors=scale_factors.float()).scaled(
                                  scale_factor).to(device))
            ref_cams.append(
                CameraFisheye(path_to_theta_lut=ref_path_to_theta_lut,
                              path_to_ego_mask=ref_path_to_ego_mask,
                              poly_coeffs=ref_poly_coeffs.float(),
                              principal_point=ref_principal_point.float(),
                              scale_factors=ref_scale_factors.float(),
                              Tcw=pose_matrix).scaled(scale_factor).to(device))
        # View synthesis
        depths = [inv2depth(inv_depths[i]) for i in range(self.n)]
        ref_images = match_scales(ref_image, inv_depths, self.n)
        ref_warped = [
            view_synthesis(ref_images[i],
                           depths[i],
                           ref_cams[i],
                           cams[i],
                           padding_mode=self.padding_mode,
                           mode='bilinear') for i in range(self.n)
        ]
        ref_tensors = match_scales(ref_tensor,
                                   inv_depths,
                                   self.n,
                                   mode='nearest',
                                   align_corners=None)
        ref_tensors_warped = [
            view_synthesis(ref_tensors[i],
                           depths[i],
                           ref_cams[i],
                           cams[i],
                           padding_mode=self.padding_mode,
                           mode='nearest',
                           align_corners=None) for i in range(self.n)
        ]
        #print(ref_tensors[0][:, :, ::40, ::40])
        #print(ref_tensors_warped[0][:, :, ::40, ::40])
        # Return warped reference image
        return ref_warped, ref_tensors_warped
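view_synthesis is called above but not defined in this snippet. Based on how the same warp is written out explicitly in 예제 #26 and 예제 #29 (reconstruct with one camera, project into the other, sample with grid_sample), a minimal sketch of what it is assumed to do:

import torch.nn.functional as funct


def view_synthesis(ref_image, depths, ref_cam, cam,
                   padding_mode='zeros', mode='bilinear', align_corners=True):
    """Warp ref_image into the frame of `cam` using its depth map (sketch)."""
    # 3D points of the original camera, expressed in the world frame
    world_points = cam.reconstruct(depths, frame='w')
    # Pixel coordinates of those points in the reference camera
    ref_coords = ref_cam.project(world_points, frame='w')
    # Sample the reference image at those coordinates
    return funct.grid_sample(ref_image, ref_coords,
                             mode=mode, padding_mode=padding_mode,
                             align_corners=align_corners)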