def forward(self, batch, return_logs=False, progress=0.0):
    """
    Processes a batch.

    Parameters
    ----------
    batch : dict
        Input batch
    return_logs : bool
        True if logs are stored
    progress : float
        Training progress percentage

    Returns
    -------
    output : dict
        Dictionary containing a "loss" scalar and different metrics and predictions
        for logging and downstream usage.
    """
    if not self.training:
        # If not training, no need for self-supervised loss
        return SfmModel.forward(self, batch)
    else:
        # Calculate predicted depth and pose output
        output = super().forward(batch, return_logs=return_logs)
        # Build ground-truth relative poses (context -> current) from the absolute
        # poses in the batch; this assumes a batch size of 6 (one image per camera)
        poses_gt = [[], []]
        poses_gt[0], poses_gt[1] = torch.zeros((6, 4, 4)), torch.zeros((6, 4, 4))
        for i in range(6):
            poses_gt[0][i] = batch['pose_context'][0][i].inverse() @ batch['pose'][i]
            poses_gt[1][i] = batch['pose_context'][1][i].inverse() @ batch['pose'][i]
        poses_gt = [Pose(poses_gt[0]), Pose(poses_gt[1])]
        multiview_loss = self.multiview_photometric_loss(
            batch['rgb_original'], batch['rgb_context_original'],
            output['inv_depths'], poses_gt, batch['intrinsics'],
            batch['extrinsics'], return_logs=return_logs, progress=progress)
        # The multiview photometric term is currently computed for logging only and
        # excluded from the optimized loss
        # loss = multiview_loss['loss']
        loss = 0.
        # Calculate supervised loss
        supervision_loss = self.supervised_loss(output['inv_depths'],
                                                depth2inv(batch['depth']),
                                                return_logs=return_logs,
                                                progress=progress)
        loss += self.supervised_loss_weight * supervision_loss['loss']
        # Return loss and metrics
        return {
            'loss': loss,
            **merge_outputs(merge_outputs(multiview_loss, supervision_loss), output)
        }
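For reference, the ground-truth relative poses assembled above follow the usual composition of an inverted context pose with the current pose. Below is a minimal, self-contained sketch of that step, assuming plain [N,4,4] pose tensors; the names T_cur and T_ctx and the toy identity values are illustrative only and not part of the model.

import torch

def relative_poses(T_current, T_context):
    """Compose [N,4,4] absolute poses into context -> current relative poses."""
    return torch.linalg.inv(T_context) @ T_current

T_cur = torch.eye(4).repeat(6, 1, 1)   # 6 cameras, absolute poses at time t
T_ctx = torch.eye(4).repeat(6, 1, 1)   # 6 cameras, absolute poses at the context time
T_rel = relative_poses(T_cur, T_ctx)   # [6,4,4]; identity in this toy case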
def compute_pose_net(self, image, contexts): """Compute poses from image and a sequence of context images""" pose_vec = self.pose_net(image, contexts) return [ Pose.from_vec(pose_vec[:, i], self.rotation_mode) for i in range(pose_vec.shape[1]) ]
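Pose.from_vec is provided by this codebase and is not reproduced here. As a rough, hedged illustration of what a 6-DoF pose vector encodes under an Euler-angle rotation mode, the standalone sketch below converts a (tx, ty, tz, rx, ry, rz) batch into 4x4 transforms; the exact convention (rotation order, units) used by Pose.from_vec and self.rotation_mode may differ.

import torch

def euler_vec_to_mat(vec):
    """vec: [B,6] = (tx, ty, tz, rx, ry, rz) -> [B,4,4] transforms."""
    B = vec.shape[0]
    t, r = vec[:, :3], vec[:, 3:]
    cx, cy, cz = torch.cos(r[:, 0]), torch.cos(r[:, 1]), torch.cos(r[:, 2])
    sx, sy, sz = torch.sin(r[:, 0]), torch.sin(r[:, 1]), torch.sin(r[:, 2])
    zeros, ones = torch.zeros(B), torch.ones(B)
    Rx = torch.stack([ones, zeros, zeros,
                      zeros, cx, -sx,
                      zeros, sx, cx], dim=1).view(B, 3, 3)
    Ry = torch.stack([cy, zeros, sy,
                      zeros, ones, zeros,
                      -sy, zeros, cy], dim=1).view(B, 3, 3)
    Rz = torch.stack([cz, -sz, zeros,
                      sz, cz, zeros,
                      zeros, zeros, ones], dim=1).view(B, 3, 3)
    R = Rz @ Ry @ Rx
    T = torch.eye(4).repeat(B, 1, 1)
    T[:, :3, :3] = R
    T[:, :3, 3] = t
    return T

print(euler_vec_to_mat(torch.zeros(2, 6)).shape)  # torch.Size([2, 4, 4])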
def __init__(self,
             path_to_theta_lut,
             poly_coeffs,
             principal_point=torch.Tensor([0., 0.]),
             scale_factors=torch.Tensor([1., 1.]),
             Tcw=None):
    """
    Initializes the Camera class

    Parameters
    ----------
    path_to_theta_lut : list of str
        Paths to the precomputed theta look-up tables
    poly_coeffs : torch.Tensor [B,4]
        Polynomial intrinsic coefficients [c1, c2, c3, c4]
    principal_point : torch.Tensor [B,2]
        Principal point [cx, cy]
    scale_factors : torch.Tensor [B,2]
        Scale factors [ax, ay]
    Tcw : Pose
        Camera -> World pose transformation
    """
    super().__init__()
    self.path_to_theta_lut = path_to_theta_lut
    self.poly_coeffs = poly_coeffs
    self.principal_point = principal_point
    self.scale_factors = scale_factors
    self.Tcw = Pose.identity(len(poly_coeffs)) if Tcw is None else Tcw
def __init__(self,
             poly_coeffs,
             principal_point,
             scale_factors,
             K,
             k1, k2, k3,
             p1, p2,
             camera_type,  # int Tensor; 0 is fisheye, 1 is distorted, 2 is other
             Tcw=None):
    """
    Initializes the Camera class

    Parameters
    ----------
    poly_coeffs : torch.Tensor [B,4]
        Fisheye polynomial coefficients [c1, c2, c3, c4]
    principal_point : torch.Tensor [B,2]
        Principal point [cx, cy]
    scale_factors : torch.Tensor [B,2]
        Scale factors [ax, ay]
    K : torch.Tensor [B,3,3]
        Pinhole camera intrinsics
    k1, k2, k3 : torch.Tensor [B]
        Radial distortion coefficients
    p1, p2 : torch.Tensor [B]
        Tangential distortion coefficients
    camera_type : torch.Tensor [B]
        Camera type (0 is fisheye, 1 is distorted, 2 is other)
    Tcw : Pose
        Camera -> World pose transformation
    """
    super().__init__()
    self.poly_coeffs = poly_coeffs
    self.principal_point = principal_point
    self.scale_factors = scale_factors
    self.K = K
    self.k1 = k1
    self.k2 = k2
    self.k3 = k3
    self.p1 = p1
    self.p2 = p2
    self.camera_type = camera_type
    self.Tcw = Pose.identity(len(camera_type)) if Tcw is None else Tcw
def __init__(self, K, Tcw=None): """ Initializes the Camera class Parameters ---------- K : torch.Tensor [3,3] Camera intrinsics Tcw : Pose Camera -> World pose transformation """ super().__init__() self.K = K self.Tcw = Pose.identity(len(K)) if Tcw is None else Tcw
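A minimal usage sketch for this pinhole variant, assuming the class is named Camera as above: the constructor expects a batched [B,3,3] intrinsics tensor, and Tcw defaults to an identity Pose. The focal lengths and principal point below are made-up values.

import torch

fx, fy, cx, cy = 1000.0, 1000.0, 640.0, 400.0   # hypothetical intrinsics for a 1280x800 image
K = torch.tensor([[[fx, 0.0, cx],
                   [0.0, fy, cy],
                   [0.0, 0.0, 1.0]]])           # [B,3,3] with B=1
# cam = Camera(K=K)                             # Tcw then defaults to Pose.identity(1)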
def __init__(self, R, Tcw=None): """ Initializes the Camera class Parameters ---------- R : torch.Tensor [B, 3, H, W] Camera ray surface Tcw : Pose Camera -> World pose transformation """ super().__init__() self.ray_surface = R self.Tcw = Pose.identity(1) if Tcw is None else Tcw
def pcl_distance(extra_rot_deg): for i_cam in range(4): base_folder_str = get_base_folder(input_files[i_cam][0]) split_type_str = get_split_type(input_files[i_cam][0]) seq_name_str = get_sequence_name(input_files[i_cam][0]) camera_str = get_camera_name(input_files[i_cam][0]) calib_data = {} calib_data[camera_str] = read_raw_calib_files_camera_valeo( base_folder_str, split_type_str, seq_name_str, camera_str) pose_matrix = torch.from_numpy( get_extrinsics_pose_matrix_extra_rot( input_files[i_cam][0], calib_data, extra_rot_deg[3 * i_cam + 0], extra_rot_deg[3 * i_cam + 1], extra_rot_deg[3 * i_cam + 2])).unsqueeze(0) pose_tensor = Pose(pose_matrix).to('cuda:{}'.format(rank()), dtype=dtype) CameraFisheye.Twc.fget.cache_clear() cams[i_cam].Tcw = pose_tensor world_points[i_cam] = cams[i_cam].reconstruct( pred_depths[i_cam], frame='w') world_points[i_cam] = world_points[i_cam][0].cpu().numpy() world_points[i_cam] = world_points[i_cam].reshape( (3, -1)).transpose() world_points[i_cam] = world_points[i_cam][ not_masked[i_cam]].astype(np.float64) pcl_full[i_cam].points = o3d.utility.Vector3dVector( world_points[i_cam]) dist_total = np.zeros(4) for i_cam in range(4): i_cam1 = i_cam i_cam2 = (i_cam + 1) % 4 pcl1 = pcl_full[i_cam1].select_by_index( ind_clean_right[i_cam1]).uniform_down_sample( 100).select_by_index(close_points1[i_cam]) pcl2 = pcl_full[i_cam2].select_by_index( ind_clean_left[i_cam2]).uniform_down_sample( 100).select_by_index(close_points2[i_cam]) dists = pcl1.compute_point_cloud_distance(pcl2) dist_total[i_cam] = np.mean(np.asarray(dists)) return np.mean(dist_total)
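The objective above is the mean nearest-neighbor distance between the overlapping point clouds of adjacent cameras. A standalone Open3D sketch of that metric on toy data, independent of the surrounding script and its globals:

import numpy as np
import open3d as o3d

# Two toy point clouds; compute_point_cloud_distance returns, for each point of the
# source cloud, the distance to its nearest neighbor in the target cloud.
pts_a = np.random.rand(1000, 3)
pts_b = pts_a + 0.01 * np.random.randn(1000, 3)

pcl_a = o3d.geometry.PointCloud()
pcl_a.points = o3d.utility.Vector3dVector(pts_a)
pcl_b = o3d.geometry.PointCloud()
pcl_b.points = o3d.utility.Vector3dVector(pts_b)

dists = np.asarray(pcl_a.compute_point_cloud_distance(pcl_b))
print(dists.mean())   # small when the two clouds overlap well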
np.array([[0.046296, -7.33178]])).float(), scale_factors=torch.from_numpy(np.array([[1.0, 1.0] ])).float()) cam_left = CameraFisheye(path_to_theta_lut=[ '/home/data/vbelissen/valeo_multiview/calibrations_theta_lut/fisheye/test/20170320_163113/20170320_163113_cam_0_1280_800.npy' ], path_to_ego_mask=[''], poly_coeffs=torch.from_numpy( np.array([[282.85, -27.8671, 114.318, -36.6703]])).float(), principal_point=torch.from_numpy( np.array([[0.046296, -7.33178]])).float(), scale_factors=torch.from_numpy(np.array([[1.0, 1.0] ])).float(), Tcw=Pose(pose_matrix_left_torch)) cam_front = cam_front.to(torch.device('cuda')) cam_left = cam_left.to(torch.device('cuda')) world_points = cam_front.reconstruct(simulated_depth, frame='w') ref_coords_left = cam_left.project(world_points, frame='w') warped_front_left = funct.grid_sample(left_img_torch, ref_coords_left, mode='bilinear', padding_mode='zeros', align_corners=True) warped_front_left_PIL = torch.transpose(warped_front_left.unsqueeze(4), 1,
def infer_plot_and_save_3D_pcl(input_files, output_folder, model_wrappers, image_shape): """ Process a list of input files to plot and save visualization. Parameters ---------- input_files : list (number of cameras) of lists (number of files) of str Image file output_folder : str Output folder where the output will be saved model_wrappers : nn.Module List of model wrappers used for inference image_shape : Image shape Input image shape """ N_cams = len(input_files) N_files = len(input_files[0]) camera_names = [] for i_cam in range(N_cams): camera_names.append(get_camera_name(input_files[i_cam][0])) cams = [] not_masked = [] # Depth limits if specified if args.max_depths is not None: cap_depths = True depth_limits = [0.9 * m for m in args.max_depths] else: cap_depths = False # Let's assume all images are from the same sequence (thus same cameras) for i_cam in range(N_cams): base_folder_str = get_base_folder(input_files[i_cam][0]) split_type_str = get_split_type(input_files[i_cam][0]) seq_name_str = get_sequence_name(input_files[i_cam][0]) camera_str = get_camera_name(input_files[i_cam][0]) calib_data = {} calib_data[camera_str] = read_raw_calib_files_camera_valeo_with_suffix( base_folder_str, split_type_str, seq_name_str, camera_str, args.calibrations_suffix) path_to_ego_mask = get_path_to_ego_mask(input_files[i_cam][0]) poly_coeffs, principal_point, scale_factors, K, k, p = get_full_intrinsics( input_files[i_cam][0], calib_data) poly_coeffs = torch.from_numpy(poly_coeffs).unsqueeze(0) principal_point = torch.from_numpy(principal_point).unsqueeze(0) scale_factors = torch.from_numpy(scale_factors).unsqueeze(0) K = torch.from_numpy(K).unsqueeze(0) k = torch.from_numpy(k).unsqueeze(0) p = torch.from_numpy(p).unsqueeze(0) pose_matrix = torch.from_numpy( get_extrinsics_pose_matrix(input_files[i_cam][0], calib_data)).unsqueeze(0) pose_tensor = Pose(pose_matrix) camera_type = get_camera_type(input_files[i_cam][0], calib_data) camera_type_int = torch.tensor([get_camera_type_int(camera_type)]) # Initialization of cameras cams.append( CameraMultifocal(poly_coeffs=poly_coeffs.float(), principal_point=principal_point.float(), scale_factors=scale_factors.float(), K=K.float(), k1=k[:, 0].float(), k2=k[:, 1].float(), k3=k[:, 2].float(), p1=p[:, 0].float(), p2=p[:, 1].float(), camera_type=camera_type_int, Tcw=pose_tensor)) if torch.cuda.is_available(): cams[i_cam] = cams[i_cam].to('cuda:{}'.format(rank())) # Using ego masks to create a "not_masked" bool array ego_mask = np.load(path_to_ego_mask) not_masked.append(ego_mask.astype(bool).reshape(-1)) # Computing the approximate center of the car as the barycenter of cameras cams_middle = np.zeros(3) i_cc_max = min(N_cams, 4) for c in range(3): for i_cc in range(i_cc_max): cams_middle[c] += cams[i_cc].Twc.mat.cpu().numpy()[0, c, 3] / i_cc_max # Create output dirs for each cam seq_name = get_sequence_name(input_files[0][0]) for i_cam in range(N_cams): os.makedirs(os.path.join(output_folder, seq_name, 'depth', camera_names[i_cam]), exist_ok=True) os.makedirs(os.path.join(output_folder, seq_name, 'rgb', camera_names[i_cam]), exist_ok=True) first_pic = True # The first picture will be used to create a special sequence # Iteration on files, using a specified step for i_file in range(0, N_files, args.step_in_input): base_0, ext_0 = os.path.splitext( os.path.basename(input_files[0][i_file])) print(base_0) # Initialize empty arrays that will be populated by each cam images = [] images_numpy = [] predicted_masks = [] pred_inv_depths = [] pred_depths = [] world_points = 
[] input_depth_files = [] has_gt_depth = [] input_pred_masks = [] has_pred_mask = [] gt_depth = [] gt_depth_3d = [] pcl_full = [] pcl_only_inliers = [] pcl_gt = [] rgb = [] viz_pred_inv_depths = [] great_lap = [] mask_depth_capped = [] final_masks = [] # First iteration on cameras: loading images, semantic masks and prediction depth for i_cam in range(N_cams): # Loading rgb images images.append( load_image(input_files[i_cam][i_file]).convert('RGB')) images[i_cam] = resize_image(images[i_cam], image_shape) images[i_cam] = to_tensor(images[i_cam]).unsqueeze(0) if torch.cuda.is_available(): images[i_cam] = images[i_cam].to('cuda:{}'.format(rank())) # If specified, loading predicted semantic masks if args.load_pred_masks: input_pred_mask_file = input_files[i_cam][i_file].replace( 'images_multiview', 'pred_mask') input_pred_masks.append(input_pred_mask_file) has_pred_mask.append(os.path.exists(input_pred_masks[i_cam])) predicted_masks.append( load_image(input_pred_mask_file).convert('RGB')) predicted_masks[i_cam] = resize_image(predicted_masks[i_cam], image_shape) predicted_masks[i_cam] = to_tensor( predicted_masks[i_cam]).unsqueeze(0) if torch.cuda.is_available(): predicted_masks[i_cam] = predicted_masks[i_cam].to( 'cuda:{}'.format(rank())) # Predicting depth using loaded wrappers pred_inv_depths.append(model_wrappers[i_cam].depth(images[i_cam])) pred_depths.append(inv2depth(pred_inv_depths[i_cam])) # Second iteration on cameras: reconstructing 3D and applying masks for i_cam in range(N_cams): # If specified, create a unified depth map (still in beta) if args.mix_depths: # TODO : adapter au cas ou la Long Range est incluse # Depth will be unified between original camera and left/right camera (thus 3 cameras) depths = (torch.ones(1, 3, H, W) * 500).cuda() depths[0, 1, :, :] = pred_depths[i_cam][0, 0, :, :] # Iteration on left/right cameras for relative in [-1, 1]: # Load ego mask path_to_ego_mask_relative = get_path_to_ego_mask( input_files[(i_cam + relative) % 4][0]) ego_mask_relative = np.load(path_to_ego_mask_relative) ego_mask_relative = torch.from_numpy( ego_mask_relative.astype(bool)) # Reconstruct 3d points from relative depth map relative_points_3d = cams[(i_cam + relative) % 4].reconstruct( pred_depths[(i_cam + relative) % 4], frame='w') # Center of projection of current cam cop = np.zeros((3, H, W)) for c in range(3): cop[c, :, :] = cams[i_cam].Twc.mat.cpu().numpy()[0, c, 3] # distances of 3d points to cop of current cam distances_3d = np.linalg.norm( relative_points_3d[0, :, :, :].cpu().numpy() - cop, axis=0) distances_3d = torch.from_numpy(distances_3d).unsqueeze( 0).cuda().float() # projected points on current cam (values should be in (-1,1)), be careful X and Y are switched! 
projected_points_2d = cams[i_cam].project( relative_points_3d, frame='w') projected_points_2d[:, :, :, [0, 1]] = projected_points_2d[:, :, :, [1, 0]] # applying ego mask of relative cam projected_points_2d[:, ~ego_mask_relative, :] = 2 # looking for indices of inbound pixels x_ok = (projected_points_2d[0, :, :, 0] > -1) * (projected_points_2d[0, :, :, 0] < 1) y_ok = (projected_points_2d[0, :, :, 1] > -1) * (projected_points_2d[0, :, :, 1] < 1) xy_ok = x_ok * y_ok xy_ok_id = xy_ok.nonzero(as_tuple=False) # xy values of these indices (in (-1, 1)) xy_ok_X = xy_ok_id[:, 0] xy_ok_Y = xy_ok_id[:, 1] # xy values in pixels projected_points_2d_ints = (projected_points_2d + 1) / 2 projected_points_2d_ints[0, :, :, 0] = torch.round( projected_points_2d_ints[0, :, :, 0] * (H - 1)) projected_points_2d_ints[0, :, :, 1] = torch.round( projected_points_2d_ints[0, :, :, 1] * (W - 1)) projected_points_2d_ints = projected_points_2d_ints.to( dtype=int) # main equation depths[0, 1 + relative, projected_points_2d_ints[0, xy_ok_X, xy_ok_Y, 0], projected_points_2d_ints[0, xy_ok_X, xy_ok_Y, 1]] = distances_3d[0, xy_ok_X, xy_ok_Y] # Interpolate if requested if args.mix_depths_interpolate: dd = depths[0, 1 + relative, :, :].cpu().numpy() dd[dd == 500] = np.nan dd = fillMissingValues(dd, copy=True, interpolator=scipy.interpolate. LinearNDInterpolator) dd[np.isnan(dd)] = 500 dd[dd == 0] = 500 depths[0, 1 + relative, :, :] = torch.from_numpy( dd).unsqueeze(0).unsqueeze(0).cuda() # Unify depth maps by taking min depths[depths == 0] = 500 pred_depths[i_cam] = depths.min(dim=1, keepdim=True)[0] # Reconstruct 3D points world_points.append(cams[i_cam].reconstruct(pred_depths[i_cam], frame='w')) # Switch to numpy format for use in Open3D images_numpy.append(images[i_cam][0].cpu().numpy()) # Reshape to something like a list of pixels for Open3D images_numpy[i_cam] = images_numpy[i_cam].reshape( (3, -1)).transpose() # Copy of predicted depth to be used for computing masks pred_depth_copy = pred_depths[i_cam].squeeze(0).squeeze( 0).cpu().numpy() pred_depth_copy = np.uint8(pred_depth_copy) # Final masks are initialized with ego masks final_masks.append(not_masked[i_cam]) # If depths are to be capped, final masks are updated if cap_depths: mask_depth_capped.append( (pred_depth_copy < depth_limits[i_cam]).reshape(-1)) final_masks[ i_cam] = final_masks[i_cam] * mask_depth_capped[i_cam] # If a criterion on laplacian is to be applied, final masks are updated if args.clean_with_laplacian: lap = np.uint8( np.absolute( cv2.Laplacian(pred_depth_copy, cv2.CV_64F, ksize=3))) great_lap.append(lap < args.lap_threshold) great_lap[i_cam] = great_lap[i_cam].reshape(-1) final_masks[i_cam] = final_masks[i_cam] * great_lap[i_cam] # Apply mask on images images_numpy[i_cam] = images_numpy[i_cam][final_masks[i_cam]] # Apply mask on semantic masks if applicable if args.load_pred_masks: predicted_masks[i_cam] = predicted_masks[i_cam][0].cpu().numpy( ) predicted_masks[i_cam] = predicted_masks[i_cam].reshape( (3, -1)).transpose() predicted_masks[i_cam] = predicted_masks[i_cam][ final_masks[i_cam]] # Third iteration on cameras: for i_cam in range(N_cams): # Transforming predicted 3D points into numpy and into a list-like format, then applying masks world_points[i_cam] = world_points[i_cam][0].cpu().numpy() world_points[i_cam] = world_points[i_cam].reshape( (3, -1)).transpose() world_points[i_cam] = world_points[i_cam][final_masks[i_cam]] # Look for ground truth depth data, if yes then reconstruct 3d from it input_depth_files.append( 
get_depth_file(input_files[i_cam][i_file], args.depth_suffix)) has_gt_depth.append(os.path.exists(input_depth_files[i_cam])) if has_gt_depth[i_cam]: gt_depth.append( np.load(input_depth_files[i_cam])['velodyne_depth'].astype( np.float32)) gt_depth[i_cam] = torch.from_numpy( gt_depth[i_cam]).unsqueeze(0).unsqueeze(0) if torch.cuda.is_available(): gt_depth[i_cam] = gt_depth[i_cam].to('cuda:{}'.format( rank())) gt_depth_3d.append(cams[i_cam].reconstruct(gt_depth[i_cam], frame='w')) gt_depth_3d[i_cam] = gt_depth_3d[i_cam][0].cpu().numpy() gt_depth_3d[i_cam] = gt_depth_3d[i_cam].reshape( (3, -1)).transpose() else: gt_depth.append(0) gt_depth_3d.append(0) # Initialize full pointcloud pcl_full.append(o3d.geometry.PointCloud()) pcl_full[i_cam].points = o3d.utility.Vector3dVector( world_points[i_cam]) pcl_full[i_cam].colors = o3d.utility.Vector3dVector( images_numpy[i_cam]) pcl = pcl_full[i_cam] # .select_by_index(ind) # Set of operations to remove outliers # We remove points that are below -1.0 meter, or points that are higher than 1.5 meters # and of blue/green color (typically the sky) # TODO: replace with semantic "sky" masks points_tmp = np.asarray(pcl.points) colors_tmp = images_numpy[i_cam] # np.asarray(pcl.colors) mask_below = points_tmp[:, 2] < -1.0 mask_height = points_tmp[:, 2] > 1.5 # * (abs(points_tmp[:, 0]) < 10) * (abs(points_tmp[:, 1]) < 3) mask_colors_blue1 = np.sum( np.abs(colors_tmp - np.array([0.6, 0.8, 1.0])), axis=1) < 0.6 mask_colors_blue2 = np.sum( np.abs(colors_tmp - np.array([0.8, 1.0, 1.0])), axis=1) < 0.6 mask_colors_green1 = np.sum( np.abs(colors_tmp - np.array([0.2, 1.0, 0.40])), axis=1) < 0.8 mask_colors_green2 = np.sum( np.abs(colors_tmp - np.array([0.0, 0.5, 0.15])), axis=1) < 0.2 mask = (1 - mask_below) \ * (1 - mask_height * mask_colors_blue1) \ * (1 - mask_height * mask_colors_blue2) \ * (1 - mask_height * mask_colors_green1) \ * (1 - mask_height * mask_colors_green2) # Colorize with semantic masks if specified if args.load_pred_masks: # Remove black/grey pixels from predicted semantic masks black_pixels = np.logical_or( np.sum(np.abs(predicted_masks[i_cam] * 255 - np.array([0, 0, 0])), axis=1) < 15, np.sum(np.abs(predicted_masks[i_cam] * 255 - np.array([127, 127, 127])), axis=1) < 20) ind_black_pixels = np.where(black_pixels)[0] # colorizing pixels, from a mix of semantic predictions and rgb values color_vector = args.alpha_mask_semantic * predicted_masks[ i_cam] + (1 - args.alpha_mask_semantic) * images_numpy[i_cam] color_vector[ind_black_pixels] = images_numpy[i_cam][ ind_black_pixels] pcl_full[i_cam].colors = o3d.utility.Vector3dVector( color_vector) pcl = pcl_full[i_cam] # Colorize each camera differently if specified if args.colorize_cameras: colors_tmp = args.alpha_colorize_cameras * color_list_cameras[ i_cam] + ( 1 - args.alpha_colorize_cameras) * images_numpy[i_cam] pcl.colors = o3d.utility.Vector3dVector(colors_tmp) # Apply color mask pcl = pcl.select_by_index(np.where(mask)[0]) # Remove outliers based on the number of neighbors, if specified if args.remove_neighbors_outliers: pcl, ind = pcl.remove_statistical_outlier( nb_neighbors=7, std_ratio=args.remove_neighbors_outliers_std) pcl = pcl.select_by_index(ind) # Downsample based on a voxel size, if specified if args.voxel_downsample: pcl = pcl.voxel_down_sample( voxel_size=args.voxel_size_downsampling) # Rename variable pcl pcl_only_inliers.append(pcl) # Ground-truth pointcloud: define its color with a colormap (plasma) if has_gt_depth[i_cam]: pcl_gt.append(o3d.geometry.PointCloud()) 
pcl_gt[i_cam].points = o3d.utility.Vector3dVector( gt_depth_3d[i_cam]) gt_inv_depth = 1 / (np.linalg.norm( gt_depth_3d[i_cam] - cams_middle, axis=1) + 1e-6) cm = get_cmap('plasma') normalizer = .35 gt_inv_depth /= normalizer pcl_gt[i_cam].colors = o3d.utility.Vector3dVector( cm(np.clip(gt_inv_depth, 0., 1.0))[:, :3]) else: pcl_gt.append(0) # Downsample pointclouds based on the presence of nearby pointclouds with higher priority # (i.e. lidar or semantized pointcloud) if args.remove_close_points_lidar_semantic: threshold_depth2depth = 0.5 threshold_depth2lidar = 0.1 for i_cam in range(4): if has_pred_mask[i_cam]: for relative in [-1, 1]: if not has_pred_mask[(i_cam + relative) % 4]: dists = pcl_only_inliers[ (i_cam + relative) % 4].compute_point_cloud_distance( pcl_only_inliers[i_cam]) p1 = pcl_only_inliers[ (i_cam + relative) % 4].select_by_index( np.where( np.asarray(dists) > threshold_depth2depth)[0]) p2 = pcl_only_inliers[( i_cam + relative ) % 4].select_by_index( np.where( np.asarray(dists) > threshold_depth2depth) [0], invert=True).uniform_down_sample( 15) #.voxel_down_sample(voxel_size=0.5) pcl_only_inliers[(i_cam + relative) % 4] = p1 + p2 if has_gt_depth[i_cam]: down = 15 if has_pred_mask[i_cam] else 30 dists = pcl_only_inliers[ i_cam].compute_point_cloud_distance(pcl_gt[i_cam]) p1 = pcl_only_inliers[i_cam].select_by_index( np.where(np.asarray(dists) > threshold_depth2lidar)[0]) p2 = pcl_only_inliers[i_cam].select_by_index( np.where(np.asarray(dists) > threshold_depth2lidar)[0], invert=True).uniform_down_sample( down) #.voxel_down_sample(voxel_size=0.5) pcl_only_inliers[i_cam] = p1 + p2 # Define the POV list pov_list = pov_dict[args.pov_file_dict] n_povs = len(pov_list) # If specified, the first pic is frozen and the camera moves in the scene if args.plot_pov_sequence_first_pic: if first_pic: # Loop on POVs for i_cam_n in range(n_povs): vis_only_inliers = o3d.visualization.Visualizer() vis_only_inliers.create_window(visible=True, window_name='inliers' + str(i_file)) for i_cam in range(N_cams): vis_only_inliers.add_geometry(pcl_only_inliers[i_cam]) for i, e in enumerate(pcl_gt): if e != 0: vis_only_inliers.add_geometry(e) ctr = vis_only_inliers.get_view_control() ctr.set_lookat(lookat_vector) ctr.set_front(front_vector) ctr.set_up(up_vector) ctr.set_zoom(zoom_float) param = o3d.io.read_pinhole_camera_parameters( pov_list[i_cam_n] ) #('/home/vbelissen/Downloads/test/cameras_jsons/sequence/test1_'+str(i_cam_n)+'v3.json') ctr.convert_from_pinhole_camera_parameters(param) opt = vis_only_inliers.get_render_option() opt.background_color = np.asarray([0, 0, 0]) opt.point_size = 3.0 #opt.light_on = False #vis_only_inliers.update_geometry('inliers0') vis_only_inliers.poll_events() vis_only_inliers.update_renderer() if args.stop: vis_only_inliers.run() pcd1 = pcl_only_inliers[0] for i in range(1, N_cams): pcd1 = pcd1 + pcl_only_inliers[i] for i_cam3 in range(N_cams): if has_gt_depth[i_cam3]: pcd1 += pcl_gt[i_cam3] #o3d.io.write_point_cloud(os.path.join(output_folder, seq_name, 'open3d', base_0 + '.pcd'), pcd1) # = vis_only_inliers.get_view_control().convert_to_pinhole_camera_parameters() #o3d.io.write_pinhole_camera_parameters('/home/vbelissen/Downloads/test.json', param) if args.save_visualization: image = vis_only_inliers.capture_screen_float_buffer( False) plt.imsave(os.path.join(output_folder, seq_name, 'pcl', base_0, str(i_cam_n) + '.png'), np.asarray(image), dpi=1) vis_only_inliers.destroy_window() del ctr del vis_only_inliers del opt first_pic = False # Plot point cloud 
vis_only_inliers = o3d.visualization.Visualizer() vis_only_inliers.create_window(visible=True, window_name='inliers' + str(i_file)) for i_cam in range(N_cams): vis_only_inliers.add_geometry(pcl_only_inliers[i_cam]) if args.print_lidar: for i, e in enumerate(pcl_gt): if e != 0: vis_only_inliers.add_geometry(e) ctr = vis_only_inliers.get_view_control() ctr.set_lookat(lookat_vector) ctr.set_front(front_vector) ctr.set_up(up_vector) ctr.set_zoom(zoom_float) param = o3d.io.read_pinhole_camera_parameters( pov_list[-1] ) #('/home/vbelissen/Downloads/test/cameras_jsons/sequence/test1_'+str(119)+'v3.json') ctr.convert_from_pinhole_camera_parameters(param) opt = vis_only_inliers.get_render_option() opt.background_color = np.asarray([0, 0, 0]) opt.point_size = 3.0 #opt.light_on = False #vis_only_inliers.update_geometry('inliers0') vis_only_inliers.poll_events() vis_only_inliers.update_renderer() if args.stop: vis_only_inliers.run() pcd1 = pcl_only_inliers[0] for i in range(1, N_cams): pcd1 = pcd1 + pcl_only_inliers[i] for i_cam3 in range(N_cams): if has_gt_depth[i_cam3]: pcd1 += pcl_gt[i_cam3] o3d.io.write_point_cloud( os.path.join(output_folder, seq_name, 'open3d', base_0 + '.pcd'), pcd1) #param = vis_only_inliers.get_view_control().convert_to_pinhole_camera_parameters() #o3d.io.write_pinhole_camera_parameters('/home/vbelissen/Downloads/test.json', param) if args.save_visualization: image = vis_only_inliers.capture_screen_float_buffer(False) plt.imsave(os.path.join(output_folder, seq_name, 'pcl', 'normal', base_0 + '_normal_.png'), np.asarray(image), dpi=1) vis_only_inliers.destroy_window() del ctr del vis_only_inliers del opt # Save RGB and depth maps for i_cam in range(N_cams): rgb.append( images[i_cam][0].permute(1, 2, 0).detach().cpu().numpy() * 255) viz_pred_inv_depths.append( viz_inv_depth(pred_inv_depths[i_cam][0], normalizer=0.4) * 255) viz_pred_inv_depths[i_cam][not_masked[i_cam].reshape(image_shape) == 0] = 0 # Save visualization if args.save_visualization: output_file1 = os.path.join( output_folder, seq_name, 'depth', camera_names[i_cam], os.path.basename(input_files[i_cam][i_file])) output_file2 = os.path.join( output_folder, seq_name, 'rgb', camera_names[i_cam], os.path.basename(input_files[i_cam][i_file])) imwrite(output_file1, viz_pred_inv_depths[i_cam][:, :, ::-1]) imwrite(output_file2, rgb[i_cam][:, :, ::-1])
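One of the filters used above keeps only points whose depth Laplacian is small, i.e. points away from depth discontinuities where monocular predictions are least reliable. A self-contained sketch of that mask on a toy depth map; lap_threshold is a hypothetical stand-in for args.lap_threshold:

import cv2
import numpy as np

# Toy depth map with a sharp discontinuity down the middle.
depth = np.ones((800, 1280), dtype=np.uint8) * 10
depth[:, 640:] = 30

lap = np.uint8(np.absolute(cv2.Laplacian(depth, cv2.CV_64F, ksize=3)))
lap_threshold = 4                           # hypothetical value
keep = (lap < lap_threshold).reshape(-1)    # flattened boolean mask, True away from depth edges
print(keep.sum(), keep.size)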
def forward(self, image, context, inv_depths, K, ref_K, extrinsics, poses, return_logs=False, progress=0.0): """ Calculates training photometric loss. (Here we need to consider temporal, spatial and temporal-spatial wise losses) Parameters ---------- image : torch.Tensor [B,3,H,W] Original image context : list of torch.Tensor [B,3,H,W] Context containing a list of reference images inv_depths : list of torch.Tensor [B,1,H,W] Predicted depth maps for the original image, in all scales K : torch.Tensor [B,3,3] Original camera intrinsics ref_K : torch.Tensor [B,3,3] Reference camera intrinsics poses : list of Pose Camera transformation between original and context return_logs : bool True if logs are saved for visualization progress : float Training percentage Returns ------- losses_and_metrics : dict Output dictionary """ # Step 0: prepare for loss calculation # Reorganize the order of camearas -- (See more API information about the datasets) image = self.sort_cameras_tensor(image) context = self.sort_cameras_tensor(context) inv_depths = self.sort_cameras_tensor(inv_depths) K = self.sort_cameras_tensor(K) ref_K = K poses = [ Pose(self.sort_cameras_tensor(poses[0].item())), Pose(self.sort_cameras_tensor(poses[1].item())) ] # If using progressive scaling self.n = self.progressive_scaling(progress) # Loop over all reference images photometric_losses = [[] for _ in range(self.n)] images = match_scales(image, inv_depths, self.n) extrinsics = torch.tensor(extrinsics, dtype=torch.float32, device="cuda") # Step 1: Calculate the losses temporal-wise for j, (ref_image, pose) in enumerate(zip(context, poses)): # Calculate warped images ref_warped = self.warp_ref_image_temporal(inv_depths, ref_image, K, ref_K, pose) # Calculate and store image loss # print('### poses shape', len(poses)) # print('poses[0].shape:', poses[0].shape) # print('###multiview_photometric_loss printing ref_warped') # print('len of images: ',len(images)) # print('shape of images[0]: ', images[0].shape) # print('len of context: ',len(context)) # print('shape of context[0]:', context[0].shape) # print('len of ref_warped: ',len(ref_warped)) # print('shape of ref_warped[0]', ref_warped[0].shape) # pic_orig = images[0][0].cpu().clone() # pic_orig = (pic_orig.squeeze(0).permute(1,2,0).detach().numpy()*255).astype(np.uint8) # pic_ref = context[0][0].cpu().clone() # pic_ref = (pic_ref.squeeze(0).permute(1, 2, 0).detach().numpy() * 255).astype(np.uint8) # pic_warped = ref_warped[0][0].cpu().clone() # pic_warped = (pic_warped.squeeze(0).permute(1,2,0).detach().numpy()*255).astype(np.uint8) # final_frame = cv2.hconcat((pic_orig, pic_ref, pic_warped)) # cv2.imshow('temporal warping', final_frame) # cv2.waitKey() photometric_loss = self.calc_photometric_loss(ref_warped, images) for i in range(self.n): photometric_losses[i].append(self.temporal_loss_weight * photometric_loss[i]) # If using automask if self.automask_loss: # Calculate and store unwarped image loss ref_images = match_scales(ref_image, inv_depths, self.n) unwarped_image_loss = self.calc_photometric_loss( ref_images, images) for i in range(self.n): photometric_losses[i].append(self.temporal_loss_weight * unwarped_image_loss[i]) # Step 2: Calculate the losses spatial-wise # reconstruct context images num_cameras, C, H, W = image.shape # should be (6, 3, H, W) left_swap = [i for i in range(-1, num_cameras - 1)] right_swap = [i % 6 for i in range(1, num_cameras + 1)] # for i in range(num_cameras): # pic = image[i].cpu().clone() # pic = (pic.squeeze(0).permute(1, 2, 0).detach().numpy() * 
255).astype(np.uint8) # if i == 0: # final_file = pic # else: # final_file = cv2.hconcat((final_file, pic)) # cv2.imshow('6 images', final_file) # cv2.waitKey() context_spatial = [[], []] # [[B,3,H,W],[B,3,H,W]] context_spatial[0] = image[left_swap, ...] context_spatial[1] = image[right_swap, ...] K_spatial = K # tensor [B,3,3] ref_K_spatial = [[], []] # [[B,3,3],[B,3,3]] ref_K_spatial[0] = K[left_swap, ...] ref_K_spatial[1] = K[right_swap, ...] # reconstruct extrinsics extrinsics_1 = extrinsics # [B,4,4] extrinsics_2 = [[], []] # [[B,4,4],[B,4,4]] extrinsics_2[0] = extrinsics_1[left_swap, ...] extrinsics_2[1] = extrinsics_1[right_swap, ...] # calculate spatial-wise photometric loss for j, ref_image in enumerate(context_spatial): # Calculate warped images ref_warped, valid_points_masks = self.warp_ref_image_spatial( inv_depths, ref_image, K_spatial, ref_K_spatial[j], Pose(extrinsics_1), Pose(extrinsics_2[j])) # pic_orig = images[0][1].cpu().clone() # pic_orig = (pic_orig.squeeze(0).permute(1,2,0).detach().numpy()*255).astype(np.uint8) # pic_ref = context_spatial[0][1].cpu().clone() # pic_ref = (pic_ref.squeeze(0).permute(1, 2, 0).detach().numpy() * 255).astype(np.uint8) # pic_warped = ref_warped[0][1].cpu().clone() # pic_warped = (pic_warped.squeeze(0).permute(1,2,0).detach().numpy()*255).astype(np.uint8) # pic_valid = valid_points_masks[0][1].cpu().clone() # pic_valid = (pic_valid.permute(1,2,0).detach().numpy()*255).astype(np.uint8) # final_frame = cv2.hconcat((pic_orig, pic_ref, pic_warped)) # cv2.imshow('spatial warping', final_frame) # cv2.waitKey() # # cv2.imshow('pic_valid', pic_valid) # # cv2.waitKey() # Calculate and store image loss photometric_loss = [ self.calc_photometric_loss(ref_warped, images)[i] * valid_points_masks[i] for i in range(len(valid_points_masks)) ] for i in range(self.n): photometric_losses[i].append(self.spatial_loss_weight * photometric_loss[i]) # Step 3: Calculate the loss temporal-spatial wise # reconstruct context images context_temporal_spatial = [] # [context_temporal_spatial_backward, context_temporal_spatial_forward] # [[left t-1, right t-1], [left t+1, right t+1]] # [[[B,H,W],[B,H,W]],[[B,H,W],[B,H,W]]] # reconstruct intrinsics K_temporal_spatial = K ref_K_temporal_spatial = [] # reconstruct extrinsics extrinsics_1_ts = extrinsics extrinsics_2_ts = [] # reconstruct pose poses_ts = [] for l in range(len(context)): context_temporal_spatial.append( [context[l][left_swap, ...], context[l][right_swap, ...]]) ref_K_temporal_spatial.append([ K_temporal_spatial[left_swap, ...], K_temporal_spatial[right_swap, ...] 
]) extrinsics_2_ts.append( [extrinsics[left_swap, ...], extrinsics[right_swap, ...]]) poses_ts.append([ Pose(poses[l].item()[left_swap, ...]), Pose(poses[l].item()[right_swap, ...]) ]) # calculate spatial-wise photometric loss for j, (ref_image, pose) in enumerate(zip(context_temporal_spatial, poses_ts)): # Calculate warped images for k in range(len(ref_image)): ref_warped, valid_points_masks = self.warp_ref_image_spatial( inv_depths, ref_image[k], K_temporal_spatial, ref_K_temporal_spatial[j][k], Pose(extrinsics_1_ts), Pose(extrinsics_2_ts[j][k]) @ pose[k].inverse()) # for i in range(6): # pic_orig = images[0][i].cpu().clone() # pic_orig = (pic_orig.squeeze(0).permute(1,2,0).detach().numpy()*255).astype(np.uint8) # pic_ref = context_spatial[0][i].cpu().clone() # pic_ref = (pic_ref.squeeze(0).permute(1, 2, 0).detach().numpy() * 255).astype(np.uint8) # pic_warped = ref_warped[0][i].cpu().clone() # pic_warped = (pic_warped.squeeze(0).permute(1,2,0).detach().numpy()*255).astype(np.uint8) # pic_valid = valid_points_masks[0][i].cpu().clone() # pic_valid = (pic_valid.permute(1,2,0).detach().numpy()*255).astype(np.uint8) # final_frame = cv2.hconcat((pic_orig, pic_ref, pic_warped)) # cv2.imshow('temporal spatial warping', final_frame) # cv2.waitKey() # cv2.imshow('pic_valid', pic_valid) # cv2.waitKey() # Calculate and store image loss photometric_loss = [self.calc_photometric_loss(ref_warped, images)[i] * valid_points_masks[i] \ for i in range(len(valid_points_masks))] for i in range(self.n): photometric_losses[i].append( self.temporal_spatial_loss_weight * photometric_loss[i]) # Step 4: Calculate reduced photometric loss loss = self.reduce_photometric_loss(photometric_losses) # Include smoothness loss if requested if self.smooth_loss_weight > 0.0: loss += self.calc_smoothness_loss(inv_depths, images) # Return losses and metrics return { 'loss': loss.unsqueeze(0), 'metrics': self.metrics, }
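The spatial and temporal-spatial terms above pair each of the six surround cameras with its immediate neighbors through two index permutations. The short sketch below just prints those permutations; indexing a [6,3,H,W] tensor with them shifts the camera axis so that camera i is matched with camera i-1 (left, using Python's wrap-around negative index) or camera i+1 (right, wrapping modulo 6).

num_cameras = 6
left_swap = [i for i in range(-1, num_cameras - 1)]                 # [-1, 0, 1, 2, 3, 4]
right_swap = [i % num_cameras for i in range(1, num_cameras + 1)]   # [1, 2, 3, 4, 5, 0]
print(left_swap, right_swap)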
def forward(self, image, ref_images_temporal_context, ref_images_geometric_context, ref_images_geometric_context_temporal_context, inv_depths, poses_temporal_context, poses_geometric_context, poses_geometric_context_temporal_context, camera_type, intrinsics_poly_coeffs, intrinsics_principal_point, intrinsics_scale_factors, intrinsics_K, intrinsics_k, intrinsics_p, path_to_ego_mask, camera_type_geometric_context, intrinsics_poly_coeffs_geometric_context, intrinsics_principal_point_geometric_context, intrinsics_scale_factors_geometric_context, intrinsics_K_geometric_context, intrinsics_k_geometric_context, intrinsics_p_geometric_context, path_to_ego_mask_geometric_context, return_logs=False, progress=0.0): """ Calculates training photometric loss. Parameters ---------- image : torch.Tensor [B,3,H,W] Original image context : list of torch.Tensor [B,3,H,W] Context containing a list of reference images inv_depths : list of torch.Tensor [B,1,H,W] Predicted depth maps for the original image, in all scales K : torch.Tensor [B,3,3] Original camera intrinsics ref_K : torch.Tensor [B,3,3] Reference camera intrinsics poses : list of Pose Camera transformation between original and context return_logs : bool True if logs are saved for visualization progress : float Training percentage Returns ------- losses_and_metrics : dict Output dictionary """ # If using progressive scaling self.n = self.progressive_scaling(progress) # Loop over all reference images photometric_losses = [[] for _ in range(self.n)] images = match_scales(image, inv_depths, self.n) n_temporal_context = len(ref_images_temporal_context) n_geometric_context = len(ref_images_geometric_context) assert len(ref_images_geometric_context_temporal_context ) == n_temporal_context * n_geometric_context B = len(path_to_ego_mask) device = image.get_device() # getting ego masks for target and source cameras # fullsize mask H_full = 800 W_full = 1280 ego_mask_tensor = torch.ones(B, 1, H_full, W_full).to(device) ref_ego_mask_tensor_geometric_context = [] for i_geometric_context in range(n_geometric_context): ref_ego_mask_tensor_geometric_context.append( torch.ones(B, 1, H_full, W_full).to(device)) for b in range(B): ego_mask_tensor[b, 0] = torch.from_numpy( np.load(path_to_ego_mask[b])).float() for i_geometric_context in range(n_geometric_context): if camera_type_geometric_context[b, i_geometric_context] != 2: ref_ego_mask_tensor_geometric_context[i_geometric_context][b, 0] = \ torch.from_numpy(np.load(path_to_ego_mask_geometric_context[i_geometric_context][b])).float() # resized masks ego_mask_tensors = [] ref_ego_mask_tensors_geometric_context = [] for i_geometric_context in range(n_geometric_context): ref_ego_mask_tensors_geometric_context.append([]) for i in range(self.n): _, _, H, W = images[i].shape if W < W_full: ego_mask_tensors.append( interpolate_image(ego_mask_tensor, shape=(B, 1, H, W), mode='nearest', align_corners=None)) for i_geometric_context in range(n_geometric_context): ref_ego_mask_tensors_geometric_context[ i_geometric_context].append( interpolate_image( ref_ego_mask_tensor_geometric_context[ i_geometric_context], shape=(B, 1, H, W), mode='nearest', align_corners=None)) else: ego_mask_tensors.append(ego_mask_tensor) for i_geometric_context in range(n_geometric_context): ref_ego_mask_tensors_geometric_context[ i_geometric_context].append( ref_ego_mask_tensor_geometric_context[ i_geometric_context]) # Dummy camera mask (B x n_geometric_context) Cmask = (camera_type_geometric_context == 2).detach() # temporal context for j, 
(ref_image, pose) in enumerate( zip(ref_images_temporal_context, poses_temporal_context)): # Calculate warped images ref_warped, ref_ego_mask_tensors_warped = \ self.warp_ref_image(inv_depths, camera_type, intrinsics_poly_coeffs, intrinsics_principal_point, intrinsics_scale_factors, intrinsics_K, intrinsics_k, intrinsics_p, ref_image, pose, ego_mask_tensors, camera_type, intrinsics_poly_coeffs, intrinsics_principal_point, intrinsics_scale_factors, intrinsics_K, intrinsics_k, intrinsics_p) # Calculate and store image loss photometric_loss = self.calc_photometric_loss(ref_warped, images) for i in range(self.n): photometric_losses[i].append(photometric_loss[i] * ego_mask_tensors[i] * ref_ego_mask_tensors_warped[i]) # If using automask if self.automask_loss: # Calculate and store unwarped image loss ref_images = match_scales(ref_image, inv_depths, self.n) unwarped_image_loss = self.calc_photometric_loss( ref_images, images) for i in range(self.n): photometric_losses[i].append(unwarped_image_loss[i] * ego_mask_tensors[i] * ego_mask_tensors[i]) # geometric context for j, (ref_image, pose) in enumerate( zip(ref_images_geometric_context, poses_geometric_context)): if Cmask[:, j].sum() < B: # Calculate warped images ref_warped, ref_ego_mask_tensors_warped = \ self.warp_ref_image(inv_depths, camera_type, intrinsics_poly_coeffs, intrinsics_principal_point, intrinsics_scale_factors, intrinsics_K, intrinsics_k, intrinsics_p, ref_image, Pose(pose), ref_ego_mask_tensors_geometric_context[j], camera_type_geometric_context[:, j], intrinsics_poly_coeffs_geometric_context[j], intrinsics_principal_point_geometric_context[j], intrinsics_scale_factors_geometric_context[j], intrinsics_K_geometric_context[j], intrinsics_k_geometric_context[j], intrinsics_p_geometric_context[j]) # Calculate and store image loss photometric_loss = self.calc_photometric_loss( ref_warped, images) for i in range(self.n): photometric_loss[i][Cmask[:, j]] = 0.0 photometric_losses[i].append( photometric_loss[i] * ego_mask_tensors[i] * ref_ego_mask_tensors_warped[i]) # If using automask if self.automask_loss: # Calculate and store unwarped image loss ref_images = match_scales(ref_image, inv_depths, self.n) unwarped_image_loss = self.calc_photometric_loss( ref_images, images) for i in range(self.n): unwarped_image_loss[i][Cmask[:, j]] = 0.0 photometric_losses[i].append( unwarped_image_loss[i] * ego_mask_tensors[i] * ref_ego_mask_tensors_geometric_context[j][i]) # geometric-temporal context for j, (ref_image, pose) in enumerate( zip(ref_images_geometric_context_temporal_context, poses_geometric_context_temporal_context)): j_geometric = j // n_temporal_context if Cmask[:, j_geometric].sum() < B: # Calculate warped images ref_warped, ref_ego_mask_tensors_warped = \ self.warp_ref_image(inv_depths, camera_type, intrinsics_poly_coeffs, intrinsics_principal_point, intrinsics_scale_factors, intrinsics_K, intrinsics_k, intrinsics_p, ref_image, Pose(pose.mat @ poses_geometric_context[j_geometric]), # ATTENTION A VERIFIER (changement de repere !) 
ref_ego_mask_tensors_geometric_context[j_geometric], camera_type_geometric_context[:, j_geometric], intrinsics_poly_coeffs_geometric_context[j_geometric], intrinsics_principal_point_geometric_context[j_geometric], intrinsics_scale_factors_geometric_context[j_geometric], intrinsics_K_geometric_context[j_geometric], intrinsics_k_geometric_context[j_geometric], intrinsics_p_geometric_context[j_geometric]) # Calculate and store image loss photometric_loss = self.calc_photometric_loss( ref_warped, images) for i in range(self.n): photometric_loss[i][Cmask[:, j_geometric]] = 0.0 photometric_losses[i].append( photometric_loss[i] * ego_mask_tensors[i] * ref_ego_mask_tensors_warped[i]) # If using automask if self.automask_loss: # Calculate and store unwarped image loss ref_images = match_scales(ref_image, inv_depths, self.n) unwarped_image_loss = self.calc_photometric_loss( ref_images, images) for i in range(self.n): unwarped_image_loss[i][Cmask[:, j_geometric]] = 0.0 photometric_losses[i].append( unwarped_image_loss[i] * ego_mask_tensors[i] * ref_ego_mask_tensors_geometric_context[j_geometric] [i]) # Calculate reduced photometric loss loss = self.nonzero_reduce_photometric_loss(photometric_losses) # Include smoothness loss if requested if self.smooth_loss_weight > 0.0: loss += self.calc_smoothness_loss( [a * b for a, b in zip(inv_depths, ego_mask_tensors)], [a * b for a, b in zip(images, ego_mask_tensors)]) # Return losses and metrics return { 'loss': loss.unsqueeze(0), 'metrics': self.metrics, }
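The ego masks above are resized to every scale of the inverse-depth pyramid with nearest-neighbor interpolation (through the repo's interpolate_image helper), which keeps them binary. A standalone equivalent using torch.nn.functional.interpolate, with hypothetical scales:

import torch
import torch.nn.functional as F

B, H_full, W_full = 2, 800, 1280
ego_mask = (torch.rand(B, 1, H_full, W_full) > 0.1).float()   # toy binary mask

ego_masks = []
for (h, w) in [(800, 1280), (400, 640), (200, 320), (100, 160)]:
    if (h, w) == (H_full, W_full):
        ego_masks.append(ego_mask)
    else:
        # 'nearest' keeps the mask values in {0, 1}, unlike bilinear interpolation
        ego_masks.append(F.interpolate(ego_mask, size=(h, w), mode='nearest'))
print([tuple(m.shape) for m in ego_masks])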
def reconstruct_distorted(self, depth, mask, frame='w'):
    """
    Reconstructs pixel-wise 3D points from a depth map.

    Parameters
    ----------
    depth : torch.Tensor [B,1,H,W]
        Depth map for the camera
    mask : torch.Tensor [B]
        Boolean mask selecting the batch elements to reconstruct
    frame : str
        Reference frame: 'c' for camera and 'w' for world

    Returns
    -------
    points : torch.tensor [B,3,H,W]
        Pixel-wise 3D points
    """
    B, C, H, W = depth[mask].shape
    assert C == 1
    # Create flat index grid
    grid = image_grid(B, H, W, depth.dtype, depth.device, normalized=False)  # [B,3,H,W]
    flat_grid = grid.view(B, 3, -1)  # [B,3,HW]
    device = depth.get_device()
    # Estimate the outward undistorted rays in the camera frame
    Xnorm_u = (self.Kinv[mask].bmm(flat_grid)).view(B, 3, H, W)
    version = 'v1'
    N = 5
    if version == 'v1':
        # Fixed-point iteration: subtract the tangential term and rescale by the
        # radial factor, starting from the normalized coordinates
        x = Xnorm_u[:, 0, :, :].view(B, 1, H, W)
        y = Xnorm_u[:, 1, :, :].view(B, 1, H, W)
        x_src = torch.clone(x)
        y_src = torch.clone(y)
        for _ in range(N):
            r2 = x.pow(2) + y.pow(2)
            r4 = r2.pow(2)
            r6 = r2 * r4
            rad_dist = 1 / (1 + self.k1[mask].view(B, 1, 1, 1) * r2
                              + self.k2[mask].view(B, 1, 1, 1) * r4
                              + self.k3[mask].view(B, 1, 1, 1) * r6)
            tang_dist_x = 2 * self.p1[mask].view(B, 1, 1, 1) * x * y \
                          + self.p2[mask].view(B, 1, 1, 1) * (r2 + 2 * x.pow(2))
            tang_dist_y = 2 * self.p2[mask].view(B, 1, 1, 1) * x * y \
                          + self.p1[mask].view(B, 1, 1, 1) * (r2 + 2 * y.pow(2))
            x = (x_src - tang_dist_x) * rad_dist
            y = (y_src - tang_dist_y) * rad_dist
        # Distorted rays
        Xnorm_d = torch.stack([x.squeeze(1), y.squeeze(1), torch.ones_like(x).squeeze(1)], dim=1)
    elif version == 'v2':
        # Same iteration as 'v1', but keeping every intermediate estimate in a list
        x_list = []
        y_list = []
        x_list.append(Xnorm_u[:, 0, :, :].view(B, 1, H, W))
        y_list.append(Xnorm_u[:, 1, :, :].view(B, 1, H, W))
        for n in range(N):
            r2 = x_list[-1].pow(2) + y_list[-1].pow(2)
            r4 = r2.pow(2)
            r6 = r2 * r4
            rad_dist = 1 / (1 + self.k1[mask].view(B, 1, 1, 1) * r2
                              + self.k2[mask].view(B, 1, 1, 1) * r4
                              + self.k3[mask].view(B, 1, 1, 1) * r6)
            tang_dist_x = 2 * self.p1[mask].view(B, 1, 1, 1) * x_list[-1] * y_list[-1] \
                          + self.p2[mask].view(B, 1, 1, 1) * (r2 + 2 * x_list[-1].pow(2))
            tang_dist_y = 2 * self.p2[mask].view(B, 1, 1, 1) * x_list[-1] * y_list[-1] \
                          + self.p1[mask].view(B, 1, 1, 1) * (r2 + 2 * y_list[-1].pow(2))
            x_list.append((x_list[0] - tang_dist_x) * rad_dist)
            y_list.append((y_list[0] - tang_dist_y) * rad_dist)
        # Distorted rays
        Xnorm_d = torch.stack([x_list[-1].squeeze(1), y_list[-1].squeeze(1),
                               torch.ones_like(x_list[-1]).squeeze(1)], dim=1)
    elif version == 'v3':
        # Read precomputed rays from the theta look-up tables
        Xnorm_d = torch.zeros(B, 3, H, W)
        for b in range(B):
            Xnorm_d[b] = torch.from_numpy(np.load(self.path_to_theta_lut[b]))
        Xnorm_d = Xnorm_d.to(device)
        # WARNING: Xnorm_d should be renormalized here
        #Xnorm_d /= norm(Xnorm_d)
    # Scale rays to metric depth
    Xc = ((Xnorm_d / torch.sqrt((Xnorm_d[:, 0, :, :].pow(2)
                                 + Xnorm_d[:, 1, :, :].pow(2)
                                 + Xnorm_d[:, 2, :, :].pow(2)).clamp(min=1e-5)).view(B, 1, H, W))
          * depth[mask]).float()
    # If in camera frame of reference
    if frame == 'c':
        return Xc
    # If in world frame of reference
    elif frame == 'w':
        return Pose(self.Twc.mat[mask]) @ Xc
    # If none of the above
    else:
        raise ValueError('Unknown reference frame {}'.format(frame))
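The 'v1' branch above inverts the radial/tangential (Brown) distortion by fixed-point iteration: starting from the normalized coordinates, it repeatedly subtracts the tangential term and rescales by the radial factor. The scalar numpy sketch below runs the same iteration with made-up coefficients, purely to illustrate the numerics:

import numpy as np

def invert_brown_distortion(x_d, y_d, k1, k2, k3, p1, p2, iters=5):
    """Fixed-point inversion of the Brown radial/tangential model."""
    x, y = x_d, y_d
    for _ in range(iters):
        r2 = x * x + y * y
        rad = 1.0 / (1.0 + k1 * r2 + k2 * r2**2 + k3 * r2**3)
        dx = 2 * p1 * x * y + p2 * (r2 + 2 * x * x)
        dy = 2 * p2 * x * y + p1 * (r2 + 2 * y * y)
        x = (x_d - dx) * rad
        y = (y_d - dy) * rad
    return x, y

# Made-up coefficients, purely illustrative.
print(invert_brown_distortion(0.3, -0.2, k1=-0.1, k2=0.01, k3=0.0, p1=1e-3, p2=-1e-3))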
def infer_optimal_calib(input_files, model_wrappers, image_shape, half): """ Process a single input file to produce and save visualization Parameters ---------- input_file : list (number of cameras) of lists (number of files) of str Image file output_file : str Output file, or folder where the output will be saved model_wrapper : nn.Module Model wrapper used for inference image_shape : Image shape Input image shape half: bool use half precision (fp16) save: str Save format (npz or png) """ N_files = len(input_files[0]) N_cams = 4 # change to half precision for evaluation if requested dtype = torch.float16 if half else None extra_rot_deg = torch.autograd.Variable(torch.zeros(12), requires_grad=True) save_pictures = True n_epochs = 100 learning_rate = 0.05 loss_tab = np.zeros(n_epochs) optimizer = optim.Adam([extra_rot_deg], lr=learning_rate) for epoch in range(n_epochs): extra_rot_deg_grad_list = [] loss_sum = 0 for i_file in range(N_files): cams = [] not_masked = [] camera_names = [] for i_cam in range(N_cams): camera_names.append(get_camera_name( input_files[i_cam][i_file])) for i_cam in range(N_cams): base_folder_str = get_base_folder(input_files[i_cam][i_file]) split_type_str = get_split_type(input_files[i_cam][i_file]) seq_name_str = get_sequence_name(input_files[i_cam][i_file]) camera_str = get_camera_name(input_files[i_cam][i_file]) calib_data = {} calib_data[camera_str] = read_raw_calib_files_camera_valeo( base_folder_str, split_type_str, seq_name_str, camera_str) path_to_theta_lut = get_path_to_theta_lut( input_files[i_cam][0]) path_to_ego_mask = get_path_to_ego_mask(input_files[i_cam][0]) poly_coeffs, principal_point, scale_factors = get_intrinsics( input_files[i_cam][0], calib_data) poly_coeffs = torch.from_numpy(poly_coeffs).unsqueeze(0) principal_point = torch.from_numpy(principal_point).unsqueeze( 0) scale_factors = torch.from_numpy(scale_factors).unsqueeze(0) pose_matrix = torch.from_numpy( get_extrinsics_pose_matrix(input_files[i_cam][0], calib_data)).unsqueeze(0) pose_tensor = Pose(pose_matrix) cams.append( CameraFisheye(path_to_theta_lut=[path_to_theta_lut], path_to_ego_mask=[path_to_ego_mask], poly_coeffs=poly_coeffs.float(), principal_point=principal_point.float(), scale_factors=scale_factors.float(), Tcw=pose_tensor)) if torch.cuda.is_available(): cams[i_cam] = cams[i_cam].to('cuda:{}'.format(rank()), dtype=dtype) ego_mask = np.load(path_to_ego_mask) not_masked.append( torch.from_numpy(ego_mask.astype(float)).cuda().float()) base_0, ext_0 = os.path.splitext( os.path.basename(input_files[0][i_file])) print(base_0) images = [] images_numpy = [] pred_inv_depths = [] pred_depths = [] for i_cam in range(N_cams): images.append( load_image(input_files[i_cam][i_file]).convert('RGB')) images[i_cam] = resize_image(images[i_cam], image_shape) images[i_cam] = to_tensor(images[i_cam]).unsqueeze(0) if torch.cuda.is_available(): images[i_cam] = images[i_cam].to('cuda:{}'.format(rank()), dtype=dtype) images_numpy.append(images[i_cam][0].cpu().numpy()) #images_numpy[i_cam] = images_numpy[i_cam][not_masked[i_cam]] with torch.no_grad(): pred_inv_depths.append(model_wrappers[0].depth( images[i_cam])) pred_depths.append(inv2depth(pred_inv_depths[i_cam])) #mask_colors_blue = np.sum(np.abs(colors_tmp - np.array([0.6, 0.8, 1])), axis=1) < 0.6 # bleu ciel def photo_loss_2imgs(i_cam1, i_cam2, rot_vect_list, save_pictures, rot_str): # Computes the photometric loss between 2 images of adjacent cameras # It reconstructs each image from the adjacent one, using calibration data, # depth prediction 
model and correction angles alpha, beta, gamma for i, i_cam in enumerate([i_cam1, i_cam2]): base_folder_str = get_base_folder( input_files[i_cam][i_file]) split_type_str = get_split_type(input_files[i_cam][i_file]) seq_name_str = get_sequence_name( input_files[i_cam][i_file]) camera_str = get_camera_name(input_files[i_cam][i_file]) calib_data = {} calib_data[camera_str] = read_raw_calib_files_camera_valeo( base_folder_str, split_type_str, seq_name_str, camera_str) pose_matrix = get_extrinsics_pose_matrix_extra_rot_torch( input_files[i_cam][i_file], calib_data, rot_vect_list[i]).unsqueeze(0) pose_tensor = Pose(pose_matrix).to('cuda:{}'.format( rank()), dtype=dtype) CameraFisheye.Twc.fget.cache_clear() cams[i_cam].Tcw = pose_tensor world_points1 = cams[i_cam1].reconstruct(pred_depths[i_cam1], frame='w') world_points2 = cams[i_cam2].reconstruct(pred_depths[i_cam2], frame='w') #depth1_wrt_cam2 = torch.norm(cams[i_cam2].Tcw @ world_points1, dim=1, keepdim=True) #depth2_wrt_cam1 = torch.norm(cams[i_cam1].Tcw @ world_points2, dim=1, keepdim=True) ref_coords1to2 = cams[i_cam2].project(world_points1, frame='w') ref_coords2to1 = cams[i_cam1].project(world_points2, frame='w') reconstructedImg2to1 = funct.grid_sample(images[i_cam2] * not_masked[i_cam2], ref_coords1to2, mode='bilinear', padding_mode='zeros', align_corners=True) reconstructedImg1to2 = funct.grid_sample(images[i_cam1] * not_masked[i_cam1], ref_coords2to1, mode='bilinear', padding_mode='zeros', align_corners=True) #print(reconstructedImg2to1) # z = reconstructedImg2to1.sum() # z.backward() if save_pictures: if epoch == 0: cv2.imwrite( '/home/vbelissen/Downloads/cam_' + str(i_cam1) + '_' + str(i_file) + '_orig.png', torch.transpose( (images[i_cam1][0, :, :, :] ).unsqueeze(0).unsqueeze(4), 1, 4).squeeze().detach().cpu().numpy() * 255) cv2.imwrite( '/home/vbelissen/Downloads/cam_' + str(i_cam2) + '_' + str(i_file) + '_orig.png', torch.transpose( (images[i_cam2][0, :, :, :] ).unsqueeze(0).unsqueeze(4), 1, 4).squeeze().detach().cpu().numpy() * 255) cv2.imwrite( '/home/vbelissen/Downloads/cam_' + str(i_cam1) + '_' + str(i_file) + '_recons_from_' + str(i_cam2) + '_rot' + rot_str + '.png', torch.transpose( ((reconstructedImg2to1 * not_masked[i_cam1] )[0, :, :, :]).unsqueeze(0).unsqueeze(4), 1, 4).squeeze().detach().cpu().numpy() * 255) cv2.imwrite( '/home/vbelissen/Downloads/cam_' + str(i_cam2) + '_' + str(i_file) + '_recons_from_' + str(i_cam1) + '_rot' + rot_str + '.png', torch.transpose( ((reconstructedImg1to2 * not_masked[i_cam2] )[0, :, :, :]).unsqueeze(0).unsqueeze(4), 1, 4).squeeze().detach().cpu().numpy() * 255) #reconstructedDepth2to1 = funct.grid_sample(pred_depths[i_cam2], ref_coords1to2, mode='bilinear', padding_mode='zeros', align_corners=True) #reconstructedDepth1to2 = funct.grid_sample(pred_depths[i_cam1], ref_coords2to1, mode='bilinear', padding_mode='zeros', align_corners=True) #reconstructedEgo2to1 = funct.grid_sample(not_masked[i_cam2], ref_coords1to2, mode='bilinear', padding_mode='zeros', align_corners=True) #reconstructedEgo1to2 = funct.grid_sample(not_masked[i_cam1], ref_coords2to1, mode='bilinear', padding_mode='zeros', align_corners=True) l1_loss_1 = torch.abs(images[i_cam1] * not_masked[i_cam1] - reconstructedImg2to1 * not_masked[i_cam1]) l1_loss_2 = torch.abs(images[i_cam2] * not_masked[i_cam2] - reconstructedImg1to2 * not_masked[i_cam2]) # SSIM loss ssim_loss_weight = 0.85 ssim_loss_1 = SSIM(images[i_cam1] * not_masked[i_cam1], reconstructedImg2to1 * not_masked[i_cam1], C1=1e-4, C2=9e-4, kernel_size=3) ssim_loss_2 = 
SSIM(images[i_cam2] * not_masked[i_cam2], reconstructedImg1to2 * not_masked[i_cam2], C1=1e-4, C2=9e-4, kernel_size=3) ssim_loss_1 = torch.clamp((1. - ssim_loss_1) / 2., 0., 1.) ssim_loss_2 = torch.clamp((1. - ssim_loss_2) / 2., 0., 1.) # Weighted Sum: alpha * ssim + (1 - alpha) * l1 photometric_loss_1 = ssim_loss_weight * ssim_loss_1.mean( 1, True) + (1 - ssim_loss_weight) * l1_loss_1.mean( 1, True) photometric_loss_2 = ssim_loss_weight * ssim_loss_2.mean( 1, True) + (1 - ssim_loss_weight) * l1_loss_2.mean( 1, True) mask1 = photometric_loss_1 != 0 s1 = mask1.sum() loss_1 = (photometric_loss_1 * mask1).sum() / s1 if s1 > 0 else 0 mask2 = photometric_loss_2 != 0 s2 = mask2.sum() loss_2 = (photometric_loss_2 * mask2).sum() / s2 if s2 > 0 else 0 #valid1 = ... #valid2 = ... return loss_1 + loss_2 loss = sum([ photo_loss_2imgs( i, (i + 1) % 4, [ extra_rot_deg[3 * i:3 * (i + 1)], extra_rot_deg[3 * ((i + 1) % 4):3 * (((i + 1) % 4) + 1)] ], save_pictures, '_' + '_'.join([ str(int(100 * extra_rot_deg[i_rot]) / 100) for i_rot in range(12) ])) for i in range(4) ]) #print(loss) optimizer.zero_grad() loss.backward() optimizer.step() with torch.no_grad(): #extra_rot_deg_grad_list.append(extra_rot_deg.grad.clone()) #extra_rot_deg.grad.zero_() loss_sum += loss.item() print(loss) with torch.no_grad(): #extra_rot_deg.sub_(sum(extra_rot_deg_grad_list)/N_files*learning_rate) print('End of epoch') print(extra_rot_deg) loss_tab[epoch] = loss_sum / N_files plt.plot(loss_tab) plt.show()
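The refinement above treats the twelve extra rotation angles (three per camera) as a single learnable vector and minimizes the photometric consistency loss with Adam; torch.zeros(12, requires_grad=True) is the modern equivalent of the Variable wrapper used there. A stripped-down sketch of the loop with a quadratic stand-in for photo_loss_2imgs, purely illustrative:

import torch
import torch.optim as optim

extra_rot_deg = torch.zeros(12, requires_grad=True)   # 3 angles for each of 4 cameras
optimizer = optim.Adam([extra_rot_deg], lr=0.05)
target = torch.randn(12)                              # stand-in for the photometric objective

for epoch in range(100):
    optimizer.zero_grad()
    loss = ((extra_rot_deg - target) ** 2).sum()      # placeholder for photo_loss_2imgs(...)
    loss.backward()
    optimizer.step()

print(extra_rot_deg.detach())   # moves toward `target` in this toy setting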
def infer_plot_and_save_3D_pcl(input_files, output_folder, model_wrappers, image_shape, half, save, stop): """ Process a single input file to produce and save visualization Parameters ---------- input_file : list (number of cameras) of lists (number of files) of str Image file output_file : str Output file, or folder where the output will be saved model_wrapper : nn.Module Model wrapper used for inference image_shape : Image shape Input image shape half: bool use half precision (fp16) save: str Save format (npz or png) """ N_cams = len(input_files) N_files = len(input_files[0]) camera_names = [] for i_cam in range(N_cams): camera_names.append(get_camera_name(input_files[i_cam][0])) cams = [] not_masked = [] cams_x = [] cams_y = [] cams_z = [] # change to half precision for evaluation if requested dtype = torch.float16 if half else None bbox = o3d.geometry.AxisAlignedBoundingBox(min_bound=(-1000, -1000, -1), max_bound=(1000, 1000, 5)) # let's assume all images are from the same sequence (thus same cameras) for i_cam in range(N_cams): base_folder_str = get_base_folder(input_files[i_cam][0]) split_type_str = get_split_type(input_files[i_cam][0]) seq_name_str = get_sequence_name(input_files[i_cam][0]) camera_str = get_camera_name(input_files[i_cam][0]) calib_data = {} calib_data[camera_str] = read_raw_calib_files_camera_valeo( base_folder_str, split_type_str, seq_name_str, camera_str) cams_x.append(float(calib_data[camera_str]['extrinsics']['pos_x_m'])) cams_y.append(float(calib_data[camera_str]['extrinsics']['pos_y_m'])) cams_z.append(float(calib_data[camera_str]['extrinsics']['pos_y_m'])) path_to_theta_lut = get_path_to_theta_lut(input_files[i_cam][0]) path_to_ego_mask = get_path_to_ego_mask(input_files[i_cam][0]) poly_coeffs, principal_point, scale_factors = get_intrinsics( input_files[i_cam][0], calib_data) poly_coeffs = torch.from_numpy(poly_coeffs).unsqueeze(0) principal_point = torch.from_numpy(principal_point).unsqueeze(0) scale_factors = torch.from_numpy(scale_factors).unsqueeze(0) pose_matrix = torch.from_numpy( get_extrinsics_pose_matrix(input_files[i_cam][0], calib_data)).unsqueeze(0) pose_tensor = Pose(pose_matrix) cams.append( CameraFisheye(path_to_theta_lut=[path_to_theta_lut], path_to_ego_mask=[path_to_ego_mask], poly_coeffs=poly_coeffs.float(), principal_point=principal_point.float(), scale_factors=scale_factors.float(), Tcw=pose_tensor)) if torch.cuda.is_available(): cams[i_cam] = cams[i_cam].to('cuda:{}'.format(rank()), dtype=dtype) ego_mask = np.load(path_to_ego_mask) not_masked.append(ego_mask.astype(bool).reshape(-1)) cams_middle = np.zeros(3) cams_middle[0] = (cams_x[0] + cams_x[1] + cams_x[2] + cams_x[3]) / 4 cams_middle[1] = (cams_y[0] + cams_y[1] + cams_y[2] + cams_y[3]) / 4 cams_middle[2] = (cams_z[0] + cams_z[1] + cams_z[2] + cams_z[3]) / 4 # create output dirs for each cam seq_name = get_sequence_name(input_files[0][0]) # for i_cam in range(N_cams): # os.makedirs(os.path.join(output_folder, seq_name, 'depth', camera_names[i_cam]), exist_ok=True) # os.makedirs(os.path.join(output_folder, seq_name, 'rgb', camera_names[i_cam]), exist_ok=True) # Premiere passe sur toutes les cameras i_file = 0 values = np.arange(1.0, 15.0, 0.5) N_values = values.size nb_points = np.zeros((N_values, 4)) n = 0 for K in values: print(K) base_0, ext_0 = os.path.splitext( os.path.basename(input_files[0][i_file])) print(base_0) images = [] images_numpy = [] pred_inv_depths = [] pred_depths = [] world_points = [] input_depth_files = [] has_gt_depth = [] gt_depth = [] gt_depth_3d = [] pcl_full 
= [] pcl_only_inliers = [] pcl_only_inliers_left = [] pcl_only_inliers_right = [] pcl_only_outliers = [] ind_clean = [] ind_clean_left = [] ind_clean_right = [] pcl_gt = [] rgb = [] viz_pred_inv_depths = [] for i_cam in range(N_cams): images.append( load_image(input_files[i_cam][i_file]).convert('RGB')) images[i_cam] = resize_image(images[i_cam], image_shape) images[i_cam] = to_tensor(images[i_cam]).unsqueeze(0) if torch.cuda.is_available(): images[i_cam] = images[i_cam].to('cuda:{}'.format(rank()), dtype=dtype) images_numpy.append(images[i_cam][0].cpu().numpy()) images_numpy[i_cam] = images_numpy[i_cam].reshape( (3, -1)).transpose() images_numpy[i_cam] = images_numpy[i_cam][not_masked[i_cam]] pred_inv_depths.append(model_wrappers[i_cam].depth(images[i_cam])) pred_depths.append(inv2depth(pred_inv_depths[i_cam])) world_points.append(cams[i_cam].reconstruct(K * pred_depths[i_cam], frame='w')) world_points[i_cam] = world_points[i_cam][0].cpu().numpy() world_points[i_cam] = world_points[i_cam].reshape( (3, -1)).transpose() world_points[i_cam] = world_points[i_cam][not_masked[i_cam]] cam_name = camera_names[i_cam] cam_int = cam_name.split('_')[-1] input_depth_files.append(get_depth_file( input_files[i_cam][i_file])) # has_gt_depth.append(os.path.exists(input_depth_files[i_cam])) # if has_gt_depth[i_cam]: # gt_depth.append(np.load(input_depth_files[i_cam])['velodyne_depth'].astype(np.float32)) # gt_depth[i_cam] = torch.from_numpy(gt_depth[i_cam]).unsqueeze(0).unsqueeze(0) # if torch.cuda.is_available(): # gt_depth[i_cam] = gt_depth[i_cam].to('cuda:{}'.format(rank()), dtype=dtype) # gt_depth_3d.append(cams[i_cam].reconstruct(gt_depth[i_cam], frame='w')) # gt_depth_3d[i_cam] = gt_depth_3d[i_cam][0].cpu().numpy() # gt_depth_3d[i_cam] = gt_depth_3d[i_cam].reshape((3, -1)).transpose() # #gt_depth_3d[i_cam] = gt_depth_3d[i_cam][not_masked[i_cam]] # else: # gt_depth.append(0) # gt_depth_3d.append(0) pcl_full.append(o3d.geometry.PointCloud()) pcl_full[i_cam].points = o3d.utility.Vector3dVector( world_points[i_cam]) pcl_full[i_cam].colors = o3d.utility.Vector3dVector( images_numpy[i_cam]) pcl = pcl_full[i_cam] #.select_by_index(ind) points_tmp = np.asarray(pcl.points) colors_tmp = np.asarray(pcl.colors) # remove points that are above mask_height = points_tmp[:, 2] > 2.5 # * (abs(points_tmp[:, 0]) < 10) * (abs(points_tmp[:, 1]) < 3) mask_colors_blue = np.sum( np.abs(colors_tmp - np.array([0.6, 0.8, 1])), axis=1) < 0.6 # bleu ciel mask_colors_green = np.sum( np.abs(colors_tmp - np.array([0.2, 1, 0.4])), axis=1) < 0.8 mask_colors_green2 = np.sum( np.abs(colors_tmp - np.array([0, 0.5, 0.15])), axis=1) < 0.2 mask = 1 - mask_height * mask_colors_blue mask2 = 1 - mask_height * mask_colors_green mask3 = 1 - mask_height * mask_colors_green2 #maskY = points_tmp[:, 1] > 1 mask = mask * mask2 * mask3 #*maskY cl, ind = pcl.remove_statistical_outlier(nb_neighbors=7, std_ratio=1.4) ind_clean.append( list(set.intersection(set(list(np.where(mask)[0])), set(ind)))) #pcl_only_inliers.append(pcl.select_by_index(ind_clean[i_cam])) # if has_gt_depth[i_cam]: # pcl_gt.append(o3d.geometry.PointCloud()) # pcl_gt[i_cam].points = o3d.utility.Vector3dVector(gt_depth_3d[i_cam]) # gt_inv_depth = 1 / (np.linalg.norm(gt_depth_3d[i_cam], axis=1) + 1e-6) # cm = get_cmap('plasma') # normalizer = .35#np.percentile(gt_inv_depth, 95) # gt_inv_depth /= (normalizer + 1e-6) # #print(cm(np.clip(gt_inv_depth, 0., 1.0)).shape) # [:, :3] # # pcl_gt[i_cam].colors = o3d.utility.Vector3dVector(cm(np.clip(gt_inv_depth, 0., 1.0))[:, :3]) # else: # 
pcl_gt.append(0) #o3d.visualization.draw_geometries(pcl_full + [e for i, e in enumerate(pcl_gt) if e != 0]) color_cam = True if color_cam: for i_cam in range(4): if i_cam == 0: pcl_full[i_cam].paint_uniform_color( [1.0, 0.0, 0.0] ) #.colors = o3d.utility.Vector3dVector(images_numpy[i_cam]) elif i_cam == 1: pcl_full[i_cam].paint_uniform_color([0.0, 1.0, 0.0]) elif i_cam == 2: pcl_full[i_cam].paint_uniform_color([0.0, 0.0, 1.0]) elif i_cam == 3: pcl_full[i_cam].paint_uniform_color([0.3, 0.4, 0.3]) dist_total = np.zeros(4) for i_cam in range(4): points_tmp = np.asarray(pcl_full[i_cam].points) #points_tmp_left = np.asarray(pcl_full[(i_cam-1) % 4].points) #points_tmp_right = np.asarray(pcl_full[(i_cam+1) % 4].points) if i_cam == 0: maskX_left = points_tmp[:, 0] > cams_x[0] maskX_right = points_tmp[:, 0] > cams_x[0] maskY_left = points_tmp[:, 1] > cams_y[3] maskY_right = points_tmp[:, 1] < cams_y[1] elif i_cam == 1: maskX_left = points_tmp[:, 0] > cams_x[0] maskX_right = points_tmp[:, 0] < cams_x[2] maskY_left = points_tmp[:, 1] < cams_y[1] maskY_right = points_tmp[:, 1] < cams_y[1] elif i_cam == 2: maskX_left = points_tmp[:, 0] < cams_x[2] maskX_right = points_tmp[:, 0] < cams_x[2] maskY_left = points_tmp[:, 1] < cams_y[1] maskY_right = points_tmp[:, 1] > cams_y[3] elif i_cam == 3: maskX_left = points_tmp[:, 0] < cams_x[2] maskX_right = points_tmp[:, 0] > cams_x[0] maskY_left = points_tmp[:, 1] > cams_y[3] maskY_right = points_tmp[:, 1] > cams_y[3] far_distance = 5 mask_far = np.sqrt( np.square(points_tmp[:, 0] - cams_middle[0]) + np.square(points_tmp[:, 1] - cams_middle[1])) < far_distance ind_clean_left.append( list( set.intersection( set( list( np.where(mask_far * maskX_left * maskY_left)[0])), set(ind_clean[i_cam])))) ind_clean_right.append( list( set.intersection( set( list( np.where(mask_far * maskX_right * maskY_right)[0])), set(ind_clean[i_cam])))) for i_cam in range(4): pcl1 = pcl_full[i_cam].select_by_index( ind_clean_right[i_cam]).uniform_down_sample(10) pcl2 = pcl_full[(i_cam + 1) % 4].select_by_index( ind_clean_left[(i_cam + 1) % 4]).uniform_down_sample(10) #o3d.visualization.draw_geometries([pcl1, pcl2]) dists = pcl1.compute_point_cloud_distance(pcl2) nb_points[n, i_cam] = np.sum(np.asarray(dists) < 0.5) print(nb_points[n, i_cam]) dists = np.mean(np.asarray(dists)) print(dists) dist_total[i_cam] = dists print(np.mean(dist_total)) # i_cam = 0 # # base_folder_str = get_base_folder(input_files[i_cam][0]) # split_type_str = get_split_type(input_files[i_cam][0]) # seq_name_str = get_sequence_name(input_files[i_cam][0]) # camera_str = get_camera_name(input_files[i_cam][0]) # # calib_data = {} # calib_data[camera_str] = read_raw_calib_files_camera_valeo(base_folder_str, split_type_str, seq_name_str, camera_str) # pose_matrix = torch.from_numpy(get_extrinsics_pose_matrix_extra_rot(input_files[i_cam][0], calib_data, 15.0, 0, 0)).unsqueeze(0) # # # # R_cam_0 = cams[0].Tcw.mat.cpu().numpy()[0, :3, :3] # # print(R_cam_0) # # t_cam_0 = cams[0].Tcw.mat.cpu().numpy()[0, :3, 3] # # c = np.cos(0.1) # # s = np.sin(0.1) # # extr_rot = np.array([[c, 0, s], # # [0, 1, 0], # # [-s, 0, c]]) # # R_cam_0 = np.matmul(extr_rot, R_cam_0) # # print(R_cam_0) # # pose_matrix = torch.from_numpy(transform_from_rot_trans(R_cam_0, t_cam_0).astype(np.float32)).unsqueeze(0) # pose_tensor = Pose(pose_matrix).to('cuda:{}'.format(rank()), dtype=dtype) # # CameraFisheye.Twc.fget.cache_clear() # cams[0].Tcw = pose_tensor # # # # world_points[i_cam] = cams[i_cam].reconstruct(pred_depths[i_cam], frame='w') # 
world_points[i_cam] = world_points[i_cam][0].cpu().numpy() # world_points[i_cam] = world_points[i_cam].reshape((3, -1)).transpose() # world_points[i_cam] = world_points[i_cam][not_masked[i_cam]] # pcl_full[i_cam].points = o3d.utility.Vector3dVector(world_points[i_cam]) # # for i_cam in range(4): # pcl1 = pcl_full[i_cam].select_by_index(ind_clean_right[i_cam]).uniform_down_sample(10) # pcl2 = pcl_full[(i_cam + 1) % 4].select_by_index(ind_clean_left[(i_cam + 1) % 4]).uniform_down_sample(10) # #o3d.visualization.draw_geometries([pcl1, pcl2]) # dists = pcl1.compute_point_cloud_distance(pcl2) # nb_points[n, i_cam] = np.sum(np.asarray(dists) < 0.5) # print(nb_points[n, i_cam]) # dists = np.mean(np.asarray(dists)) # print(dists) # dist_total[i_cam] = dists # # print(np.mean(dist_total)) # # i_cam1 = 0 # i_cam2 = 1 # pcl1 = pcl_full[i_cam1].select_by_index(ind_clean_right[i_cam1]).uniform_down_sample(10) # pcl2 = pcl_full[i_cam2].select_by_index(ind_clean_left[i_cam2]).uniform_down_sample(10) # dists1 = pcl1.compute_point_cloud_distance(pcl2) # dists2 = pcl2.compute_point_cloud_distance(pcl1) # close_points1 = np.where(np.asarray(dists1) < 0.5)[0] # close_points2 = np.where(np.asarray(dists2) < 0.5)[0] # pcl1 = pcl1.select_by_index(close_points1) # pcl2 = pcl2.select_by_index(close_points2) # # def pcl_distance(extra_rot_deg): # i_cam = 0 # # base_folder_str = get_base_folder(input_files[i_cam][0]) # split_type_str = get_split_type(input_files[i_cam][0]) # seq_name_str = get_sequence_name(input_files[i_cam][0]) # camera_str = get_camera_name(input_files[i_cam][0]) # # calib_data = {} # calib_data[camera_str] = read_raw_calib_files_camera_valeo(base_folder_str, split_type_str, seq_name_str, camera_str) # pose_matrix = torch.from_numpy( get_extrinsics_pose_matrix_extra_rot(input_files[i_cam][0], calib_data, extra_rot_deg[0], extra_rot_deg[1], extra_rot_deg[2])).unsqueeze(0) # pose_tensor = Pose(pose_matrix).to('cuda:{}'.format(rank()), dtype=dtype) # CameraFisheye.Twc.fget.cache_clear() # cams[i_cam].Tcw = pose_tensor # # world_points[i_cam] = cams[i_cam].reconstruct(pred_depths[i_cam], frame='w') # # world_points[i_cam] = world_points[i_cam][0].cpu().numpy() # world_points[i_cam] = world_points[i_cam].reshape((3, -1)).transpose() # world_points[i_cam] = world_points[i_cam][not_masked[i_cam]].astype(np.float64) # pcl_full[i_cam].points = o3d.utility.Vector3dVector(world_points[i_cam]) # # pcl1 = pcl_full[i_cam1].select_by_index(ind_clean_right[i_cam1]).uniform_down_sample(10).select_by_index(close_points1) # pcl2 = pcl_full[i_cam2].select_by_index(ind_clean_left[i_cam2]).uniform_down_sample(10).select_by_index(close_points2) # # dists = pcl1.compute_point_cloud_distance(pcl2) # return np.mean(np.asarray(dists)) # # print(pcl_distance([0.0, 0.0, 0.0])) # print(pcl_distance([15.0, 0.0, 0.0])) # print(pcl_distance([-0.01, 0.0, 0.0])) # print(pcl_distance([0.01, 0.0, 0.0])) # # s = time.time() # result = minimize(pcl_distance, x0=np.array([0.,0.0,0.]), method='L-BFGS-B', options={'disp': True}) # # Method BFGS not working # # Method Nelder-Mead working - time : 12.842947721481323 # # Method Powell maybe working - time : 4.3 # # Method CG not working # # Method L-BFGS-B not working # # # # print("time : " + str(time.time() - s)) # print(result) close_points1 = [] close_points2 = [] for i_cam in range(4): i_cam1 = i_cam i_cam2 = (i_cam + 1) % 4 pcl1 = pcl_full[i_cam1].select_by_index( ind_clean_right[i_cam1]).uniform_down_sample(100) pcl2 = pcl_full[i_cam2].select_by_index( 
ind_clean_left[i_cam2]).uniform_down_sample(100) dists1 = pcl1.compute_point_cloud_distance(pcl2) dists2 = pcl2.compute_point_cloud_distance(pcl1) close_points1.append(np.where(np.asarray(dists1) < 0.5)[0]) close_points2.append(np.where(np.asarray(dists2) < 0.5)[0]) def pcl_distance(extra_rot_deg): for i_cam in range(4): base_folder_str = get_base_folder(input_files[i_cam][0]) split_type_str = get_split_type(input_files[i_cam][0]) seq_name_str = get_sequence_name(input_files[i_cam][0]) camera_str = get_camera_name(input_files[i_cam][0]) calib_data = {} calib_data[camera_str] = read_raw_calib_files_camera_valeo( base_folder_str, split_type_str, seq_name_str, camera_str) pose_matrix = torch.from_numpy( get_extrinsics_pose_matrix_extra_rot( input_files[i_cam][0], calib_data, extra_rot_deg[3 * i_cam + 0], extra_rot_deg[3 * i_cam + 1], extra_rot_deg[3 * i_cam + 2])).unsqueeze(0) pose_tensor = Pose(pose_matrix).to('cuda:{}'.format(rank()), dtype=dtype) CameraFisheye.Twc.fget.cache_clear() cams[i_cam].Tcw = pose_tensor world_points[i_cam] = cams[i_cam].reconstruct( pred_depths[i_cam], frame='w') world_points[i_cam] = world_points[i_cam][0].cpu().numpy() world_points[i_cam] = world_points[i_cam].reshape( (3, -1)).transpose() world_points[i_cam] = world_points[i_cam][ not_masked[i_cam]].astype(np.float64) pcl_full[i_cam].points = o3d.utility.Vector3dVector( world_points[i_cam]) dist_total = np.zeros(4) for i_cam in range(4): i_cam1 = i_cam i_cam2 = (i_cam + 1) % 4 pcl1 = pcl_full[i_cam1].select_by_index( ind_clean_right[i_cam1]).uniform_down_sample( 100).select_by_index(close_points1[i_cam]) pcl2 = pcl_full[i_cam2].select_by_index( ind_clean_left[i_cam2]).uniform_down_sample( 100).select_by_index(close_points2[i_cam]) dists = pcl1.compute_point_cloud_distance(pcl2) dist_total[i_cam] = np.mean(np.asarray(dists)) return np.mean(dist_total) result = minimize(pcl_distance, x0=np.zeros(12), method='Powell', options={'disp': True}) print(result) o3d.visualization.draw_geometries( [pcl_full[0], pcl_full[1], pcl_full[2], pcl_full[3]]) for i_cam in range(4): pcl1 = pcl_full[i_cam].select_by_index( ind_clean_right[i_cam]).uniform_down_sample(10) pcl2 = pcl_full[(i_cam + 1) % 4].select_by_index( ind_clean_left[(i_cam + 1) % 4]).uniform_down_sample(10) o3d.visualization.draw_geometries([pcl1, pcl2]) # vis_full = o3d.visualization.Visualizer() # vis_full.create_window(visible = True, window_name = 'full'+str(i_file)) # for i_cam in range(N_cams): # vis_full.add_geometry(pcl_full[i_cam]) # for i, e in enumerate(pcl_gt): # if e != 0: # vis_full.add_geometry(e) # ctr = vis_full.get_view_control() # ctr.set_lookat(lookat_vector) # ctr.set_front(front_vector) # ctr.set_up(up_vector) # ctr.set_zoom(zoom_float) # #vis_full.run() # vis_full.destroy_window() i_cam2 = 0 suff = '' vis_only_inliers = o3d.visualization.Visualizer() vis_only_inliers.create_window(visible=True, window_name='inliers' + str(i_file)) for i_cam in range(N_cams): vis_only_inliers.add_geometry(pcl_only_inliers[i_cam]) for i, e in enumerate(pcl_gt): if e != 0: vis_only_inliers.add_geometry(e) ctr = vis_only_inliers.get_view_control() ctr.set_lookat(lookat_vector) ctr.set_front(front_vector) ctr.set_up(up_vector) ctr.set_zoom(zoom_float) param = o3d.io.read_pinhole_camera_parameters( '/home/vbelissen/Downloads/test/cameras_jsons/test' + str(i_cam2 + 1) + suff + '.json') ctr.convert_from_pinhole_camera_parameters(param) opt = vis_only_inliers.get_render_option() opt.background_color = np.asarray([0, 0, 0]) #vis_only_inliers.update_geometry('inliers0') 
        vis_only_inliers.poll_events()
        vis_only_inliers.update_renderer()
        if stop:
            vis_only_inliers.run()
        #param = vis_only_inliers.get_view_control().convert_to_pinhole_camera_parameters()
        #o3d.io.write_pinhole_camera_parameters('/home/vbelissen/Downloads/test.json', param)
        vis_only_inliers.destroy_window()
        del ctr
        del vis_only_inliers
        del opt

        n += 1

    plt.plot(values, nb_points)
    plt.show()
    plt.close()
    plt.plot(values, np.sum(nb_points, axis=1))
    plt.show()
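# A minimal sketch of the overlap-consistency score computed in the sweep
# above: for a pair of adjacent cameras, both predicted clouds are uniformly
# downsampled, point-to-cloud distances are taken with Open3D, and the score
# is the number of points closer than 0.5 m together with the mean distance.
# The helper name and its defaults are illustrative, not from the original code.
import numpy as np
import open3d as o3d

def overlap_score(pcl_a, pcl_b, every_k_points=10, dist_thresh=0.5):
    """Return (#points of pcl_a within dist_thresh of pcl_b, mean distance)."""
    pcl_a_down = pcl_a.uniform_down_sample(every_k_points)
    pcl_b_down = pcl_b.uniform_down_sample(every_k_points)
    dists = np.asarray(pcl_a_down.compute_point_cloud_distance(pcl_b_down))
    return int(np.sum(dists < dist_thresh)), float(dists.mean())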
def infer_plot_and_save_3D_pcl(input_file, output_file, model_wrapper, image_shape, half, save): """ Process a single input file to produce and save visualization Parameters ---------- input_file : str Image file output_file : str Output file, or folder where the output will be saved model_wrapper : nn.Module Model wrapper used for inference image_shape : Image shape Input image shape half: bool use half precision (fp16) save: str Save format (npz or png) """ if not is_image(output_file): # If not an image, assume it's a folder and append the input name os.makedirs(output_file, exist_ok=True) output_file = os.path.join(output_file, os.path.basename(input_file)) # change to half precision for evaluation if requested dtype = torch.float16 if half else None # Load image image = load_image(input_file).convert('RGB') # Resize and to tensor image = resize_image(image, image_shape) image = to_tensor(image).unsqueeze(0) # Send image to GPU if available if torch.cuda.is_available(): image = image.to('cuda:{}'.format(rank()), dtype=dtype) # Depth inference (returns predicted inverse depth) pred_inv_depth = model_wrapper.depth(image) pred_depth = inv2depth(pred_inv_depth) base_folder_str = get_base_folder(input_file) split_type_str = get_split_type(input_file) seq_name_str = get_sequence_name(input_file) camera_str = get_camera_name(input_file) calib_data = {} calib_data[camera_str] = read_raw_calib_files_camera_valeo( base_folder_str, split_type_str, seq_name_str, camera_str) path_to_theta_lut = get_path_to_theta_lut(input_file) path_to_ego_mask = get_path_to_ego_mask(input_file) poly_coeffs, principal_point, scale_factors = get_intrinsics( input_file, calib_data) poly_coeffs = torch.from_numpy(poly_coeffs).unsqueeze(0) principal_point = torch.from_numpy(principal_point).unsqueeze(0) scale_factors = torch.from_numpy(scale_factors).unsqueeze(0) pose_matrix = torch.from_numpy( get_extrinsics_pose_matrix(input_file, calib_data)).unsqueeze(0) pose_tensor = Pose(pose_matrix) ego_mask = np.load(path_to_ego_mask) not_masked = ego_mask.astype(bool).reshape(-1) cam = CameraFisheye(path_to_theta_lut=[path_to_theta_lut], path_to_ego_mask=[path_to_ego_mask], poly_coeffs=poly_coeffs.float(), principal_point=principal_point.float(), scale_factors=scale_factors.float(), Tcw=pose_tensor) if torch.cuda.is_available(): cam = cam.to('cuda:{}'.format(rank()), dtype=dtype) world_points = cam.reconstruct(pred_depth, frame='w') world_points = world_points[0].cpu().numpy() world_points = world_points.reshape((3, -1)).transpose() gt_depth_file = get_depth_file(input_file) gt_depth = np.load(gt_depth_file)['velodyne_depth'].astype(np.float32) gt_depth = torch.from_numpy(gt_depth).unsqueeze(0).unsqueeze(0) if torch.cuda.is_available(): gt_depth = gt_depth.to('cuda:{}'.format(rank()), dtype=dtype) gt_depth_3d = cam.reconstruct(gt_depth, frame='w') gt_depth_3d = gt_depth_3d[0].cpu().numpy() gt_depth_3d = gt_depth_3d.reshape((3, -1)).transpose() world_points = world_points[not_masked] gt_depth_3d = gt_depth_3d[not_masked] pcl = o3d.geometry.PointCloud() pcl.points = o3d.utility.Vector3dVector(world_points) img_numpy = image[0].cpu().numpy() img_numpy = img_numpy.reshape((3, -1)).transpose() img_numpy = img_numpy[not_masked] pcl.colors = o3d.utility.Vector3dVector(img_numpy) #pcl.paint_uniform_color([1.0, 0.0, 0]) #print("Radius oulier removal") #cl, ind = pcl.remove_radius_outlier(nb_points=10, radius=0.5) #display_inlier_outlier(pcl, ind) remove_outliers = True if remove_outliers: cl, ind = 
pcl.remove_statistical_outlier(nb_neighbors=10, std_ratio=1.3) #display_inlier_outlier(pcl, ind) inlier_cloud = pcl.select_by_index(ind) #inlier_cloud.paint_uniform_color([0.0, 1.0, 0]) outlier_cloud = pcl.select_by_index(ind, invert=True) outlier_cloud.paint_uniform_color([0.0, 0.0, 1.0]) pcl_gt = o3d.geometry.PointCloud() pcl_gt.points = o3d.utility.Vector3dVector(gt_depth_3d) pcl_gt.paint_uniform_color([1.0, 0.0, 0]) if remove_outliers: o3d.visualization.draw_geometries( [inlier_cloud, pcl_gt, outlier_cloud]) o3d.visualization.draw_geometries([inlier_cloud, pcl_gt]) o3d.visualization.draw_geometries([pcl, pcl_gt]) rgb = image[0].permute(1, 2, 0).detach().cpu().numpy() * 255 # Prepare inverse depth viz_pred_inv_depth = viz_inv_depth(pred_inv_depth[0]) * 255 # Concatenate both vertically image = np.concatenate([rgb, viz_pred_inv_depth], 0) # Save visualization print('Saving {} to {}'.format( pcolor(input_file, 'cyan', attrs=['bold']), pcolor(output_file, 'magenta', attrs=['bold']))) imwrite(output_file, image[:, :, ::-1])
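# Sketch of the inlier/outlier split used above: Open3D's statistical outlier
# removal returns inlier indices; the outliers are the inverted selection and
# are painted a flat blue so they stand out next to the LiDAR ground truth.
# The helper is illustrative; the defaults match the values used above.
import open3d as o3d

def split_inliers_outliers(pcl, nb_neighbors=10, std_ratio=1.3):
    _, ind = pcl.remove_statistical_outlier(nb_neighbors=nb_neighbors,
                                            std_ratio=std_ratio)
    inlier_cloud = pcl.select_by_index(ind)
    outlier_cloud = pcl.select_by_index(ind, invert=True)
    outlier_cloud.paint_uniform_color([0.0, 0.0, 1.0])
    return inlier_cloud, outlier_cloud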
def infer_plot_and_save_3D_pcl(input_files, output_folder, model_wrappers, image_shape, half, save, stop): """ Process a single input file to produce and save visualization Parameters ---------- input_file : list (number of cameras) of lists (number of files) of str Image file output_file : str Output file, or folder where the output will be saved model_wrapper : nn.Module Model wrapper used for inference image_shape : Image shape Input image shape half: bool use half precision (fp16) save: str Save format (npz or png) """ N_cams = len(input_files) N_files = len(input_files[0]) camera_names = [] for i_cam in range(N_cams): camera_names.append(get_camera_name(input_files[i_cam][0])) cams = [] not_masked = [] cams_x = [] cams_y = [] cams_z = [] alpha_mask = 0.7 # change to half precision for evaluation if requested dtype = torch.float16 if half else None bbox = o3d.geometry.AxisAlignedBoundingBox(min_bound=(-1000, -1000, -1), max_bound=(1000, 1000, 5)) # let's assume all images are from the same sequence (thus same cameras) for i_cam in range(N_cams): base_folder_str = get_base_folder(input_files[i_cam][0]) split_type_str = get_split_type(input_files[i_cam][0]) seq_name_str = get_sequence_name(input_files[i_cam][0]) camera_str = get_camera_name(input_files[i_cam][0]) calib_data = {} calib_data[camera_str] = read_raw_calib_files_camera_valeo( base_folder_str, split_type_str, seq_name_str, camera_str) cams_x.append(float(calib_data[camera_str]['extrinsics']['pos_x_m'])) cams_y.append(float(calib_data[camera_str]['extrinsics']['pos_y_m'])) cams_z.append(float(calib_data[camera_str]['extrinsics']['pos_z_m'])) path_to_theta_lut = get_path_to_theta_lut(input_files[i_cam][0]) path_to_ego_mask = get_path_to_ego_mask(input_files[i_cam][0]) poly_coeffs, principal_point, scale_factors = get_intrinsics( input_files[i_cam][0], calib_data) poly_coeffs = torch.from_numpy(poly_coeffs).unsqueeze(0) principal_point = torch.from_numpy(principal_point).unsqueeze(0) scale_factors = torch.from_numpy(scale_factors).unsqueeze(0) pose_matrix = torch.from_numpy( get_extrinsics_pose_matrix(input_files[i_cam][0], calib_data)).unsqueeze(0) pose_tensor = Pose(pose_matrix) cams.append( CameraFisheye(path_to_theta_lut=[path_to_theta_lut], path_to_ego_mask=[path_to_ego_mask], poly_coeffs=poly_coeffs.float(), principal_point=principal_point.float(), scale_factors=scale_factors.float(), Tcw=pose_tensor)) if torch.cuda.is_available(): cams[i_cam] = cams[i_cam].to('cuda:{}'.format(rank()), dtype=dtype) ego_mask = np.load(path_to_ego_mask) not_masked.append(ego_mask.astype(bool).reshape(-1)) cams_middle = np.zeros(3) cams_middle[0] = ( cams[0].Twc.mat.cpu().numpy()[0, 0, 3] + cams[1].Twc.mat.cpu().numpy()[0, 0, 3] + cams[2].Twc.mat.cpu().numpy()[0, 0, 3] + cams[3].Twc.mat.cpu().numpy()[0, 0, 3] ) / 4 #(cams_x[0] + cams_x[1] + cams_x[2] + cams_x[3]) / 4 cams_middle[1] = ( cams[0].Twc.mat.cpu().numpy()[0, 1, 3] + cams[1].Twc.mat.cpu().numpy()[0, 1, 3] + cams[2].Twc.mat.cpu().numpy()[0, 1, 3] + cams[3].Twc.mat.cpu().numpy()[0, 1, 3] ) / 4 #(cams_y[0] + cams_y[1] + cams_y[2] + cams_y[3]) / 4 cams_middle[2] = ( cams[0].Twc.mat.cpu().numpy()[0, 2, 3] + cams[1].Twc.mat.cpu().numpy()[0, 2, 3] + cams[2].Twc.mat.cpu().numpy()[0, 2, 3] + cams[3].Twc.mat.cpu().numpy()[0, 2, 3] ) / 4 #(cams_z[0] + cams_z[1] + cams_z[2] + cams_z[3]) / 4 # create output dirs for each cam seq_name = get_sequence_name(input_files[0][0]) for i_cam in range(N_cams): os.makedirs(os.path.join(output_folder, seq_name, 'depth', camera_names[i_cam]), exist_ok=True) 
os.makedirs(os.path.join(output_folder, seq_name, 'rgb', camera_names[i_cam]), exist_ok=True) first_pic = True for i_file in range(0, N_files, 10): load_pred_masks = False remove_close_points_lidar_semantic = False print_lidar = True base_0, ext_0 = os.path.splitext( os.path.basename(input_files[0][i_file])) print(base_0) images = [] images_numpy = [] predicted_masks = [] pred_inv_depths = [] pred_depths = [] world_points = [] input_depth_files = [] has_gt_depth = [] input_full_masks = [] has_full_mask = [] gt_depth = [] gt_depth_3d = [] pcl_full = [] pcl_only_inliers = [] pcl_only_outliers = [] pcl_gt = [] rgb = [] viz_pred_inv_depths = [] great_lap = [] for i_cam in range(N_cams): images.append( load_image(input_files[i_cam][i_file]).convert('RGB')) images[i_cam] = resize_image(images[i_cam], image_shape) images[i_cam] = to_tensor(images[i_cam]).unsqueeze(0) if torch.cuda.is_available(): images[i_cam] = images[i_cam].to('cuda:{}'.format(rank()), dtype=dtype) if load_pred_masks: input_pred_mask_file = input_files[i_cam][i_file].replace( 'images_multiview', 'pred_mask') predicted_masks.append( load_image(input_pred_mask_file).convert('RGB')) predicted_masks[i_cam] = resize_image(predicted_masks[i_cam], image_shape) predicted_masks[i_cam] = to_tensor( predicted_masks[i_cam]).unsqueeze(0) if torch.cuda.is_available(): predicted_masks[i_cam] = predicted_masks[i_cam].to( 'cuda:{}'.format(rank()), dtype=dtype) pred_inv_depths.append(model_wrappers[0].depth(images[i_cam])) pred_depths.append(inv2depth(pred_inv_depths[i_cam])) for i_cam in range(N_cams): print(i_cam) mix_depths = True if mix_depths: depths = (torch.ones(1, 3, 800, 1280) * 500).cuda() depths[0, 1, :, :] = pred_depths[i_cam][0, 0, :, :] # not_masked1s = torch.zeros(3, 800, 1280).to(dtype=bool) # not_masked1 = torch.ones(1, 3, 800, 1280).to(dtype=bool) for relative in [-1, 1]: path_to_ego_mask_relative = get_path_to_ego_mask( input_files[(i_cam + relative) % 4][0]) ego_mask_relative = np.load(path_to_ego_mask_relative) ego_mask_relative = torch.from_numpy( ego_mask_relative.astype(bool)) # reconstructed 3d points from relative depth map relative_points_3d = cams[(i_cam + relative) % 4].reconstruct( pred_depths[(i_cam + relative) % 4], frame='w') # cop of current cam cop = np.zeros((3, 800, 1280)) cop[0, :, :] = cams[i_cam].Twc.mat.cpu().numpy()[0, 0, 3] cop[1, :, :] = cams[i_cam].Twc.mat.cpu().numpy()[0, 1, 3] cop[2, :, :] = cams[i_cam].Twc.mat.cpu().numpy()[0, 2, 3] # distances of 3d points to cop of current cam distances_3d = np.linalg.norm( relative_points_3d[0, :, :, :].cpu().numpy() - cop, axis=0) distances_3d = torch.from_numpy(distances_3d).unsqueeze( 0).cuda().float() # projected points on current cam (values should be in (-1,1)), be careful X and Y are switched!!! 
projected_points_2d = cams[i_cam].project( relative_points_3d, frame='w') projected_points_2d[:, :, :, [0, 1]] = projected_points_2d[:, :, :, [1, 0]] # applying ego mask of relative cam projected_points_2d[:, ~ego_mask_relative, :] = 2 # looking for indices of inbounds pixels x_ok = (projected_points_2d[0, :, :, 0] > -1) * (projected_points_2d[0, :, :, 0] < 1) y_ok = (projected_points_2d[0, :, :, 1] > -1) * (projected_points_2d[0, :, :, 1] < 1) xy_ok = x_ok * y_ok xy_ok_id = xy_ok.nonzero(as_tuple=False) # xy values of these indices (in (-1, 1)) xy_ok_X = xy_ok_id[:, 0] xy_ok_Y = xy_ok_id[:, 1] # xy values in pixels projected_points_2d_ints = (projected_points_2d + 1) / 2 projected_points_2d_ints[0, :, :, 0] = torch.round( projected_points_2d_ints[0, :, :, 0] * 799) projected_points_2d_ints[0, :, :, 1] = torch.round( projected_points_2d_ints[0, :, :, 1] * 1279) projected_points_2d_ints = projected_points_2d_ints.to( dtype=int) # main equation depths[0, 1 + relative, projected_points_2d_ints[0, xy_ok_X, xy_ok_Y, 0], projected_points_2d_ints[0, xy_ok_X, xy_ok_Y, 1]] = distances_3d[0, xy_ok_X, xy_ok_Y] interpolation = False if interpolation: def fillMissingValues( target_for_interp, copy=True, interpolator=scipy.interpolate.LinearNDInterpolator ): import cv2, scipy, numpy as np if copy: target_for_interp = target_for_interp.copy() def getPixelsForInterp(img): """ Calculates a mask of pixels neighboring invalid values - to use for interpolation. """ # mask invalid pixels invalid_mask = np.isnan(img) + (img == 0) kernel = cv2.getStructuringElement( cv2.MORPH_ELLIPSE, (3, 3)) # dilate to mark borders around invalid regions dilated_mask = cv2.dilate( invalid_mask.astype('uint8'), kernel, borderType=cv2.BORDER_CONSTANT, borderValue=int(0)) # pixelwise "and" with valid pixel mask (~invalid_mask) masked_for_interp = dilated_mask * ~invalid_mask return masked_for_interp.astype( 'bool'), invalid_mask # Mask pixels for interpolation mask_for_interp, invalid_mask = getPixelsForInterp( target_for_interp) # Interpolate only holes, only using these pixels points = np.argwhere(mask_for_interp) values = target_for_interp[mask_for_interp] interp = interpolator(points, values) target_for_interp[invalid_mask] = interp( np.argwhere(invalid_mask)) return target_for_interp dd = depths[0, 1 + relative, :, :].cpu().numpy() dd[dd == 500] = np.nan dd = fillMissingValues(dd, copy=True, interpolator=scipy.interpolate. 
LinearNDInterpolator) dd[np.isnan(dd)] = 500 dd[dd == 0] = 500 depths[0, 1 + relative, :, :] = torch.from_numpy( dd).unsqueeze(0).unsqueeze(0).cuda() depths[depths == 0] = 500 #depths[depths == np.nan] = 500 pred_depths[i_cam] = depths.min(dim=1, keepdim=True)[0] world_points.append(cams[i_cam].reconstruct(pred_depths[i_cam], frame='w')) pred_depth_copy = pred_depths[i_cam].squeeze(0).squeeze( 0).cpu().numpy() pred_depth_copy = np.uint8(pred_depth_copy) lap = np.uint8( np.absolute(cv2.Laplacian(pred_depth_copy, cv2.CV_64F, ksize=3))) great_lap.append(lap < 4) great_lap[i_cam] = great_lap[i_cam].reshape(-1) images_numpy.append(images[i_cam][0].cpu().numpy()) images_numpy[i_cam] = images_numpy[i_cam].reshape( (3, -1)).transpose() images_numpy[i_cam] = images_numpy[i_cam][not_masked[i_cam] * great_lap[i_cam]] if load_pred_masks: predicted_masks[i_cam] = predicted_masks[i_cam][0].cpu().numpy( ) predicted_masks[i_cam] = predicted_masks[i_cam].reshape( (3, -1)).transpose() predicted_masks[i_cam] = predicted_masks[i_cam][ not_masked[i_cam] * great_lap[i_cam]] for i_cam in range(N_cams): world_points[i_cam] = world_points[i_cam][0].cpu().numpy() world_points[i_cam] = world_points[i_cam].reshape( (3, -1)).transpose() world_points[i_cam] = world_points[i_cam][not_masked[i_cam] * great_lap[i_cam]] cam_name = camera_names[i_cam] cam_int = cam_name.split('_')[-1] input_depth_files.append(get_depth_file( input_files[i_cam][i_file])) has_gt_depth.append(os.path.exists(input_depth_files[i_cam])) if has_gt_depth[i_cam]: gt_depth.append( np.load(input_depth_files[i_cam])['velodyne_depth'].astype( np.float32)) gt_depth[i_cam] = torch.from_numpy( gt_depth[i_cam]).unsqueeze(0).unsqueeze(0) if torch.cuda.is_available(): gt_depth[i_cam] = gt_depth[i_cam].to('cuda:{}'.format( rank()), dtype=dtype) gt_depth_3d.append(cams[i_cam].reconstruct(gt_depth[i_cam], frame='w')) gt_depth_3d[i_cam] = gt_depth_3d[i_cam][0].cpu().numpy() gt_depth_3d[i_cam] = gt_depth_3d[i_cam].reshape( (3, -1)).transpose() #gt_depth_3d[i_cam] = gt_depth_3d[i_cam][not_masked[i_cam]] else: gt_depth.append(0) gt_depth_3d.append(0) input_full_masks.append( get_full_mask_file(input_files[i_cam][i_file])) has_full_mask.append(os.path.exists(input_full_masks[i_cam])) pcl_full.append(o3d.geometry.PointCloud()) pcl_full[i_cam].points = o3d.utility.Vector3dVector( world_points[i_cam]) pcl_full[i_cam].colors = o3d.utility.Vector3dVector( images_numpy[i_cam]) pcl = pcl_full[i_cam] # .select_by_index(ind) points_tmp = np.asarray(pcl.points) colors_tmp = images_numpy[i_cam] # np.asarray(pcl.colors) # remove points that are above mask_below = points_tmp[:, 2] < -1.0 mask_height = points_tmp[:, 2] > 1.5 # * (abs(points_tmp[:, 0]) < 10) * (abs(points_tmp[:, 1]) < 3) mask_colors_blue = np.sum( np.abs(colors_tmp - np.array([0.6, 0.8, 1])), axis=1) < 0.6 # bleu ciel mask_colors_blue2 = np.sum( np.abs(colors_tmp - np.array([0.8, 1, 1])), axis=1) < 0.6 # bleu ciel mask_colors_green = np.sum( np.abs(colors_tmp - np.array([0.2, 1, 0.4])), axis=1) < 0.8 mask_colors_green2 = np.sum( np.abs(colors_tmp - np.array([0, 0.5, 0.15])), axis=1) < 0.2 mask_below = 1 - mask_below mask = 1 - mask_height * mask_colors_blue mask_bis = 1 - mask_height * mask_colors_blue2 mask2 = 1 - mask_height * mask_colors_green mask3 = 1 - mask_height * mask_colors_green2 mask = mask * mask_bis * mask2 * mask3 * mask_below if load_pred_masks: black_pixels = np.logical_or( np.sum(np.abs(predicted_masks[i_cam] * 255 - np.array([0, 0, 0])), axis=1) < 15, np.sum(np.abs(predicted_masks[i_cam] * 255 - 
np.array([127, 127, 127])), axis=1) < 20) #background_pixels = np.sum(np.abs(predicted_masks[i_cam]*255 - np.array([127, 127, 127])), axis=1) < 20 ind_black_pixels = np.where(black_pixels)[0] #ind_background_pixels = np.where(background_pixels)[0] color_vector = alpha_mask * predicted_masks[i_cam] + ( 1 - alpha_mask) * images_numpy[i_cam] color_vector[ind_black_pixels] = images_numpy[i_cam][ ind_black_pixels] #color_vector[ind_background_pixels] = images_numpy[i_cam][ind_background_pixels] pcl_full[i_cam].colors = o3d.utility.Vector3dVector( color_vector) # if has_full_mask[i_cam]: # full_mask = np.load(input_full_masks[i_cam]) # mask_colors = label_colors[correspondence[full_mask]].reshape((-1, 3))#.transpose() # mask_colors = mask_colors[not_masked[i_cam]*great_lap[i_cam]] # pcl_full[i_cam].colors = o3d.utility.Vector3dVector(alpha_mask * mask_colors + (1-alpha_mask) * images_numpy[i_cam]) pcl = pcl_full[i_cam] # .select_by_index(ind) pcl = pcl.select_by_index(np.where(mask)[0]) cl, ind = pcl.remove_statistical_outlier(nb_neighbors=7, std_ratio=1.2) pcl = pcl.select_by_index(ind) pcl = pcl.voxel_down_sample(voxel_size=0.02) #if has_full_mask[i_cam]: # pcl.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.2, max_nn=15)) pcl_only_inliers.append( pcl) #pcl_full[i_cam].select_by_index(ind)[mask]) if has_gt_depth[i_cam]: pcl_gt.append(o3d.geometry.PointCloud()) pcl_gt[i_cam].points = o3d.utility.Vector3dVector( gt_depth_3d[i_cam]) gt_inv_depth = 1 / (np.linalg.norm( gt_depth_3d[i_cam] - cams_middle, axis=1) + 1e-6) cm = get_cmap('plasma') normalizer = .35 #np.percentile(gt_inv_depth, 95) gt_inv_depth /= (normalizer + 1e-6) pcl_gt[i_cam].colors = o3d.utility.Vector3dVector( cm(np.clip(gt_inv_depth, 0., 1.0))[:, :3]) else: pcl_gt.append(0) threshold = 0.5 threshold2 = 0.1 if remove_close_points_lidar_semantic: for i_cam in range(4): if has_full_mask[i_cam]: for relative in [-1, 1]: if not has_full_mask[(i_cam + relative) % 4]: dists = pcl_only_inliers[ (i_cam + relative) % 4].compute_point_cloud_distance( pcl_only_inliers[i_cam]) p1 = pcl_only_inliers[ (i_cam + relative) % 4].select_by_index( np.where(np.asarray(dists) > threshold)[0]) p2 = pcl_only_inliers[ (i_cam + relative) % 4].select_by_index( np.where(np.asarray(dists) > threshold)[0], invert=True).uniform_down_sample( 15 ) #.voxel_down_sample(voxel_size=0.5) pcl_only_inliers[(i_cam + relative) % 4] = p1 + p2 if has_gt_depth[i_cam]: if has_full_mask[i_cam]: down = 15 else: down = 30 dists = pcl_only_inliers[ i_cam].compute_point_cloud_distance(pcl_gt[i_cam]) p1 = pcl_only_inliers[i_cam].select_by_index( np.where(np.asarray(dists) > threshold2)[0]) p2 = pcl_only_inliers[i_cam].select_by_index( np.where(np.asarray(dists) > threshold2)[0], invert=True).uniform_down_sample( down) #.voxel_down_sample(voxel_size=0.5) pcl_only_inliers[i_cam] = p1 + p2 if first_pic: for i_cam_n in range(120): vis_only_inliers = o3d.visualization.Visualizer() vis_only_inliers.create_window(visible=True, window_name='inliers' + str(i_file)) for i_cam in range(N_cams): vis_only_inliers.add_geometry(pcl_only_inliers[i_cam]) for i, e in enumerate(pcl_gt): if e != 0: vis_only_inliers.add_geometry(e) ctr = vis_only_inliers.get_view_control() ctr.set_lookat(lookat_vector) ctr.set_front(front_vector) ctr.set_up(up_vector) ctr.set_zoom(zoom_float) param = o3d.io.read_pinhole_camera_parameters( '/home/vbelissen/Downloads/test/cameras_jsons/sequence/test1_' + str(i_cam_n) + 'v3.json') ctr.convert_from_pinhole_camera_parameters(param) opt = 
vis_only_inliers.get_render_option() opt.background_color = np.asarray([0, 0, 0]) opt.point_size = 3.0 #opt.light_on = False #vis_only_inliers.update_geometry('inliers0') vis_only_inliers.poll_events() vis_only_inliers.update_renderer() if stop: vis_only_inliers.run() pcd1 = pcl_only_inliers[0] + pcl_only_inliers[ 1] + pcl_only_inliers[2] + pcl_only_inliers[3] for i_cam3 in range(4): if has_gt_depth[i_cam3]: pcd1 += pcl_gt[i_cam3] #o3d.io.write_point_cloud(os.path.join(output_folder, seq_name, 'open3d', base_0 + '.pcd'), pcd1) param = vis_only_inliers.get_view_control( ).convert_to_pinhole_camera_parameters() o3d.io.write_pinhole_camera_parameters( '/home/vbelissen/Downloads/test.json', param) image = vis_only_inliers.capture_screen_float_buffer(False) plt.imsave(os.path.join(output_folder, seq_name, 'pcl', base_0, str(i_cam_n) + '.png'), np.asarray(image), dpi=1) vis_only_inliers.destroy_window() del ctr del vis_only_inliers del opt first_pic = False i_cam2 = 0 #for i_cam2 in range(4): #for suff in ['', 'bis', 'ter']: suff = '' vis_only_inliers = o3d.visualization.Visualizer() vis_only_inliers.create_window(visible=True, window_name='inliers' + str(i_file)) for i_cam in range(N_cams): vis_only_inliers.add_geometry(pcl_only_inliers[i_cam]) if print_lidar: for i, e in enumerate(pcl_gt): if e != 0: vis_only_inliers.add_geometry(e) ctr = vis_only_inliers.get_view_control() ctr.set_lookat(lookat_vector) ctr.set_front(front_vector) ctr.set_up(up_vector) ctr.set_zoom(zoom_float) param = o3d.io.read_pinhole_camera_parameters( '/home/vbelissen/Downloads/test/cameras_jsons/sequence/test1_' + str(119) + 'v3.json') ctr.convert_from_pinhole_camera_parameters(param) opt = vis_only_inliers.get_render_option() opt.background_color = np.asarray([0, 0, 0]) opt.point_size = 3.0 #opt.light_on = False #vis_only_inliers.update_geometry('inliers0') vis_only_inliers.poll_events() vis_only_inliers.update_renderer() if stop: vis_only_inliers.run() pcd1 = pcl_only_inliers[0] + pcl_only_inliers[ 1] + pcl_only_inliers[2] + pcl_only_inliers[3] for i_cam3 in range(4): if has_gt_depth[i_cam3]: pcd1 += pcl_gt[i_cam3] if i_cam2 == 0 and suff == '': o3d.io.write_point_cloud( os.path.join(output_folder, seq_name, 'open3d', base_0 + '.pcd'), pcd1) #param = vis_only_inliers.get_view_control().convert_to_pinhole_camera_parameters() #o3d.io.write_pinhole_camera_parameters('/home/vbelissen/Downloads/test.json', param) image = vis_only_inliers.capture_screen_float_buffer(False) plt.imsave(os.path.join( output_folder, seq_name, 'pcl', 'normal', str(i_cam2) + suff, base_0 + '_normal_' + str(i_cam2) + suff + '.png'), np.asarray(image), dpi=1) vis_only_inliers.destroy_window() del ctr del vis_only_inliers del opt for i_cam in range(N_cams): rgb.append( images[i_cam][0].permute(1, 2, 0).detach().cpu().numpy() * 255) viz_pred_inv_depths.append( viz_inv_depth(pred_inv_depths[i_cam][0], normalizer=0.8) * 255) viz_pred_inv_depths[i_cam][not_masked[i_cam].reshape(image_shape) == 0] = 0 concat = np.concatenate([rgb[i_cam], viz_pred_inv_depths[i_cam]], 0) # Save visualization output_file1 = os.path.join( output_folder, seq_name, 'depth', camera_names[i_cam], os.path.basename(input_files[i_cam][i_file])) output_file2 = os.path.join( output_folder, seq_name, 'rgb', camera_names[i_cam], os.path.basename(input_files[i_cam][i_file])) imwrite(output_file1, viz_pred_inv_depths[i_cam][:, :, ::-1]) # if has_full_mask[i_cam]: # full_mask = np.load(input_full_masks[i_cam]) # mask_colors = label_colors[correspondence[full_mask]] # imwrite(output_file2, 
            #              (1-alpha_mask) * rgb[i_cam][:, :, ::-1] + alpha_mask * mask_colors[:, :, ::-1]*255)
            # else:
            imwrite(output_file2, rgb[i_cam][:, :, ::-1])
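# Sketch of the depth-discontinuity filter applied above: the predicted depth
# map is cast to uint8, its Laplacian is computed, and pixels with a strong
# response (depth edges, where "flying" 3D points tend to appear) are dropped.
# The threshold of 4 and kernel size of 3 match the values used above; the
# helper name is ours.
import cv2
import numpy as np

def smooth_depth_mask(depth, lap_thresh=4, ksize=3):
    """Return a flattened boolean mask, True where the depth map is locally smooth."""
    depth_u8 = np.uint8(depth)
    lap = np.uint8(np.absolute(cv2.Laplacian(depth_u8, cv2.CV_64F, ksize=ksize)))
    return (lap < lap_thresh).reshape(-1)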
def infer_optimal_calib(input_files, model_wrappers, image_shape): """ Process a list of input files to infer correction in extrinsic calibration. Files should all correspond to the same car. Number of cameras is assumed to be 4 or 5. Parameters ---------- input_file : list (number of cameras) of lists (number of files) of str Image file model_wrappers : nn.Module Model wrappers used for inference image_shape : Image shape Input image shape """ N_files = len(input_files[0]) N_cams = len(input_files) image_area = image_shape[0] * image_shape[1] camera_context_pairs = CAMERA_CONTEXT_PAIRS[N_cams] # Rotation will be optimized if not all cams are frozen optimize_rotation = (args.frozen_cams_rot != [i for i in range(N_cams)]) # Rotation will be optimized if not all cams are frozen optimize_translation = (args.frozen_cams_trans != [i for i in range(N_cams)]) calib_data = {} for i_cam in range(N_cams): base_folder_str = get_base_folder(input_files[i_cam][0]) split_type_str = get_split_type(input_files[i_cam][0]) seq_name_str = get_sequence_name(input_files[i_cam][0]) camera_str = get_camera_name(input_files[i_cam][0]) calib_data[camera_str] = read_raw_calib_files_camera_valeo_with_suffix( base_folder_str, split_type_str, seq_name_str, camera_str, args.calibrations_suffix) cams = [] cams_untouched = [] not_masked = [] # Assume all images are from the same sequence (thus same cameras) for i_cam in range(N_cams): path_to_ego_mask = get_path_to_ego_mask(input_files[i_cam][0]) poly_coeffs, principal_point, scale_factors, K, k, p = get_full_intrinsics( input_files[i_cam][0], calib_data) poly_coeffs_untouched = torch.from_numpy(poly_coeffs).unsqueeze(0) principal_point_untouched = torch.from_numpy( principal_point).unsqueeze(0) scale_factors_untouched = torch.from_numpy(scale_factors).unsqueeze(0) K_untouched = torch.from_numpy(K).unsqueeze(0) k_untouched = torch.from_numpy(k).unsqueeze(0) p_untouched = torch.from_numpy(p).unsqueeze(0) pose_matrix_untouched = torch.from_numpy( get_extrinsics_pose_matrix(input_files[i_cam][0], calib_data)).unsqueeze(0) pose_tensor_untouched = Pose(pose_matrix_untouched) camera_type_untouched = get_camera_type(input_files[i_cam][0], calib_data) camera_type_int_untouched = torch.tensor( [get_camera_type_int(camera_type_untouched)]) cams.append( CameraMultifocal(poly_coeffs=poly_coeffs_untouched.float(), principal_point=principal_point_untouched.float(), scale_factors=scale_factors_untouched.float(), K=K_untouched.float(), k1=k_untouched[:, 0].float(), k2=k_untouched[:, 1].float(), k3=k_untouched[:, 2].float(), p1=p_untouched[:, 0].float(), p2=p_untouched[:, 1].float(), camera_type=camera_type_int_untouched, Tcw=pose_tensor_untouched)) cams_untouched.append( CameraMultifocal(poly_coeffs=poly_coeffs_untouched.float(), principal_point=principal_point_untouched.float(), scale_factors=scale_factors_untouched.float(), K=K_untouched.float(), k1=k_untouched[:, 0].float(), k2=k_untouched[:, 1].float(), k3=k_untouched[:, 2].float(), p1=p_untouched[:, 0].float(), p2=p_untouched[:, 1].float(), camera_type=camera_type_int_untouched, Tcw=pose_tensor_untouched)) if torch.cuda.is_available(): cams[i_cam] = cams[i_cam].to('cuda:{}'.format(rank())) cams_untouched[i_cam] = cams_untouched[i_cam].to('cuda:{}'.format( rank())) ego_mask = np.load(path_to_ego_mask) not_masked.append( torch.from_numpy(ego_mask.astype(float)).cuda().float()) # Learning variables extra_trans_m = [ torch.autograd.Variable(torch.zeros(3).cuda(), requires_grad=True) for _ in range(N_cams) ] extra_rot_deg = [ 
torch.autograd.Variable(torch.zeros(3).cuda(), requires_grad=True) for _ in range(N_cams) ] # Constraints: translation frozen_cams_trans = args.frozen_cams_trans if frozen_cams_trans is not None: for i_cam in frozen_cams_trans: extra_trans_m[i_cam].requires_grad = False # Constraints: rotation frozen_cams_rot = args.frozen_cams_rot if frozen_cams_rot is not None: for i_cam in frozen_cams_rot: extra_rot_deg[i_cam].requires_grad = False # Parameters from argument parser save_pictures = args.save_pictures n_epochs = args.n_epochs learning_rate = args.lr step_size = args.scheduler_step_size gamma = args.scheduler_gamma # Table of loss loss_tab = np.zeros(n_epochs) # Table of extra rotation values extra_rot_values_tab = np.zeros((N_cams * 3, N_files * n_epochs)) # Table of extra translation values extra_trans_values_tab = np.zeros((N_cams * 3, N_files * n_epochs)) # Optimizer optimizer = optim.Adam(extra_trans_m + extra_rot_deg, lr=learning_rate) # Scheduler scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma) # Regularization weights regul_weight_trans = torch.tensor(args.regul_weight_trans).cuda() regul_weight_rot = torch.tensor(args.regul_weight_rot).cuda() regul_weight_overlap = torch.tensor(args.regul_weight_overlap).cuda() # Loop on the number of epochs count = 0 for epoch in range(n_epochs): print('Epoch ' + str(epoch) + '/' + str(n_epochs)) # Initialize loss loss_sum = 0 # Loop on the number of files for i_file in range(N_files): print('') # Filename for camera 0 base_0, ext_0 = os.path.splitext( os.path.basename(input_files[0][i_file])) print(base_0) # Initialize list of tensors: images, predicted inverse depths and predicted depths images, pred_inv_depths, pred_depths = [], [], [] input_depth_files, has_gt_depth, gt_depth, gt_inv_depth = [], [], [], [] nb_gt_depths = 0 # Reset camera poses Twc CameraMultifocal.Twc.fget.cache_clear() # Loop on cams and predict depth for i_cam in range(N_cams): images.append( load_image(input_files[i_cam][i_file]).convert('RGB')) images[i_cam] = resize_image(images[i_cam], image_shape) images[i_cam] = to_tensor(images[i_cam]).unsqueeze(0) if torch.cuda.is_available(): images[i_cam] = images[i_cam].to('cuda:{}'.format(rank())) with torch.no_grad(): pred_inv_depths.append(model_wrappers[i_cam].depth( images[i_cam])) pred_depths.append(inv2depth(pred_inv_depths[i_cam])) if args.use_lidar: input_depth_files.append( get_depth_file(input_files[i_cam][i_file], args.depth_suffix)) has_gt_depth.append( os.path.exists(input_depth_files[i_cam])) if has_gt_depth[i_cam]: nb_gt_depths += 1 gt_depth.append( np.load(input_depth_files[i_cam]) ['velodyne_depth'].astype(np.float32)) gt_depth[i_cam] = torch.from_numpy( gt_depth[i_cam]).unsqueeze(0).unsqueeze(0) gt_inv_depth.append(depth2inv(gt_depth[i_cam])) if torch.cuda.is_available(): gt_depth[i_cam] = gt_depth[i_cam].to( 'cuda:{}'.format(rank())) gt_inv_depth[i_cam] = gt_inv_depth[i_cam].to( 'cuda:{}'.format(rank())) else: gt_depth.append(0) gt_inv_depth.append(0) # Apply correction on cams pose_matrix = get_extrinsics_pose_matrix_extra_trans_rot_torch( input_files[i_cam][i_file], calib_data, extra_trans_m[i_cam], extra_rot_deg[i_cam]).unsqueeze(0) pose_tensor = Pose(pose_matrix).to('cuda:{}'.format(rank())) cams[i_cam].Tcw = pose_tensor # Define a loss function between 2 images def photo_loss_2imgs(i_cam1, i_cam2, save_pictures): # Computes the photometric loss between 2 images of adjacent cameras # It reconstructs each image from the adjacent one, applying correction in rotation and 
translation # Reconstruct 3D points for each cam world_points1 = cams[i_cam1].reconstruct(pred_depths[i_cam1], frame='w') world_points2 = cams[i_cam2].reconstruct(pred_depths[i_cam2], frame='w') # Get coordinates of projected points on other cam ref_coords1to2 = cams[i_cam2].project(world_points1, frame='w') ref_coords2to1 = cams[i_cam1].project(world_points2, frame='w') # Reconstruct each image from the adjacent camera reconstructedImg2to1 = funct.grid_sample(images[i_cam2] * not_masked[i_cam2], ref_coords1to2, mode='bilinear', padding_mode='zeros', align_corners=True) reconstructedImg1to2 = funct.grid_sample(images[i_cam1] * not_masked[i_cam1], ref_coords2to1, mode='bilinear', padding_mode='zeros', align_corners=True) # Save pictures if requested if save_pictures: # Save original files if first epoch if epoch == 0: cv2.imwrite( args.save_folder + '/cam_' + str(i_cam1) + '_file_' + str(i_file) + '_orig.png', (images[i_cam1][0].permute(1, 2, 0) )[:, :, [2, 1, 0]].detach().cpu().numpy() * 255) cv2.imwrite( args.save_folder + '/cam_' + str(i_cam2) + '_file_' + str(i_file) + '_orig.png', (images[i_cam2][0].permute(1, 2, 0) )[:, :, [2, 1, 0]].detach().cpu().numpy() * 255) # Save reconstructed images cv2.imwrite( args.save_folder + '/epoch_' + str(epoch) + '_file_' + str(i_file) + '_cam_' + str(i_cam1) + '_recons_from_' + str(i_cam2) + '.png', ((reconstructedImg2to1 * not_masked[i_cam1])[0].permute(1, 2, 0) )[:, :, [2, 1, 0]].detach().cpu().numpy() * 255) cv2.imwrite( args.save_folder + '/epoch_' + str(epoch) + '_file_' + str(i_file) + '_cam_' + str(i_cam2) + '_recons_from_' + str(i_cam1) + '.png', ((reconstructedImg1to2 * not_masked[i_cam2])[0].permute(1, 2, 0) )[:, :, [2, 1, 0]].detach().cpu().numpy() * 255) # L1 loss l1_loss_1 = torch.abs(images[i_cam1] * not_masked[i_cam1] - reconstructedImg2to1 * not_masked[i_cam1]) l1_loss_2 = torch.abs(images[i_cam2] * not_masked[i_cam2] - reconstructedImg1to2 * not_masked[i_cam2]) # SSIM loss ssim_loss_weight = 0.85 ssim_loss_1 = SSIM(images[i_cam1] * not_masked[i_cam1], reconstructedImg2to1 * not_masked[i_cam1], C1=1e-4, C2=9e-4, kernel_size=3) ssim_loss_2 = SSIM(images[i_cam2] * not_masked[i_cam2], reconstructedImg1to2 * not_masked[i_cam2], C1=1e-4, C2=9e-4, kernel_size=3) ssim_loss_1 = torch.clamp((1. - ssim_loss_1) / 2., 0., 1.) ssim_loss_2 = torch.clamp((1. - ssim_loss_2) / 2., 0., 1.) # Photometric loss: alpha * ssim + (1 - alpha) * l1 photometric_loss_1 = ssim_loss_weight * ssim_loss_1.mean( 1, True) + (1 - ssim_loss_weight) * l1_loss_1.mean( 1, True) photometric_loss_2 = ssim_loss_weight * ssim_loss_2.mean( 1, True) + (1 - ssim_loss_weight) * l1_loss_2.mean( 1, True) # Compute the number of valid pixels mask1 = (reconstructedImg2to1 * not_masked[i_cam1]).sum( axis=1, keepdim=True) != 0 s1 = mask1.sum().float() mask2 = (reconstructedImg1to2 * not_masked[i_cam2]).sum( axis=1, keepdim=True) != 0 s2 = mask2.sum().float() # Compute the photometric losses weighed by the number of valid pixels loss_1 = (photometric_loss_1 * mask1).sum() / s1 if s1 > 0 else 0 loss_2 = (photometric_loss_2 * mask2).sum() / s2 if s2 > 0 else 0 # The final loss can be regularized to encourage a similar overlap between images if s1 > 0 and s2 > 0: return loss_1 + loss_2 + regul_weight_overlap * image_area * ( 1 / s1 + 1 / s2) else: return 0. 
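            # Note on photo_loss_2imgs above (descriptive only): per pixel the
            # photometric term is a weighted mix of structural and absolute
            # differences,
            #     photometric = 0.85 * (1 - SSIM) / 2 + 0.15 * |I - I_rec|,
            # averaged over channels, then averaged over the valid pixels of
            # each reconstruction (s1 and s2 are the valid-pixel counts). The
            # extra term regul_weight_overlap * image_area * (1/s1 + 1/s2)
            # grows as the reconstructed overlap shrinks, discouraging
            # calibrations that simply push the cameras out of each other's
            # field of view.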
def lidar_loss(i_cam1, save_pictures): if args.use_lidar and has_gt_depth[i_cam1]: mask_zeros_lidar = ( gt_depth[i_cam1][0, 0, :, :] == 0).detach() # Ground truth sparse depth maps were generated using the untouched camera extrinsics world_points_gt_oldCalib = cams_untouched[ i_cam1].reconstruct(gt_depth[i_cam1], frame='w') world_points_gt_oldCalib[0, 0, mask_zeros_lidar] = 0. # Get coordinates of projected points on new cam ref_coords = cams[i_cam1].project(world_points_gt_oldCalib, frame='w') ref_coords[0, mask_zeros_lidar, :] = 0. # Reconstruct projected lidar from the new camera reprojected_gt_inv_depth = funct.grid_sample( gt_inv_depth[i_cam1], ref_coords, mode='nearest', padding_mode='zeros', align_corners=True) reprojected_gt_inv_depth[0, 0, mask_zeros_lidar] = 0. mask_reprojected = (reprojected_gt_inv_depth > 0.).detach() if save_pictures: mask_reprojected_numpy = mask_reprojected[ 0, 0, :, :].cpu().numpy() u = np.where(mask_reprojected_numpy)[0] v = np.where(mask_reprojected_numpy)[1] n_lidar = u.size reprojected_gt_depth_numpy = inv2depth( reprojected_gt_inv_depth)[ 0, 0, :, :].detach().cpu().numpy() im = (images[i_cam1][0].permute( 1, 2, 0))[:, :, [2, 1, 0]].detach().cpu().numpy() * 255 dmax = 100. for i_l in range(n_lidar): d = reprojected_gt_depth_numpy[u[i_l], v[i_l]] s = int((8 / d)) + 1 im[u[i_l] - s:u[i_l] + s, v[i_l] - s:v[i_l] + s, 0] = np.clip( np.power(d / dmax, .7) * 255, 10, 245) im[u[i_l] - s:u[i_l] + s, v[i_l] - s:v[i_l] + s, 1] = np.clip( np.power((dmax - d) / dmax, 4.0) * 255, 10, 245) im[u[i_l] - s:u[i_l] + s, v[i_l] - s:v[i_l] + s, 2] = np.clip( np.power(np.abs(2 * (d - .5 * dmax) / dmax), 3.0) * 255, 10, 245) cv2.imwrite( args.save_folder + '/epoch_' + str(epoch) + '_file_' + str(i_file) + '_cam_' + str(i_cam1) + '_lidar.png', im) if mask_reprojected.sum() > 0: return l1_lidar_loss( pred_inv_depths[i_cam1] * not_masked[i_cam1], reprojected_gt_inv_depth * not_masked[i_cam1]) else: return 0. else: return 0. if nb_gt_depths > 0: final_lidar_weight = (N_cams / nb_gt_depths) * args.lidar_weight else: final_lidar_weight = 0. # The final loss consists of summing the photometric loss of all pairs of adjacent cameras # and is regularized to prevent weights from exploding photo_loss = 1.0 * sum([ photo_loss_2imgs(p[0], p[1], save_pictures) for p in camera_context_pairs ]) regul_rot_loss = regul_weight_rot * sum( [(extra_rot_deg[i]**2).sum() for i in range(N_cams)]) regul_trans_loss = regul_weight_trans * sum( [(extra_trans_m[i]**2).sum() for i in range(N_cams)]) lidar_gt_loss = final_lidar_weight * sum( [lidar_loss(i, save_pictures) for i in range(N_cams)]) loss = photo_loss + regul_rot_loss + regul_trans_loss + lidar_gt_loss with torch.no_grad(): extra_rot_deg_before = [] extra_trans_m_before = [] for i_cam in range(N_cams): extra_rot_deg_before.append(extra_rot_deg[i_cam].clone()) extra_trans_m_before.append(extra_trans_m[i_cam].clone()) # Optimization steps optimizer.zero_grad() loss.backward() optimizer.step() with torch.no_grad(): extra_rot_deg_after = [] extra_trans_m_after = [] for i_cam in range(N_cams): extra_rot_deg_after.append(extra_rot_deg[i_cam].clone()) extra_trans_m_after.append(extra_trans_m[i_cam].clone()) rot_change_file = 0. trans_change_file = 0. 
for i_cam in range(N_cams): rot_change_file += torch.abs( extra_rot_deg_after[i_cam] - extra_rot_deg_before[i_cam]).mean().item() trans_change_file += torch.abs( extra_trans_m_after[i_cam] - extra_trans_m_before[i_cam]).mean().item() rot_change_file /= N_cams trans_change_file /= N_cams print('Average rotation change (deg.): ' + "{:.4f}".format(rot_change_file)) print('Average translation change (m.): ' + "{:.4f}".format(trans_change_file)) # Save correction values and print loss with torch.no_grad(): loss_sum += loss.item() for i_cam in range(N_cams): for j in range(3): if optimize_rotation: extra_rot_values_tab[ 3 * i_cam + j, count] = extra_rot_deg[i_cam][j].item() if optimize_translation: extra_trans_values_tab[ 3 * i_cam + j, count] = extra_trans_m[i_cam][j].item() print('Loss: ' + "{:.3f}".format(loss.item()) \ + ' (photometric: ' + "{:.3f}".format(photo_loss.item()) \ + ', rotation reg.: ' + "{:.4f}".format(regul_rot_loss.item()) \ + ', translation reg.: ' + "{:.4f}".format(regul_trans_loss.item()) + ', lidar: ' + "{:.3f}".format(lidar_gt_loss) +')') if nb_gt_depths > 0: print('Number of ground truth lidar maps: ' + str(nb_gt_depths)) count += 1 # Update scheduler print('Epoch:', epoch, 'LR:', scheduler.get_lr()) scheduler.step() with torch.no_grad(): print('End of epoch') if optimize_translation: print('New translation correction values: ') print(extra_trans_m) if optimize_rotation: print('New rotation correction values: ') print(extra_rot_deg) print('Average rotation change in epoch:') loss_tab[epoch] = loss_sum / N_files # Plot/save loss if requested plt.figure() plt.plot(loss_tab) if args.show_plots: plt.show() if args.save_plots: plt.savefig( os.path.join(args.save_folder, get_sequence_name(input_files[0][0]) + '_loss.png')) # Plot/save correction values if requested if optimize_rotation: plt.figure() for j in range(N_cams * 3): plt.plot(extra_rot_values_tab[j]) if args.show_plots: plt.show() if args.save_plots: plt.savefig( os.path.join( args.save_folder, get_sequence_name(input_files[0][0]) + '_extra_rot.png')) if optimize_translation: plt.figure() for j in range(N_cams * 3): plt.plot(extra_trans_values_tab[j]) if args.show_plots: plt.show() if args.save_plots: plt.savefig( os.path.join( args.save_folder, get_sequence_name(input_files[0][0]) + '_extra_trans.png')) # Save correction values table if requested if args.save_rot_tab: np.save( os.path.join(args.save_folder, get_sequence_name(input_files[0][0]) + '_rot_tab.npy'), extra_rot_values_tab)
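# Minimal, self-contained sketch of the optimization loop above: per-camera
# rotation/translation corrections are learned with Adam under L2 penalties
# that keep them close to zero, with a StepLR schedule. The photometric data
# term is stubbed out here (dummy_photo_loss); in the real function it is the
# sum of photo_loss_2imgs over adjacent camera pairs plus an optional lidar
# term. All names and default values below are illustrative.
import torch
import torch.optim as optim

def calibrate_corrections(n_cams=4, n_epochs=10, lr=1e-3,
                          regul_weight_rot=1e-3, regul_weight_trans=1e-3):
    extra_rot_deg = [torch.zeros(3, requires_grad=True) for _ in range(n_cams)]
    extra_trans_m = [torch.zeros(3, requires_grad=True) for _ in range(n_cams)]
    optimizer = optim.Adam(extra_rot_deg + extra_trans_m, lr=lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

    def dummy_photo_loss():
        # Placeholder for the sum of pairwise photometric losses; it simply
        # pulls every rotation correction toward an arbitrary 0.1 degrees so
        # that the loop has something to optimize.
        return sum(((r - 0.1) ** 2).sum() for r in extra_rot_deg)

    for epoch in range(n_epochs):
        optimizer.zero_grad()
        loss = dummy_photo_loss() \
            + regul_weight_rot * sum((r ** 2).sum() for r in extra_rot_deg) \
            + regul_weight_trans * sum((t ** 2).sum() for t in extra_trans_m)
        loss.backward()
        optimizer.step()
        scheduler.step()
    return extra_rot_deg, extra_trans_m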
def infer_plot_and_save_3D_pcl(input_files, output_folder, model_wrappers, image_shape, half, save, stop): """ Process a single input file to produce and save visualization Parameters ---------- input_file : list (number of cameras) of lists (number of files) of str Image file output_file : str Output file, or folder where the output will be saved model_wrapper : nn.Module Model wrapper used for inference image_shape : Image shape Input image shape half: bool use half precision (fp16) save: str Save format (npz or png) """ N_cams = len(input_files) N_files = len(input_files[0]) camera_names = [] for i_cam in range(N_cams): camera_names.append(get_camera_name(input_files[i_cam][0])) cams = [] not_masked = [] cams_x = [] cams_y = [] # change to half precision for evaluation if requested dtype = torch.float16 if half else None bbox = o3d.geometry.AxisAlignedBoundingBox(min_bound=(-1000, -1000, -1), max_bound=(1000, 1000, 5)) # let's assume all images are from the same sequence (thus same cameras) for i_cam in range(N_cams): base_folder_str = get_base_folder(input_files[i_cam][0]) split_type_str = get_split_type(input_files[i_cam][0]) seq_name_str = get_sequence_name(input_files[i_cam][0]) camera_str = get_camera_name(input_files[i_cam][0]) calib_data = {} calib_data[camera_str] = read_raw_calib_files_camera_valeo(base_folder_str, split_type_str, seq_name_str, camera_str) cams_x.append(float(calib_data[camera_str]['extrinsics']['pos_x_m'])) cams_y.append(float(calib_data[camera_str]['extrinsics']['pos_y_m'])) path_to_theta_lut = get_path_to_theta_lut(input_files[i_cam][0]) path_to_ego_mask = get_path_to_ego_mask(input_files[i_cam][0]) poly_coeffs, principal_point, scale_factors = get_intrinsics(input_files[i_cam][0], calib_data) poly_coeffs = torch.from_numpy(poly_coeffs).unsqueeze(0) principal_point = torch.from_numpy(principal_point).unsqueeze(0) scale_factors = torch.from_numpy(scale_factors).unsqueeze(0) pose_matrix = torch.from_numpy(get_extrinsics_pose_matrix(input_files[i_cam][0], calib_data)).unsqueeze(0) pose_tensor = Pose(pose_matrix) cams.append(CameraFisheye(path_to_theta_lut=[path_to_theta_lut], path_to_ego_mask=[path_to_ego_mask], poly_coeffs=poly_coeffs.float(), principal_point=principal_point.float(), scale_factors=scale_factors.float(), Tcw=pose_tensor)) if torch.cuda.is_available(): cams[i_cam] = cams[i_cam].to('cuda:{}'.format(rank()), dtype=dtype) ego_mask = np.load(path_to_ego_mask) not_masked.append(ego_mask.astype(bool).reshape(-1)) # create output dirs for each cam seq_name = get_sequence_name(input_files[0][0]) for i_cam in range(N_cams): os.makedirs(os.path.join(output_folder, seq_name, 'depth', camera_names[i_cam]), exist_ok=True) os.makedirs(os.path.join(output_folder, seq_name, 'rgb', camera_names[i_cam]), exist_ok=True) i_file=0 values = np.arange(1.0,15.0,0.5) N_values = values.size nb_points = np.zeros((N_values, 4)) n = 0 for K in values: print(K) base_0, ext_0 = os.path.splitext(os.path.basename(input_files[0][i_file])) print(base_0) images = [] images_numpy = [] pred_inv_depths = [] pred_depths = [] world_points = [] input_depth_files = [] has_gt_depth = [] gt_depth = [] gt_depth_3d = [] pcl_full = [] pcl_only_inliers = [] pcl_only_outliers = [] pcl_gt = [] rgb = [] viz_pred_inv_depths = [] for i_cam in range(N_cams): images.append(load_image(input_files[i_cam][i_file]).convert('RGB')) images[i_cam] = resize_image(images[i_cam], image_shape) images[i_cam] = to_tensor(images[i_cam]).unsqueeze(0) if torch.cuda.is_available(): images[i_cam] = 
images[i_cam].to('cuda:{}'.format(rank()), dtype=dtype) images_numpy.append(images[i_cam][0].cpu().numpy()) images_numpy[i_cam] = images_numpy[i_cam].reshape((3, -1)).transpose() images_numpy[i_cam] = images_numpy[i_cam][not_masked[i_cam]] pred_inv_depths.append(model_wrappers[i_cam].depth(images[i_cam])) pred_depths.append(inv2depth(pred_inv_depths[i_cam])) world_points.append(cams[i_cam].reconstruct(K*pred_depths[i_cam], frame='w')) world_points[i_cam] = world_points[i_cam][0].cpu().numpy() world_points[i_cam] = world_points[i_cam].reshape((3, -1)).transpose() world_points[i_cam] = world_points[i_cam][not_masked[i_cam]] cam_name = camera_names[i_cam] cam_int = cam_name.split('_')[-1] input_depth_files.append(get_depth_file(input_files[i_cam][i_file])) has_gt_depth.append(os.path.exists(input_depth_files[i_cam])) if has_gt_depth[i_cam]: gt_depth.append(np.load(input_depth_files[i_cam])['velodyne_depth'].astype(np.float32)) gt_depth[i_cam] = torch.from_numpy(gt_depth[i_cam]).unsqueeze(0).unsqueeze(0) if torch.cuda.is_available(): gt_depth[i_cam] = gt_depth[i_cam].to('cuda:{}'.format(rank()), dtype=dtype) gt_depth_3d.append(cams[i_cam].reconstruct(gt_depth[i_cam], frame='w')) gt_depth_3d[i_cam] = gt_depth_3d[i_cam][0].cpu().numpy() gt_depth_3d[i_cam] = gt_depth_3d[i_cam].reshape((3, -1)).transpose() #gt_depth_3d[i_cam] = gt_depth_3d[i_cam][not_masked[i_cam]] else: gt_depth.append(0) gt_depth_3d.append(0) pcl_full.append(o3d.geometry.PointCloud()) pcl_full[i_cam].points = o3d.utility.Vector3dVector(world_points[i_cam]) pcl_full[i_cam].colors = o3d.utility.Vector3dVector(images_numpy[i_cam]) pcl = pcl_full[i_cam]#.select_by_index(ind) points_tmp = np.asarray(pcl.points) colors_tmp = np.asarray(pcl.colors) # remove points that are above mask_height = points_tmp[:, 2] > 2.5# * (abs(points_tmp[:, 0]) < 10) * (abs(points_tmp[:, 1]) < 3) mask_colors_blue = np.sum(np.abs(colors_tmp - np.array([0.6, 0.8, 1])), axis=1) < 0.6 # bleu ciel mask_colors_green = np.sum(np.abs(colors_tmp - np.array([0.2, 1, 0.4])), axis=1) < 0.8 mask_colors_green2 = np.sum(np.abs(colors_tmp - np.array([0, 0.5, 0.15])), axis=1) < 0.2 mask = 1-mask_height*mask_colors_blue mask2 = 1-mask_height*mask_colors_green mask3 = 1- mask_height*mask_colors_green2 #maskY = points_tmp[:, 1] > 1 mask = mask*mask2*mask3#*maskY pcl = pcl.select_by_index(np.where(mask)[0]) cl, ind = pcl.remove_statistical_outlier(nb_neighbors=7, std_ratio=1.4) pcl = pcl.select_by_index(ind) pcl_only_inliers.append(pcl)#pcl_full[i_cam].select_by_index(ind)[mask]) #pcl_only_outliers.append(pcl_full[i_cam].select_by_index(ind, invert=True)) #pcl_only_outliers[i_cam].paint_uniform_color([0.0, 0.0, 1.0]) if has_gt_depth[i_cam]: pcl_gt.append(o3d.geometry.PointCloud()) pcl_gt[i_cam].points = o3d.utility.Vector3dVector(gt_depth_3d[i_cam]) gt_inv_depth = 1 / (np.linalg.norm(gt_depth_3d[i_cam], axis=1) + 1e-6) cm = get_cmap('plasma') normalizer = .35#np.percentile(gt_inv_depth, 95) gt_inv_depth /= (normalizer + 1e-6) #print(cm(np.clip(gt_inv_depth, 0., 1.0)).shape) # [:, :3] pcl_gt[i_cam].colors = o3d.utility.Vector3dVector(cm(np.clip(gt_inv_depth, 0., 1.0))[:, :3]) else: pcl_gt.append(0) #o3d.visualization.draw_geometries(pcl_full + [e for i, e in enumerate(pcl_gt) if e != 0]) color_cam = True if color_cam: for i_cam in range(4): if i_cam == 0: pcl_only_inliers[i_cam].paint_uniform_color([1.0, 0.0, 0.0])#.colors = o3d.utility.Vector3dVector(images_numpy[i_cam]) elif i_cam == 1: pcl_only_inliers[i_cam].paint_uniform_color([0.0, 1.0, 0.0]) elif i_cam == 2: 
pcl_only_inliers[i_cam].paint_uniform_color([0.0, 0.0, 1.0]) elif i_cam == 3: pcl_only_inliers[i_cam].paint_uniform_color([0.3, 0.4, 0.3]) dist_total = np.zeros(4) for i_cam in range(4): pcl1 = pcl_only_inliers[i_cam % 4].uniform_down_sample(10) pcl2 = pcl_only_inliers[(i_cam+1) % 4].uniform_down_sample(10) points_tmp1 = np.asarray(pcl1.points) points_tmp2 = np.asarray(pcl2.points) if i_cam == 3 or i_cam == 0: # comparison between cams 3 and 0, or between cams 0 and 1 maskX1 = points_tmp1[:, 0] > cams_x[0] maskX2 = points_tmp2[:, 0] > cams_x[0] if i_cam == 1 or i_cam == 2: # comparison between cams 1 and 2, or between cams 2 and 3 maskX1 = points_tmp1[:, 0] < cams_x[2] maskX2 = points_tmp2[:, 0] < cams_x[2] if i_cam == 2 or i_cam == 3: # comparison between cams 2 and 3, or between cams 3 and 0 maskY1 = points_tmp1[:, 1] > cams_y[3] maskY2 = points_tmp2[:, 1] > cams_y[3] if i_cam == 0 or i_cam == 1: # comparison between cams 0 and 1, or between cams 1 and 2 maskY1 = points_tmp1[:, 1] < cams_y[1] maskY2 = points_tmp2[:, 1] < cams_y[1] mask_far1 = np.sqrt(np.square(points_tmp1[:, 0] - (cams_x[0] + cams_x[2])/2) + np.square(points_tmp1[:, 1] - (cams_y[1] + cams_y[3])/2)) < 5 mask_far2 = np.sqrt(np.square(points_tmp2[:, 0] - (cams_x[0] + cams_x[2]) / 2) + np.square(points_tmp2[:, 1] - (cams_y[1] + cams_y[3]) / 2)) < 5 pcl1 = pcl1.select_by_index(np.where(maskX1 * maskY1 * mask_far1)[0]) pcl2 = pcl2.select_by_index(np.where(maskX2 * maskY2 * mask_far2)[0]) o3d.visualization.draw_geometries([pcl1, pcl2]) dists = pcl1.compute_point_cloud_distance(pcl2) nb_points[n, i_cam] = np.sum(np.asarray(dists)<0.5) print(nb_points[n, i_cam]) dists = np.mean(np.asarray(dists)) print(dists) dist_total[i_cam] = dists print(np.mean(dist_total)) # vis_full = o3d.visualization.Visualizer() # vis_full.create_window(visible = True, window_name = 'full'+str(i_file)) # for i_cam in range(N_cams): # vis_full.add_geometry(pcl_full[i_cam]) # for i, e in enumerate(pcl_gt): # if e != 0: # vis_full.add_geometry(e) # ctr = vis_full.get_view_control() # ctr.set_lookat(lookat_vector) # ctr.set_front(front_vector) # ctr.set_up(up_vector) # ctr.set_zoom(zoom_float) # #vis_full.run() # vis_full.destroy_window() i_cam2=0 suff='' vis_only_inliers = o3d.visualization.Visualizer() vis_only_inliers.create_window(visible = True, window_name = 'inliers'+str(i_file)) for i_cam in range(N_cams): vis_only_inliers.add_geometry(pcl_only_inliers[i_cam]) for i, e in enumerate(pcl_gt): if e != 0: vis_only_inliers.add_geometry(e) ctr = vis_only_inliers.get_view_control() ctr.set_lookat(lookat_vector) ctr.set_front(front_vector) ctr.set_up(up_vector) ctr.set_zoom(zoom_float) param = o3d.io.read_pinhole_camera_parameters('/home/vbelissen/Downloads/test/cameras_jsons/test'+str(i_cam2+1)+suff+'.json') ctr.convert_from_pinhole_camera_parameters(param) opt = vis_only_inliers.get_render_option() opt.background_color = np.asarray([0, 0, 0]) #vis_only_inliers.update_geometry('inliers0') vis_only_inliers.poll_events() vis_only_inliers.update_renderer() if stop: vis_only_inliers.run() #param = vis_only_inliers.get_view_control().convert_to_pinhole_camera_parameters() #o3d.io.write_pinhole_camera_parameters('/home/vbelissen/Downloads/test.json', param) vis_only_inliers.destroy_window() del ctr del vis_only_inliers del opt n += 1 plt.plot(values, nb_points) plt.show() plt.close() plt.plot(values, np.sum(nb_points,axis=1)) plt.show()
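# --- Illustrative sketch (not part of the original pipeline) ----------------
# The loop above searches over a global depth scale K by checking how well the
# point clouds of adjacent cameras agree in their overlap region: it counts
# nearest-neighbour distances below a 0.5 threshold and averages the distances.
# The helper below isolates that metric for two arbitrary point sets; the
# function name and default threshold are illustrative assumptions.
import numpy as np
import open3d as o3d

def overlap_consistency(points_a, points_b, dist_threshold=0.5):
    """Return (#points of A closer than dist_threshold to B, mean A->B distance)."""
    pcl_a = o3d.geometry.PointCloud()
    pcl_a.points = o3d.utility.Vector3dVector(points_a)
    pcl_b = o3d.geometry.PointCloud()
    pcl_b.points = o3d.utility.Vector3dVector(points_b)
    # Distance from each point of A to its nearest neighbour in B
    dists = np.asarray(pcl_a.compute_point_cloud_distance(pcl_b))
    return int(np.sum(dists < dist_threshold)), float(dists.mean())

# Example usage on random data:
# n_close, mean_d = overlap_consistency(np.random.rand(1000, 3), np.random.rand(1000, 3))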
def infer_optimal_calib(input_files, model_wrappers, image_shape): """ Process a list of input files to infer correction in extrinsic calibration. Files should all correspond to the same car. Number of cameras is assumed to be 4. Parameters ---------- input_file : list (number of cameras) of lists (number of files) of str Image file model_wrappers : nn.Module Model wrappers used for inference image_shape : Image shape Input image shape """ N_files = len(input_files[0]) N_cams = len(input_files) image_area = image_shape[0] * image_shape[1] camera_context_pairs = CAMERA_CONTEXT_PAIRS[N_cams] calib_data = {} for i_cam in range(N_cams): base_folder_str = get_base_folder(input_files[i_cam][0]) split_type_str = get_split_type(input_files[i_cam][0]) seq_name_str = get_sequence_name(input_files[i_cam][0]) camera_str = get_camera_name(input_files[i_cam][0]) calib_data[camera_str] = read_raw_calib_files_camera_valeo(base_folder_str, split_type_str, seq_name_str, camera_str) cams = [] not_masked = [] # Assume all images are from the same sequence (thus same cameras) for i_cam in range(N_cams): path_to_ego_mask = get_path_to_ego_mask(input_files[i_cam][0]) poly_coeffs, principal_point, scale_factors, K, k, p = get_full_intrinsics(input_files[i_cam][0], calib_data) poly_coeffs = torch.from_numpy(poly_coeffs).unsqueeze(0) principal_point = torch.from_numpy(principal_point).unsqueeze(0) scale_factors = torch.from_numpy(scale_factors).unsqueeze(0) K = torch.from_numpy(K).unsqueeze(0) k = torch.from_numpy(k).unsqueeze(0) p = torch.from_numpy(p).unsqueeze(0) pose_matrix = torch.from_numpy(get_extrinsics_pose_matrix(input_files[i_cam][0], calib_data)).unsqueeze(0) pose_tensor = Pose(pose_matrix) camera_type = get_camera_type(input_files[i_cam][0], calib_data) camera_type_int = torch.tensor([get_camera_type_int(camera_type)]) cams.append(CameraMultifocal(poly_coeffs=poly_coeffs.float(), principal_point=principal_point.float(), scale_factors=scale_factors.float(), K=K.float(), k1=k[:, 0].float(), k2=k[:, 1].float(), k3=k[:, 2].float(), p1=p[:, 0].float(), p2=p[:, 1].float(), camera_type=camera_type_int, Tcw=pose_tensor)) if torch.cuda.is_available(): cams[i_cam] = cams[i_cam].to('cuda:{}'.format(rank())) ego_mask = np.load(path_to_ego_mask) not_masked.append(torch.from_numpy(ego_mask.astype(float)).cuda().float()) # Learning variables extra_trans_m = [torch.autograd.Variable(torch.zeros(3).cuda(), requires_grad=True) for _ in range(N_cams)] extra_rot_deg = [torch.autograd.Variable(torch.zeros(3).cuda(), requires_grad=True) for _ in range(N_cams)] # Constraints: translation frozen_cams_trans = args.frozen_cams_trans if frozen_cams_trans is not None: for i_cam in frozen_cams_trans: extra_trans_m[i_cam].requires_grad = False # Constraints: rotation frozen_cams_rot = args.frozen_cams_rot if frozen_cams_rot is not None: for i_cam in frozen_cams_rot: extra_rot_deg[i_cam].requires_grad = False # Parameters from argument parser save_pictures = args.save_pictures n_epochs = args.n_epochs learning_rate = args.lr step_size = args.scheduler_step_size gamma = args.scheduler_gamma # Table of loss loss_tab = np.zeros(n_epochs) # Table of extra rotation values extra_rot_values_tab = np.zeros((N_cams * 3, N_files * n_epochs)) # Optimizer optimizer = optim.Adam(extra_trans_m + extra_rot_deg, lr=learning_rate) # Scheduler scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma) # Regularization weights regul_weight_trans = torch.tensor(args.regul_weight_trans).cuda() regul_weight_rot = 
torch.tensor(args.regul_weight_rot).cuda() regul_weight_overlap = torch.tensor(args.regul_weight_overlap).cuda() # Loop on the number of epochs count = 0 for epoch in range(n_epochs): print('Epoch ' + str(epoch) + '/' + str(n_epochs)) # Initialize loss loss_sum = 0 # Loop on the number of files for i_file in range(N_files): # Filename for camera 0 base_0, ext_0 = os.path.splitext(os.path.basename(input_files[0][i_file])) print(base_0) # Initialize list of tensors: images, predicted inverse depths and predicted depths images = [] pred_inv_depths = [] pred_depths = [] # Loop on cams and predict depth for i_cam in range(N_cams): images.append(load_image(input_files[i_cam][i_file]).convert('RGB')) images[i_cam] = resize_image(images[i_cam], image_shape) images[i_cam] = to_tensor(images[i_cam]).unsqueeze(0) if torch.cuda.is_available(): images[i_cam] = images[i_cam].to('cuda:{}'.format(rank())) with torch.no_grad(): pred_inv_depths.append(model_wrappers[i_cam].depth(images[i_cam])) pred_depths.append(inv2depth(pred_inv_depths[i_cam])) # Define a loss function between 2 images def photo_loss_2imgs(i_cam1, i_cam2, extra_trans_list, extra_rot_list, save_pictures): # Computes the photometric loss between 2 images of adjacent cameras # It reconstructs each image from the adjacent one, applying correction in rotation and translation # Apply correction on two cams for i, i_cam in enumerate([i_cam1, i_cam2]): pose_matrix = get_extrinsics_pose_matrix_extra_trans_rot_torch(input_files[i_cam][i_file], calib_data, extra_trans_list[i], extra_rot_list[i]).unsqueeze(0) pose_tensor = Pose(pose_matrix).to('cuda:{}'.format(rank())) CameraMultifocal.Twc.fget.cache_clear() cams[i_cam].Tcw = pose_tensor # Reconstruct 3D points for each cam world_points1 = cams[i_cam1].reconstruct(pred_depths[i_cam1], frame='w') world_points2 = cams[i_cam2].reconstruct(pred_depths[i_cam2], frame='w') # Get coordinates of projected points on other cam ref_coords1to2 = cams[i_cam2].project(world_points1, frame='w') ref_coords2to1 = cams[i_cam1].project(world_points2, frame='w') # Reconstruct each image from the adjacent camera reconstructedImg2to1 = funct.grid_sample(images[i_cam2]*not_masked[i_cam2], ref_coords1to2, mode='bilinear', padding_mode='zeros', align_corners=True) reconstructedImg1to2 = funct.grid_sample(images[i_cam1]*not_masked[i_cam1], ref_coords2to1, mode='bilinear', padding_mode='zeros', align_corners=True) # Save pictures if requested if save_pictures: # Save original files if first epoch if epoch == 0: cv2.imwrite(args.save_folder + '/cam_' + str(i_cam1) + '_file_' + str(i_file) + '_orig.png', (images[i_cam1][0].permute(1, 2, 0))[:,:,[2,1,0]].detach().cpu().numpy() * 255) cv2.imwrite(args.save_folder + '/cam_' + str(i_cam2) + '_file_' + str(i_file) + '_orig.png', (images[i_cam2][0].permute(1, 2, 0))[:,:,[2,1,0]].detach().cpu().numpy() * 255) # Save reconstructed images cv2.imwrite(args.save_folder + '/epoch_' + str(epoch) + '_file_' + str(i_file) + '_cam_' + str(i_cam1) + '_recons_from_' + str(i_cam2) + '.png', ((reconstructedImg2to1*not_masked[i_cam1])[0].permute(1, 2, 0))[:,:,[2,1,0]].detach().cpu().numpy() * 255) cv2.imwrite(args.save_folder + '/epoch_' + str(epoch) + '_file_' + str(i_file) + '_cam_' + str(i_cam2) + '_recons_from_' + str(i_cam1) + '.png', ((reconstructedImg1to2*not_masked[i_cam2])[0].permute(1, 2, 0))[:,:,[2,1,0]].detach().cpu().numpy() * 255) # L1 loss l1_loss_1 = torch.abs(images[i_cam1]*not_masked[i_cam1] - reconstructedImg2to1*not_masked[i_cam1]) l1_loss_2 = 
torch.abs(images[i_cam2]*not_masked[i_cam2] - reconstructedImg1to2*not_masked[i_cam2]) # SSIM loss ssim_loss_weight = 0.85 ssim_loss_1 = SSIM(images[i_cam1]*not_masked[i_cam1], reconstructedImg2to1*not_masked[i_cam1], C1=1e-4, C2=9e-4, kernel_size=3) ssim_loss_2 = SSIM(images[i_cam2]*not_masked[i_cam2], reconstructedImg1to2*not_masked[i_cam2], C1=1e-4, C2=9e-4, kernel_size=3) ssim_loss_1 = torch.clamp((1. - ssim_loss_1) / 2., 0., 1.) ssim_loss_2 = torch.clamp((1. - ssim_loss_2) / 2., 0., 1.) # Photometric loss: alpha * ssim + (1 - alpha) * l1 photometric_loss_1 = ssim_loss_weight * ssim_loss_1.mean(1, True) + (1 - ssim_loss_weight) * l1_loss_1.mean(1, True) photometric_loss_2 = ssim_loss_weight * ssim_loss_2.mean(1, True) + (1 - ssim_loss_weight) * l1_loss_2.mean(1, True) # Compute the number of valid pixels mask1 = (reconstructedImg2to1 * not_masked[i_cam1]).sum(axis=1, keepdim=True) != 0 s1 = mask1.sum().float() mask2 = (reconstructedImg1to2 * not_masked[i_cam2]).sum(axis=1, keepdim=True) != 0 s2 = mask2.sum().float() # Compute the photometric losses weighed by the number of valid pixels loss_1 = (photometric_loss_1 * mask1).sum() / s1 if s1 > 0 else 0 loss_2 = (photometric_loss_2 * mask2).sum() / s2 if s2 > 0 else 0 # The final loss can be regularized to encourage a similar overlap between images if s1 > 0 and s2 > 0: return loss_1 + loss_2 + regul_weight_overlap * image_area * (1/s1 + 1/s2) else: return 0. # The final loss consists of summing the photometric loss of all pairs of adjacent cameras # and is regularized to prevent weights from exploding loss = sum([photo_loss_2imgs(i, (i + 1) % 4, [extra_trans_m[i], extra_trans_m[(i + 1) % 4]], [extra_rot_deg[i], extra_rot_deg[(i + 1) % 4]], save_pictures) for i in range(4)]) \ + regul_weight_rot * sum([(extra_rot_deg[i] ** 2).sum() for i in range(4)]) \ + regul_weight_trans * sum([(extra_trans_m[i] ** 2).sum() for i in range(4)]) # Optimization steps optimizer.zero_grad() loss.backward() optimizer.step() # Save correction values and print loss with torch.no_grad(): loss_sum += loss.item() for i_cam in range(N_cams): for j in range(3): extra_rot_values_tab[3*i_cam + j, count] = extra_rot_deg[i_cam][j].item() print('Loss:') print(loss) count += 1 # Update scheduler print('Epoch:', epoch, 'LR:', scheduler.get_lr()) scheduler.step() with torch.no_grad(): print('End of epoch') print(extra_trans_m) print(extra_rot_deg) loss_tab[epoch] = loss_sum/N_files # Plot/save loss if requested plt.figure() plt.plot(loss_tab) if args.show_plots: plt.show() if args.save_plots: plt.savefig(os.path.join(args.save_folder, get_sequence_name(input_files[0][0]) + '_loss.png')) # Plot/save correction values if requested plt.figure() for j in range(N_cams * 3): plt.plot(extra_rot_values_tab[j]) if args.show_plots: plt.show() if args.save_plots: plt.savefig(os.path.join(args.save_folder, get_sequence_name(input_files[0][0]) + '_extra_rot.png')) # Save correction values table if requested if args.save_rot_tab: np.save(os.path.join(args.save_folder, get_sequence_name(input_files[0][0]) + '_rot_tab.npy'), extra_rot_values_tab)
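# --- Illustrative sketch (not part of the original pipeline) ----------------
# infer_optimal_calib() optimizes small extrinsic corrections (3 translation and
# 3 rotation parameters per camera) with Adam and a StepLR schedule, optionally
# freezing some cameras via requires_grad and adding an L2 penalty on the
# corrections. The scaffolding below reproduces just that optimization pattern
# with a stand-in loss; `dummy_photometric_loss` and the hyperparameter values
# are illustrative assumptions.
import torch
import torch.optim as optim

n_cams = 4
extra_trans = [torch.zeros(3, requires_grad=True) for _ in range(n_cams)]
extra_rot = [torch.zeros(3, requires_grad=True) for _ in range(n_cams)]
extra_trans[0].requires_grad = False  # e.g. freeze the translation of camera 0

def dummy_photometric_loss(t, r):
    # Stand-in for the real photometric term between two adjacent cameras
    return ((t - 0.1) ** 2).sum() + ((r - 1.0) ** 2).sum()

optimizer = optim.Adam(extra_trans + extra_rot, lr=1e-2)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)
regul_weight = 1e-3

for epoch in range(30):
    loss = sum(dummy_photometric_loss(extra_trans[i], extra_rot[i]) for i in range(n_cams)) \
        + regul_weight * sum((extra_rot[i] ** 2).sum() + (extra_trans[i] ** 2).sum()
                             for i in range(n_cams))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()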
def infer_plot_and_save_3D_pcl(input_files, output_folder, model_wrappers, image_shape, half, save, stop): """ Process a single input file to produce and save visualization Parameters ---------- input_file : list (number of cameras) of lists (number of files) of str Image file output_file : str Output file, or folder where the output will be saved model_wrapper : nn.Module Model wrapper used for inference image_shape : Image shape Input image shape half: bool use half precision (fp16) save: str Save format (npz or png) """ N_cams = len(input_files) N_files = len(input_files[0]) camera_names = [] for i_cam in range(N_cams): camera_names.append(get_camera_name(input_files[i_cam][0])) cams = [] not_masked = [] # change to half precision for evaluation if requested dtype = torch.float16 if half else None bbox = o3d.geometry.AxisAlignedBoundingBox(min_bound=(-1000, -1000, -1), max_bound=(1000, 1000, 5)) # let's assume all images are from the same sequence (thus same cameras) for i_cam in range(N_cams): base_folder_str = get_base_folder(input_files[i_cam][0]) split_type_str = get_split_type(input_files[i_cam][0]) seq_name_str = get_sequence_name(input_files[i_cam][0]) camera_str = get_camera_name(input_files[i_cam][0]) calib_data = {} calib_data[camera_str] = read_raw_calib_files_camera_valeo( base_folder_str, split_type_str, seq_name_str, camera_str) path_to_theta_lut = get_path_to_theta_lut(input_files[i_cam][0]) path_to_ego_mask = get_path_to_ego_mask(input_files[i_cam][0]) poly_coeffs, principal_point, scale_factors = get_intrinsics( input_files[i_cam][0], calib_data) poly_coeffs = torch.from_numpy(poly_coeffs).unsqueeze(0) principal_point = torch.from_numpy(principal_point).unsqueeze(0) scale_factors = torch.from_numpy(scale_factors).unsqueeze(0) pose_matrix = torch.from_numpy( get_extrinsics_pose_matrix(input_files[i_cam][0], calib_data)).unsqueeze(0) pose_tensor = Pose(pose_matrix) cams.append( CameraFisheye(path_to_theta_lut=[path_to_theta_lut], path_to_ego_mask=[path_to_ego_mask], poly_coeffs=poly_coeffs.float(), principal_point=principal_point.float(), scale_factors=scale_factors.float(), Tcw=pose_tensor)) if torch.cuda.is_available(): cams[i_cam] = cams[i_cam].to('cuda:{}'.format(rank()), dtype=dtype) ego_mask = np.load(path_to_ego_mask) not_masked.append(ego_mask.astype(bool).reshape(-1)) # create output dirs for each cam seq_name = get_sequence_name(input_files[0][0]) for i_cam in range(N_cams): os.makedirs(os.path.join(output_folder, seq_name, 'depth', camera_names[i_cam]), exist_ok=True) os.makedirs(os.path.join(output_folder, seq_name, 'rgb', camera_names[i_cam]), exist_ok=True) for i_file in range(0, N_files, 25): base_0, ext_0 = os.path.splitext( os.path.basename(input_files[0][i_file])) print(base_0) images = [] images_numpy = [] pred_inv_depths = [] pred_depths = [] world_points = [] input_depth_files = [] has_gt_depth = [] gt_depth = [] gt_depth_3d = [] pcl_full = [] pcl_only_inliers = [] pcl_only_outliers = [] pcl_gt = [] rgb = [] viz_pred_inv_depths = [] for i_cam in range(N_cams): images.append( load_image(input_files[i_cam][i_file]).convert('RGB')) images[i_cam] = resize_image(images[i_cam], image_shape) images[i_cam] = to_tensor(images[i_cam]).unsqueeze(0) if torch.cuda.is_available(): images[i_cam] = images[i_cam].to('cuda:{}'.format(rank()), dtype=dtype) images_numpy.append(images[i_cam][0].cpu().numpy()) images_numpy[i_cam] = images_numpy[i_cam].reshape( (3, -1)).transpose() images_numpy[i_cam] = images_numpy[i_cam][not_masked[i_cam]] 
pred_inv_depths.append(model_wrappers[i_cam].depth(images[i_cam])) pred_depths.append(inv2depth(pred_inv_depths[i_cam])) world_points.append(cams[i_cam].reconstruct(pred_depths[i_cam], frame='w')) world_points[i_cam] = world_points[i_cam][0].cpu().numpy() world_points[i_cam] = world_points[i_cam].reshape( (3, -1)).transpose() world_points[i_cam] = world_points[i_cam][not_masked[i_cam]] cam_name = camera_names[i_cam] cam_int = cam_name.split('_')[-1] input_depth_files.append(get_depth_file( input_files[i_cam][i_file])) has_gt_depth.append(os.path.exists(input_depth_files[i_cam])) if has_gt_depth[i_cam]: gt_depth.append( np.load(input_depth_files[i_cam])['velodyne_depth'].astype( np.float32)) gt_depth[i_cam] = torch.from_numpy( gt_depth[i_cam]).unsqueeze(0).unsqueeze(0) if torch.cuda.is_available(): gt_depth[i_cam] = gt_depth[i_cam].to('cuda:{}'.format( rank()), dtype=dtype) gt_depth_3d.append(cams[i_cam].reconstruct(gt_depth[i_cam], frame='w')) gt_depth_3d[i_cam] = gt_depth_3d[i_cam][0].cpu().numpy() gt_depth_3d[i_cam] = gt_depth_3d[i_cam].reshape( (3, -1)).transpose() #gt_depth_3d[i_cam] = gt_depth_3d[i_cam][not_masked[i_cam]] else: gt_depth.append(0) gt_depth_3d.append(0) pcl_full.append(o3d.geometry.PointCloud()) pcl_full[i_cam].points = o3d.utility.Vector3dVector( world_points[i_cam]) pcl_full[i_cam].colors = o3d.utility.Vector3dVector( images_numpy[i_cam]) pcl = pcl_full[i_cam] #.select_by_index(ind) points_tmp = np.asarray(pcl.points) colors_tmp = np.asarray(pcl.colors) # remove points that are above mask_height = points_tmp[:, 2] > 2.5 # * (abs(points_tmp[:, 0]) < 10) * (abs(points_tmp[:, 1]) < 3) mask_colors_blue = np.sum( np.abs(colors_tmp - np.array([0.6, 0.8, 1])), axis=1) < 0.6 # bleu ciel mask_colors_green = np.sum( np.abs(colors_tmp - np.array([0.2, 1, 0.4])), axis=1) < 0.8 mask_colors_green2 = np.sum( np.abs(colors_tmp - np.array([0, 0.5, 0.15])), axis=1) < 0.2 mask = 1 - mask_height * mask_colors_blue mask2 = 1 - mask_height * mask_colors_green mask3 = 1 - mask_height * mask_colors_green2 mask = mask * mask2 * mask3 pcl = pcl.select_by_index(np.where(mask)[0]) cl, ind = pcl.remove_statistical_outlier(nb_neighbors=7, std_ratio=1.4) pcl = pcl.select_by_index(ind) pcl_only_inliers.append( pcl) #pcl_full[i_cam].select_by_index(ind)[mask]) #pcl_only_outliers.append(pcl_full[i_cam].select_by_index(ind, invert=True)) #pcl_only_outliers[i_cam].paint_uniform_color([0.0, 0.0, 1.0]) if has_gt_depth[i_cam]: pcl_gt.append(o3d.geometry.PointCloud()) pcl_gt[i_cam].points = o3d.utility.Vector3dVector( gt_depth_3d[i_cam]) gt_inv_depth = 1 / ( np.linalg.norm(gt_depth_3d[i_cam], axis=1) + 1e-6) cm = get_cmap('plasma') normalizer = .35 #np.percentile(gt_inv_depth, 95) gt_inv_depth /= (normalizer + 1e-6) #print(cm(np.clip(gt_inv_depth, 0., 1.0)).shape) # [:, :3] pcl_gt[i_cam].colors = o3d.utility.Vector3dVector( cm(np.clip(gt_inv_depth, 0., 1.0))[:, :3]) else: pcl_gt.append(0) #o3d.visualization.draw_geometries(pcl_full + [e for i, e in enumerate(pcl_gt) if e != 0]) # vis_full = o3d.visualization.Visualizer() # vis_full.create_window(visible = True, window_name = 'full'+str(i_file)) # for i_cam in range(N_cams): # vis_full.add_geometry(pcl_full[i_cam]) # for i, e in enumerate(pcl_gt): # if e != 0: # vis_full.add_geometry(e) # ctr = vis_full.get_view_control() # ctr.set_lookat(lookat_vector) # ctr.set_front(front_vector) # ctr.set_up(up_vector) # ctr.set_zoom(zoom_float) # #vis_full.run() # vis_full.destroy_window() N = 480 for i_cam_n in range(N): angle_str = str(int(360 / N * i_cam_n)) 
vis_only_inliers = o3d.visualization.Visualizer() vis_only_inliers.create_window(visible=True, window_name='inliers' + str(i_file)) for i_cam in range(N_cams): vis_only_inliers.add_geometry(pcl_only_inliers[i_cam]) for i, e in enumerate(pcl_gt): if e != 0: vis_only_inliers.add_geometry(e) ctr = vis_only_inliers.get_view_control() ctr.set_lookat(lookat_vector) ctr.set_front(front_vector) ctr.set_up(up_vector) ctr.set_zoom(zoom_float) param = o3d.io.read_pinhole_camera_parameters( '/home/vbelissen/Downloads/test/cameras_jsons/sequence/test1_' + str(i_cam_n) + 'stop.json') ctr.convert_from_pinhole_camera_parameters(param) opt = vis_only_inliers.get_render_option() opt.background_color = np.asarray([0, 0, 0]) #vis_only_inliers.update_geometry('inliers0') vis_only_inliers.poll_events() vis_only_inliers.update_renderer() if stop: vis_only_inliers.run() pcd1 = pcl_only_inliers[0] + pcl_only_inliers[ 1] + pcl_only_inliers[2] + pcl_only_inliers[3] for i_cam3 in range(4): if has_gt_depth[i_cam3]: pcd1 += pcl_gt[i_cam3] o3d.io.write_point_cloud( os.path.join(output_folder, seq_name, 'open3d', base_0 + str(i_cam_n) + '.pcd'), pcd1) #param = vis_only_inliers.get_view_control().convert_to_pinhole_camera_parameters() #o3d.io.write_pinhole_camera_parameters('/home/vbelissen/Downloads/test.json', param) image = vis_only_inliers.capture_screen_float_buffer(False) plt.imsave(os.path.join(output_folder, seq_name, 'pcl', 'normal', str(i_cam_n) + '_normal' + '.png'), np.asarray(image), dpi=1) vis_only_inliers.destroy_window() del ctr del vis_only_inliers del opt #del ctr #del vis_full #del vis_only_inliers #del vis_inliers_outliers #del vis_inliers_cropped for i_cam in range(N_cams): rgb.append( images[i_cam][0].permute(1, 2, 0).detach().cpu().numpy() * 255) viz_pred_inv_depths.append( viz_inv_depth(pred_inv_depths[i_cam][0], normalizer=0.8) * 255) viz_pred_inv_depths[i_cam][not_masked[i_cam].reshape(image_shape) == 0] = 0 concat = np.concatenate([rgb[i_cam], viz_pred_inv_depths[i_cam]], 0) # Save visualization output_file1 = os.path.join( output_folder, seq_name, 'depth', camera_names[i_cam], os.path.basename(input_files[i_cam][i_file])) output_file2 = os.path.join( output_folder, seq_name, 'rgb', camera_names[i_cam], os.path.basename(input_files[i_cam][i_file])) imwrite(output_file1, viz_pred_inv_depths[i_cam][:, :, ::-1]) imwrite(output_file2, rgb[i_cam][:, :, ::-1])
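# --- Illustrative sketch (not part of the original pipeline) ----------------
# The block above drives the legacy Open3D Visualizer to render the fused point
# clouds from a sequence of saved viewpoints and dump each frame as a PNG. The
# minimal version below shows the same render-and-capture pattern on a random
# point cloud; the output filename is an illustrative assumption.
import numpy as np
import open3d as o3d
import matplotlib.pyplot as plt

pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(np.random.rand(5000, 3))

vis = o3d.visualization.Visualizer()
vis.create_window(visible=True, window_name='capture_demo')
vis.add_geometry(pcd)
opt = vis.get_render_option()
opt.background_color = np.asarray([0, 0, 0])    # black background, as above
vis.poll_events()
vis.update_renderer()
frame = vis.capture_screen_float_buffer(False)  # float RGB image in [0, 1]
plt.imsave('capture_demo.png', np.asarray(frame), dpi=1)
vis.destroy_window()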
def project_fisheye(self, X, mask, frame='w'): """ Projects 3D points onto the image plane Parameters ---------- X : torch.Tensor [B,3,H,W] 3D points to be projected frame : 'w' Reference frame: 'c' for camera and 'w' for world Returns ------- points : torch.Tensor [B,H,W,2] 2D projected points that are within the image boundaries """ B, C, H, W = X[mask].shape assert C == 3 # World to camera: if frame == 'c': Xc = X[mask].view(B, 3, -1) # [B, 3, HW] elif frame == 'w': Xc = (Pose(self.Tcw.mat[mask]) @ X[mask]).view(B, 3, -1) # [B, 3, HW] else: raise ValueError('Unknown reference frame {}'.format(frame)) c1 = self.poly_coeffs[mask, 0].unsqueeze(1) c2 = self.poly_coeffs[mask, 1].unsqueeze(1) c3 = self.poly_coeffs[mask, 2].unsqueeze(1) c4 = self.poly_coeffs[mask, 3].unsqueeze(1) # Project 3D points onto the camera image plane X = Xc[:, 0] # [B, HW] Y = Xc[:, 1] # [B, HW] Z = Xc[:, 2] # [B, HW] phi = torch.atan2(Y, X) # [B, HW] rc = torch.sqrt(torch.pow(X, 2) + torch.pow(Y, 2)) # [B, HW] theta_1 = math.pi / 2 - torch.atan2(Z, rc) # [B, HW] theta_2 = torch.pow(theta_1, 2) # [B, HW] theta_3 = torch.pow(theta_1, 3) # [B, HW] theta_4 = torch.pow(theta_1, 4) # [B, HW] rho = c1 * theta_1 + c2 * theta_2 + c3 * theta_3 + c4 * theta_4 # [B, HW] rho = rho * ((X != 0) | (Y != 0) | (Z != 0)) u = rho * torch.cos(phi) / self.scale_factors[mask, 0].unsqueeze(1) + self.principal_point[mask, 0].unsqueeze(1) # [B, HW] v = rho * torch.sin(phi) / self.scale_factors[mask, 1].unsqueeze(1) + self.principal_point[mask, 1].unsqueeze(1) # [B, HW] if torch.isnan(u).sum() > 0: print(str(torch.isnan(u).sum().item()) + ' nans in projected fisheye') if torch.isnan(v).sum() > 0: print(str(torch.isnan(v).sum().item()) + ' nans in projected fisheye') # Normalize points Xnorm = 2 * u / (W - 1)# - 1. Ynorm = 2 * v / (H - 1)# - 1. # Clamp out-of-bounds pixels Xmask = ((Xnorm > 1) + (Xnorm < -1)).detach() Xnorm[Xmask] = 2. Ymask = ((Ynorm > 1) + (Ynorm < -1)).detach() Ynorm[Ymask] = 2. mask_out = (theta_1 * 180 * 2 / np.pi > 190.0).detach() Xnorm[mask_out] = 2. Ynorm[mask_out] = 2. # Return pixel coordinates return torch.stack([Xnorm, Ynorm], dim=-1).view(B, H, W, 2)
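# --- Illustrative sketch (not part of the original pipeline) ----------------
# project_fisheye() implements a polynomial fisheye model: the incidence angle
# theta (angle to the optical axis) is mapped to an image radius
#     rho(theta) = c1*theta + c2*theta^2 + c3*theta^3 + c4*theta^4,
# and the pixel lands at rho*(cos(phi), sin(phi)) divided by the per-axis scale
# factors and shifted by the principal point. The NumPy helper below applies
# the same formulas to a single camera-frame 3D point (its name is an
# illustrative assumption).
import numpy as np

def project_point_fisheye(X, Y, Z, poly_coeffs, principal_point, scale_factors):
    c1, c2, c3, c4 = poly_coeffs
    cx, cy = principal_point
    ax, ay = scale_factors
    phi = np.arctan2(Y, X)                  # azimuth in the image plane
    rc = np.sqrt(X ** 2 + Y ** 2)
    theta = np.pi / 2 - np.arctan2(Z, rc)   # angle to the optical axis
    rho = c1 * theta + c2 * theta ** 2 + c3 * theta ** 3 + c4 * theta ** 4
    u = rho * np.cos(phi) / ax + cx
    v = rho * np.sin(phi) / ay + cy
    return u, v

# Example usage:
# u, v = project_point_fisheye(1.0, 0.5, 4.0,
#                              (282.85, -27.8671, 114.318, -36.6703),
#                              (0.046296, -7.33178), (1.0, 1.0))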
def project_distorted(self, X, mask, frame='w'): """ Projects 3D points onto the image plane Parameters ---------- X : torch.Tensor [B,3,H,W] 3D points to be projected frame : 'w' Reference frame: 'c' for camera and 'w' for world Returns ------- points : torch.Tensor [B,H,W,2] 2D projected points that are within the image boundaries """ B, C, H, W = X[mask].shape assert C == 3 # Project 3D points onto the camera image plane if frame == 'c': Xc = X[mask].view(B, 3, -1) elif frame == 'w': Xc = (Pose(self.Tcw.mat[mask]) @ X[mask]).view(B, 3, -1) else: raise ValueError('Unknown reference frame {}'.format(frame)) # Z-Normalize points Z = Xc[:, 2].clamp(min=1e-5) Xn = Xc[:, 0] / Z Yn = Xc[:, 1] / Z veryFarMask = ((Xn > 5) + (Xn < -5) + (Yn > 5) + (Yn < -5)).detach() Xn[veryFarMask] = 0. Yn[veryFarMask] = 0. r2 = Xn.pow(2) + Yn.pow(2) r4 = r2.pow(2) r6 = r2 * r4 # Distorted normalized points rad_dist = (1 + self.k1[mask].view(B,1) * r2 + self.k2[mask].view(B,1) * r4 + self.k3[mask].view(B,1) * r6) Xd = Xn * rad_dist + 2 * self.p1[mask].view(B,1) * Xn * Yn + self.p2[mask].view(B,1) * (r2 + 2 * Xn.pow(2)) Yd = Yn * rad_dist + 2 * self.p2[mask].view(B,1) * Xn * Yn + self.p1[mask].view(B,1) * (r2 + 2 * Yn.pow(2)) # Final projection u = self.fx[mask].view(B,1) * Xd + self.cx[mask].view(B,1) v = self.fy[mask].view(B,1) * Yd + self.cy[mask].view(B,1) # normalized coordinates uNorm = 2 * u / (W - 1) - 1. vNorm = 2 * v / (H - 1) - 1. # Clamp out-of-bounds pixels Xmask = ((uNorm > 1) + (uNorm < -1)).detach() uNorm[Xmask] = 2. Ymask = ((vNorm > 1) + (vNorm < -1)).detach() vNorm[Ymask] = 2. # Clamp very far out-of-bounds pixels uNorm[veryFarMask] = 2. vNorm[veryFarMask] = 2. # Return pixel coordinates return torch.stack([uNorm, vNorm], dim=-1).view(B, H, W, 2).float()
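# --- Illustrative sketch (not part of the original pipeline) ----------------
# project_distorted() is a standard pinhole projection with radial (k1, k2, k3)
# and tangential (p1, p2) distortion: points are Z-normalized, distorted, and
# then mapped through the intrinsics (fx, fy, cx, cy). The NumPy helper below
# mirrors those formulas for a single camera-frame point (the function name is
# an illustrative assumption).
import numpy as np

def project_point_distorted(X, Y, Z, fx, fy, cx, cy, k1, k2, k3, p1, p2):
    Z = max(Z, 1e-5)            # avoid division by zero, as in the code above
    xn, yn = X / Z, Y / Z       # Z-normalized coordinates
    r2 = xn ** 2 + yn ** 2
    rad = 1 + k1 * r2 + k2 * r2 ** 2 + k3 * r2 ** 3
    xd = xn * rad + 2 * p1 * xn * yn + p2 * (r2 + 2 * xn ** 2)
    yd = yn * rad + 2 * p2 * xn * yn + p1 * (r2 + 2 * yn ** 2)
    return fx * xd + cx, fy * yd + cy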
def infer_plot_and_save_3D_pcl(input_files, output_folder, model_wrappers, image_shape, half, save, stop): """ Process a single input file to produce and save visualization Parameters ---------- input_file : list (number of cameras) of lists (number of files) of str Image file output_file : str Output file, or folder where the output will be saved model_wrapper : nn.Module Model wrapper used for inference image_shape : Image shape Input image shape half: bool use half precision (fp16) save: str Save format (npz or png) """ N_cams = len(input_files) N_files = len(input_files[0]) camera_names = [] for i_cam in range(N_cams): camera_names.append(get_camera_name(input_files[i_cam][0])) cams = [] not_masked = [] cams_x = [] cams_y = [] cams_z = [] alpha_mask = 0.5 # change to half precision for evaluation if requested dtype = torch.float16 if half else None bbox = o3d.geometry.AxisAlignedBoundingBox(min_bound=(-1000, -1000, -1), max_bound=(1000, 1000, 5)) # let's assume all images are from the same sequence (thus same cameras) for i_cam in range(N_cams): base_folder_str = get_base_folder(input_files[i_cam][0]) split_type_str = get_split_type(input_files[i_cam][0]) seq_name_str = get_sequence_name(input_files[i_cam][0]) camera_str = get_camera_name(input_files[i_cam][0]) calib_data = {} calib_data[camera_str] = read_raw_calib_files_camera_valeo(base_folder_str, split_type_str, seq_name_str, camera_str) cams_x.append(float(calib_data[camera_str]['extrinsics']['pos_x_m'])) cams_y.append(float(calib_data[camera_str]['extrinsics']['pos_y_m'])) cams_z.append(float(calib_data[camera_str]['extrinsics']['pos_y_m'])) path_to_theta_lut = get_path_to_theta_lut(input_files[i_cam][0]) path_to_ego_mask = get_path_to_ego_mask(input_files[i_cam][0]) poly_coeffs, principal_point, scale_factors = get_intrinsics(input_files[i_cam][0], calib_data) poly_coeffs = torch.from_numpy(poly_coeffs).unsqueeze(0) principal_point = torch.from_numpy(principal_point).unsqueeze(0) scale_factors = torch.from_numpy(scale_factors).unsqueeze(0) pose_matrix = torch.from_numpy(get_extrinsics_pose_matrix(input_files[i_cam][0], calib_data)).unsqueeze(0) pose_tensor = Pose(pose_matrix) cams.append(CameraFisheye(path_to_theta_lut=[path_to_theta_lut], path_to_ego_mask=[path_to_ego_mask], poly_coeffs=poly_coeffs.float(), principal_point=principal_point.float(), scale_factors=scale_factors.float(), Tcw=pose_tensor)) if torch.cuda.is_available(): cams[i_cam] = cams[i_cam].to('cuda:{}'.format(rank()), dtype=dtype) ego_mask = np.load(path_to_ego_mask) not_masked.append(ego_mask.astype(bool).reshape(-1)) cams_middle = np.zeros(3) cams_middle[0] = (cams_x[0] + cams_x[1] + cams_x[2] + cams_x[3]) / 4 cams_middle[1] = (cams_y[0] + cams_y[1] + cams_y[2] + cams_y[3]) / 4 cams_middle[2] = (cams_z[0] + cams_z[1] + cams_z[2] + cams_z[3]) / 4 # create output dirs for each cam seq_name = get_sequence_name(input_files[0][0]) for i_cam in range(N_cams): os.makedirs(os.path.join(output_folder, seq_name, 'depth', camera_names[i_cam]), exist_ok=True) os.makedirs(os.path.join(output_folder, seq_name, 'rgb', camera_names[i_cam]), exist_ok=True) for i_file in range(0, N_files): base_0, ext_0 = os.path.splitext(os.path.basename(input_files[0][i_file])) print(base_0) os.makedirs(os.path.join(output_folder, seq_name, 'pcl', base_0), exist_ok=True) images = [] images_numpy = [] pred_inv_depths = [] pred_depths = [] world_points = [] input_depth_files = [] has_gt_depth = [] input_full_masks = [] has_full_mask = [] gt_depth = [] gt_depth_3d = [] pcl_full = [] 
pcl_only_inliers = [] pcl_only_outliers = [] pcl_gt = [] rgb = [] viz_pred_inv_depths = [] for i_cam in range(N_cams): images.append(load_image(input_files[i_cam][i_file]).convert('RGB')) images[i_cam] = resize_image(images[i_cam], image_shape) images[i_cam] = to_tensor(images[i_cam]).unsqueeze(0) if torch.cuda.is_available(): images[i_cam] = images[i_cam].to('cuda:{}'.format(rank()), dtype=dtype) pred_inv_depths.append(model_wrappers[i_cam].depth(images[i_cam])) pred_depths.append(inv2depth(pred_inv_depths[i_cam])) pred_depth_copy = pred_depths[i_cam].squeeze(0).squeeze(0).cpu().numpy() pred_depth_copy = np.uint8(pred_depth_copy) lap = np.uint8(np.absolute(cv2.Laplacian(pred_depth_copy,cv2.CV_64F,ksize=3))) great_lap = lap < 4 great_lap = great_lap.reshape(-1) images_numpy.append(images[i_cam][0].cpu().numpy()) images_numpy[i_cam] = images_numpy[i_cam].reshape((3, -1)).transpose() images_numpy[i_cam] = images_numpy[i_cam][not_masked[i_cam]*great_lap] world_points.append(cams[i_cam].reconstruct(pred_depths[i_cam], frame='w')) world_points[i_cam] = world_points[i_cam][0].cpu().numpy() world_points[i_cam] = world_points[i_cam].reshape((3, -1)).transpose() world_points[i_cam] = world_points[i_cam][not_masked[i_cam]*great_lap] cam_name = camera_names[i_cam] cam_int = cam_name.split('_')[-1] input_depth_files.append(get_depth_file(input_files[i_cam][i_file])) has_gt_depth.append(os.path.exists(input_depth_files[i_cam])) if has_gt_depth[i_cam]: gt_depth.append(np.load(input_depth_files[i_cam])['velodyne_depth'].astype(np.float32)) gt_depth[i_cam] = torch.from_numpy(gt_depth[i_cam]).unsqueeze(0).unsqueeze(0) if torch.cuda.is_available(): gt_depth[i_cam] = gt_depth[i_cam].to('cuda:{}'.format(rank()), dtype=dtype) gt_depth_3d.append(cams[i_cam].reconstruct(gt_depth[i_cam], frame='w')) gt_depth_3d[i_cam] = gt_depth_3d[i_cam][0].cpu().numpy() gt_depth_3d[i_cam] = gt_depth_3d[i_cam].reshape((3, -1)).transpose() #gt_depth_3d[i_cam] = gt_depth_3d[i_cam][not_masked[i_cam]] else: gt_depth.append(0) gt_depth_3d.append(0) input_full_masks.append(get_full_mask_file(input_files[i_cam][i_file])) has_full_mask.append(os.path.exists(input_full_masks[i_cam])) pcl_full.append(o3d.geometry.PointCloud()) pcl_full[i_cam].points = o3d.utility.Vector3dVector(world_points[i_cam]) if has_full_mask[i_cam]: full_mask = np.load(input_full_masks[i_cam]) mask_colors = label_colors[correspondence[full_mask]].reshape((-1, 3))#.transpose() mask_colors = mask_colors[not_masked[i_cam]*great_lap] pcl_full[i_cam].colors = o3d.utility.Vector3dVector(alpha_mask * mask_colors + (1-alpha_mask) * images_numpy[i_cam]) else: pcl_full[i_cam].colors = o3d.utility.Vector3dVector(images_numpy[i_cam]) pcl = pcl_full[i_cam]#.select_by_index(ind) points_tmp = np.asarray(pcl.points) colors_tmp = images_numpy[i_cam]#np.asarray(pcl.colors) # remove points that are above mask_height = points_tmp[:, 2] > 1.5# * (abs(points_tmp[:, 0]) < 10) * (abs(points_tmp[:, 1]) < 3) mask_colors_blue = np.sum(np.abs(colors_tmp - np.array([0.6, 0.8, 1])), axis=1) < 0.6 # bleu ciel mask_colors_green = np.sum(np.abs(colors_tmp - np.array([0.2, 1, 0.4])), axis=1) < 0.8 mask_colors_green2 = np.sum(np.abs(colors_tmp - np.array([0, 0.5, 0.15])), axis=1) < 0.2 mask = 1-mask_height*mask_colors_blue mask2 = 1-mask_height*mask_colors_green mask3 = 1- mask_height*mask_colors_green2 mask = mask*mask2*mask3 pcl = pcl.select_by_index(np.where(mask)[0]) cl, ind = pcl.remove_statistical_outlier(nb_neighbors=7, std_ratio=1.2) pcl = pcl.select_by_index(ind) pcl = 
pcl.voxel_down_sample(voxel_size=0.02) #if has_full_mask[i_cam]: # pcl.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.2, max_nn=15)) pcl_only_inliers.append(pcl)#pcl_full[i_cam].select_by_index(ind)[mask]) #pcl_only_outliers.append(pcl_full[i_cam].select_by_index(ind, invert=True)) #pcl_only_outliers[i_cam].paint_uniform_color([0.0, 0.0, 1.0]) if has_gt_depth[i_cam]: pcl_gt.append(o3d.geometry.PointCloud()) pcl_gt[i_cam].points = o3d.utility.Vector3dVector(gt_depth_3d[i_cam]) gt_inv_depth = 1 / (np.linalg.norm(gt_depth_3d[i_cam] - cams_middle, axis=1) + 1e-6) cm = get_cmap('plasma') normalizer = .35#np.percentile(gt_inv_depth, 95) gt_inv_depth /= (normalizer + 1e-6) #print(cm(np.clip(gt_inv_depth, 0., 1.0)).shape) # [:, :3] pcl_gt[i_cam].colors = o3d.utility.Vector3dVector(cm(np.clip(gt_inv_depth, 0., 1.0))[:, :3]) else: pcl_gt.append(0) # threshold = 0.5 # threshold2 = 0.1 # for i_cam in range(4): # if has_full_mask[i_cam]: # for relative in [-1, 1]: # if not has_full_mask[(i_cam + relative) % 4]: # dists = pcl_only_inliers[(i_cam + relative) % 4].compute_point_cloud_distance(pcl_only_inliers[i_cam]) # p1 = pcl_only_inliers[(i_cam + relative) % 4].select_by_index(np.where(np.asarray(dists) > threshold)[0]) # p2 = pcl_only_inliers[(i_cam + relative) % 4].select_by_index(np.where(np.asarray(dists) > threshold)[0], invert=True).uniform_down_sample(15)#.voxel_down_sample(voxel_size=0.5) # pcl_only_inliers[(i_cam + relative) % 4] = p1 + p2 # if has_gt_depth[i_cam]: # if has_full_mask[i_cam]: # down = 15 # else: # down = 30 # dists = pcl_only_inliers[i_cam].compute_point_cloud_distance(pcl_gt[i_cam]) # p1 = pcl_only_inliers[i_cam].select_by_index(np.where(np.asarray(dists) > threshold2)[0]) # p2 = pcl_only_inliers[i_cam].select_by_index(np.where(np.asarray(dists) > threshold2)[0], invert=True).uniform_down_sample(down)#.voxel_down_sample(voxel_size=0.5) # pcl_only_inliers[i_cam] = p1 + p2 for i_cam_n in range(120): vis_only_inliers = o3d.visualization.Visualizer() vis_only_inliers.create_window(visible = True, window_name = 'inliers'+str(i_file)) for i_cam in range(N_cams): vis_only_inliers.add_geometry(pcl_only_inliers[i_cam]) for i, e in enumerate(pcl_gt): if e != 0: vis_only_inliers.add_geometry(e) ctr = vis_only_inliers.get_view_control() ctr.set_lookat(lookat_vector) ctr.set_front(front_vector) ctr.set_up(up_vector) ctr.set_zoom(zoom_float) param = o3d.io.read_pinhole_camera_parameters('/home/vbelissen/Downloads/test/cameras_jsons/sequence/test1_'+str(i_cam_n)+'v3rear.json') ctr.convert_from_pinhole_camera_parameters(param) opt = vis_only_inliers.get_render_option() opt.background_color = np.asarray([0, 0, 0]) opt.point_size = 3.0 #opt.light_on = False #vis_only_inliers.update_geometry('inliers0') vis_only_inliers.poll_events() vis_only_inliers.update_renderer() if stop: vis_only_inliers.run() pcd1 = pcl_only_inliers[0]+pcl_only_inliers[1]+pcl_only_inliers[2]+pcl_only_inliers[3] for i_cam3 in range(4): if has_gt_depth[i_cam3]: pcd1 += pcl_gt[i_cam3] #o3d.io.write_point_cloud(os.path.join(output_folder, seq_name, 'open3d', base_0 + '.pcd'), pcd1) #param = vis_only_inliers.get_view_control().convert_to_pinhole_camera_parameters() #o3d.io.write_pinhole_camera_parameters('/home/vbelissen/Downloads/test.json', param) image = vis_only_inliers.capture_screen_float_buffer(False) plt.imsave(os.path.join(output_folder, seq_name, 'pcl', base_0, str(i_cam_n) + '.png'), np.asarray(image), dpi=1) vis_only_inliers.destroy_window() del ctr del vis_only_inliers del opt # 
vis_inliers_outliers = o3d.visualization.Visualizer() # vis_inliers_outliers.create_window(visible = True, window_name = 'inout'+str(i_file)) # for i_cam in range(N_cams): # vis_inliers_outliers.add_geometry(pcl_only_inliers[i_cam]) # vis_inliers_outliers.add_geometry(pcl_only_outliers[i_cam]) # for i, e in enumerate(pcl_gt): # if e != 0: # vis_inliers_outliers.add_geometry(e) # ctr = vis_inliers_outliers.get_view_control() # ctr.set_lookat(lookat_vector) # ctr.set_front(front_vector) # ctr.set_up(up_vector) # ctr.set_zoom(zoom_float) # #vis_inliers_outliers.run() # vis_inliers_outliers.destroy_window() # for i_cam2 in range(4): # for suff in ['', 'bis', 'ter']: # vis_inliers_cropped = o3d.visualization.Visualizer() # vis_inliers_cropped.create_window(visible = True, window_name = 'incrop'+str(i_file)) # for i_cam in range(N_cams): # vis_inliers_cropped.add_geometry(pcl_only_inliers[i_cam].crop(bbox)) # for i, e in enumerate(pcl_gt): # if e != 0: # vis_inliers_cropped.add_geometry(e) # ctr = vis_inliers_cropped.get_view_control() # ctr.set_lookat(lookat_vector) # ctr.set_front(front_vector) # ctr.set_up(up_vector) # ctr.set_zoom(zoom_float) # param = o3d.io.read_pinhole_camera_parameters( # '/home/vbelissen/Downloads/test/cameras_jsons/test' + str(i_cam2 + 1) + suff + '.json') # ctr.convert_from_pinhole_camera_parameters(param) # opt = vis_inliers_cropped.get_render_option() # opt.background_color = np.asarray([0, 0, 0]) # vis_inliers_cropped.poll_events() # vis_inliers_cropped.update_renderer() # #vis_inliers_cropped.run() # image = vis_inliers_cropped.capture_screen_float_buffer(False) # plt.imsave(os.path.join(output_folder, seq_name, 'pcl', 'cropped', str(i_cam2) + suff, base_0 + '_cropped_' + str(i_cam2) + suff + '.png'), # np.asarray(image), dpi=1) # vis_inliers_cropped.destroy_window() # del ctr # del opt # del vis_inliers_cropped #del ctr #del vis_full #del vis_only_inliers #del vis_inliers_outliers #del vis_inliers_cropped for i_cam in range(N_cams): rgb.append(images[i_cam][0].permute(1, 2, 0).detach().cpu().numpy() * 255) viz_pred_inv_depths.append(viz_inv_depth(pred_inv_depths[i_cam][0], normalizer=0.8) * 255) viz_pred_inv_depths[i_cam][not_masked[i_cam].reshape(image_shape) == 0] = 0 concat = np.concatenate([rgb[i_cam], viz_pred_inv_depths[i_cam]], 0) # Save visualization output_file1 = os.path.join(output_folder, seq_name, 'depth', camera_names[i_cam], os.path.basename(input_files[i_cam][i_file])) output_file2 = os.path.join(output_folder, seq_name, 'rgb', camera_names[i_cam], os.path.basename(input_files[i_cam][i_file])) imwrite(output_file1, viz_pred_inv_depths[i_cam][:, :, ::-1]) if has_full_mask[i_cam]: full_mask = np.load(input_full_masks[i_cam]) mask_colors = label_colors[correspondence[full_mask]] imwrite(output_file2, (1-alpha_mask) * rgb[i_cam][:, :, ::-1] + alpha_mask * mask_colors[:, :, ::-1]*255) else: imwrite(output_file2, rgb[i_cam][:, :, ::-1])
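# --- Illustrative sketch (not part of the original pipeline) ----------------
# The variant above additionally discards pixels whose predicted depth has a
# strong Laplacian response, presumably to drop depth discontinuities, which
# tend to produce streak artifacts in the reconstructed point cloud. The
# snippet below reproduces that filter on a dummy depth map; the threshold of 4
# and the uint8 cast are taken from the code above.
import numpy as np
import cv2

depth = (np.random.rand(800, 1280) * 40).astype(np.float32)  # dummy depth map
depth_u8 = np.uint8(depth)                                   # same cast as above
lap = np.uint8(np.absolute(cv2.Laplacian(depth_u8, cv2.CV_64F, ksize=3)))
keep = (lap < 4).reshape(-1)  # boolean mask of "smooth" pixels, flattened like the ego mask
print('kept', keep.sum(), 'of', keep.size, 'pixels')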
def infer_plot_and_save_3D_pcl(input_file1, input_file2, input_file3, input_file4, output_file1, output_file2, output_file3, output_file4, model_wrapper1, model_wrapper2, model_wrapper3, model_wrapper4, hasGTdepth1, hasGTdepth2, hasGTdepth3, hasGTdepth4, image_shape, half, save): """ Process a single input file to produce and save visualization Parameters ---------- input_file : str Image file output_file : str Output file, or folder where the output will be saved model_wrapper : nn.Module Model wrapper used for inference image_shape : Image shape Input image shape half: bool use half precision (fp16) save: str Save format (npz or png) """ if not is_image(output_file1): # If not an image, assume it's a folder and append the input name os.makedirs(output_file1, exist_ok=True) output_file1 = os.path.join(output_file1, os.path.basename(input_file1)) if not is_image(output_file2): # If not an image, assume it's a folder and append the input name os.makedirs(output_file2, exist_ok=True) output_file2 = os.path.join(output_file2, os.path.basename(input_file2)) if not is_image(output_file3): # If not an image, assume it's a folder and append the input name os.makedirs(output_file3, exist_ok=True) output_file3 = os.path.join(output_file3, os.path.basename(input_file3)) if not is_image(output_file4): # If not an image, assume it's a folder and append the input name os.makedirs(output_file4, exist_ok=True) output_file4 = os.path.join(output_file4, os.path.basename(input_file4)) # change to half precision for evaluation if requested dtype = torch.float16 if half else None # Load image image1 = load_image(input_file1).convert('RGB') image2 = load_image(input_file2).convert('RGB') image3 = load_image(input_file3).convert('RGB') image4 = load_image(input_file4).convert('RGB') # Resize and to tensor image1 = resize_image(image1, image_shape) image2 = resize_image(image2, image_shape) image3 = resize_image(image3, image_shape) image4 = resize_image(image4, image_shape) image1 = to_tensor(image1).unsqueeze(0) image2 = to_tensor(image2).unsqueeze(0) image3 = to_tensor(image3).unsqueeze(0) image4 = to_tensor(image4).unsqueeze(0) # Send image to GPU if available if torch.cuda.is_available(): image1 = image1.to('cuda:{}'.format(rank()), dtype=dtype) image2 = image2.to('cuda:{}'.format(rank()), dtype=dtype) image3 = image3.to('cuda:{}'.format(rank()), dtype=dtype) image4 = image4.to('cuda:{}'.format(rank()), dtype=dtype) # Depth inference (returns predicted inverse depth) pred_inv_depth1 = model_wrapper1.depth(image1) pred_inv_depth2 = model_wrapper2.depth(image2) pred_inv_depth3 = model_wrapper1.depth(image3) pred_inv_depth4 = model_wrapper2.depth(image4) pred_depth1 = inv2depth(pred_inv_depth1) pred_depth2 = inv2depth(pred_inv_depth2) pred_depth3 = inv2depth(pred_inv_depth3) pred_depth4 = inv2depth(pred_inv_depth4) base_folder_str1 = get_base_folder(input_file1) split_type_str1 = get_split_type(input_file1) seq_name_str1 = get_sequence_name(input_file1) camera_str1 = get_camera_name(input_file1) base_folder_str2 = get_base_folder(input_file2) split_type_str2 = get_split_type(input_file2) seq_name_str2 = get_sequence_name(input_file2) camera_str2 = get_camera_name(input_file2) base_folder_str3 = get_base_folder(input_file3) split_type_str3 = get_split_type(input_file3) seq_name_str3 = get_sequence_name(input_file3) camera_str3 = get_camera_name(input_file3) base_folder_str4 = get_base_folder(input_file4) split_type_str4 = get_split_type(input_file4) seq_name_str4 = get_sequence_name(input_file4) camera_str4 = 
get_camera_name(input_file4) calib_data1 = {} calib_data2 = {} calib_data3 = {} calib_data4 = {} calib_data1[camera_str1] = read_raw_calib_files_camera_valeo( base_folder_str1, split_type_str1, seq_name_str1, camera_str1) calib_data2[camera_str2] = read_raw_calib_files_camera_valeo( base_folder_str2, split_type_str2, seq_name_str2, camera_str2) calib_data3[camera_str3] = read_raw_calib_files_camera_valeo( base_folder_str3, split_type_str3, seq_name_str3, camera_str3) calib_data4[camera_str4] = read_raw_calib_files_camera_valeo( base_folder_str4, split_type_str4, seq_name_str4, camera_str4) path_to_theta_lut1 = get_path_to_theta_lut(input_file1) path_to_ego_mask1 = get_path_to_ego_mask(input_file1) poly_coeffs1, principal_point1, scale_factors1 = get_intrinsics( input_file1, calib_data1) path_to_theta_lut2 = get_path_to_theta_lut(input_file2) path_to_ego_mask2 = get_path_to_ego_mask(input_file2) poly_coeffs2, principal_point2, scale_factors2 = get_intrinsics( input_file2, calib_data2) path_to_theta_lut3 = get_path_to_theta_lut(input_file3) path_to_ego_mask3 = get_path_to_ego_mask(input_file3) poly_coeffs3, principal_point3, scale_factors3 = get_intrinsics( input_file3, calib_data3) path_to_theta_lut4 = get_path_to_theta_lut(input_file4) path_to_ego_mask4 = get_path_to_ego_mask(input_file4) poly_coeffs4, principal_point4, scale_factors4 = get_intrinsics( input_file4, calib_data4) poly_coeffs1 = torch.from_numpy(poly_coeffs1).unsqueeze(0) principal_point1 = torch.from_numpy(principal_point1).unsqueeze(0) scale_factors1 = torch.from_numpy(scale_factors1).unsqueeze(0) poly_coeffs2 = torch.from_numpy(poly_coeffs2).unsqueeze(0) principal_point2 = torch.from_numpy(principal_point2).unsqueeze(0) scale_factors2 = torch.from_numpy(scale_factors2).unsqueeze(0) poly_coeffs3 = torch.from_numpy(poly_coeffs3).unsqueeze(0) principal_point3 = torch.from_numpy(principal_point3).unsqueeze(0) scale_factors3 = torch.from_numpy(scale_factors3).unsqueeze(0) poly_coeffs4 = torch.from_numpy(poly_coeffs4).unsqueeze(0) principal_point4 = torch.from_numpy(principal_point4).unsqueeze(0) scale_factors4 = torch.from_numpy(scale_factors4).unsqueeze(0) pose_matrix1 = torch.from_numpy( get_extrinsics_pose_matrix(input_file1, calib_data1)).unsqueeze(0) pose_matrix2 = torch.from_numpy( get_extrinsics_pose_matrix(input_file2, calib_data2)).unsqueeze(0) pose_matrix3 = torch.from_numpy( get_extrinsics_pose_matrix(input_file3, calib_data3)).unsqueeze(0) pose_matrix4 = torch.from_numpy( get_extrinsics_pose_matrix(input_file4, calib_data4)).unsqueeze(0) pose_tensor1 = Pose(pose_matrix1) pose_tensor2 = Pose(pose_matrix2) pose_tensor3 = Pose(pose_matrix3) pose_tensor4 = Pose(pose_matrix4) ego_mask1 = np.load(path_to_ego_mask1) ego_mask2 = np.load(path_to_ego_mask2) ego_mask3 = np.load(path_to_ego_mask3) ego_mask4 = np.load(path_to_ego_mask4) not_masked1 = ego_mask1.astype(bool).reshape(-1) not_masked2 = ego_mask2.astype(bool).reshape(-1) not_masked3 = ego_mask3.astype(bool).reshape(-1) not_masked4 = ego_mask4.astype(bool).reshape(-1) cam1 = CameraFisheye(path_to_theta_lut=[path_to_theta_lut1], path_to_ego_mask=[path_to_ego_mask1], poly_coeffs=poly_coeffs1.float(), principal_point=principal_point1.float(), scale_factors=scale_factors1.float(), Tcw=pose_tensor1) cam2 = CameraFisheye(path_to_theta_lut=[path_to_theta_lut2], path_to_ego_mask=[path_to_ego_mask2], poly_coeffs=poly_coeffs2.float(), principal_point=principal_point2.float(), scale_factors=scale_factors2.float(), Tcw=pose_tensor2) cam3 = 
CameraFisheye(path_to_theta_lut=[path_to_theta_lut3], path_to_ego_mask=[path_to_ego_mask3], poly_coeffs=poly_coeffs3.float(), principal_point=principal_point3.float(), scale_factors=scale_factors3.float(), Tcw=pose_tensor3) cam4 = CameraFisheye(path_to_theta_lut=[path_to_theta_lut4], path_to_ego_mask=[path_to_ego_mask4], poly_coeffs=poly_coeffs4.float(), principal_point=principal_point4.float(), scale_factors=scale_factors4.float(), Tcw=pose_tensor4) if torch.cuda.is_available(): cam1 = cam1.to('cuda:{}'.format(rank()), dtype=dtype) cam2 = cam2.to('cuda:{}'.format(rank()), dtype=dtype) cam3 = cam3.to('cuda:{}'.format(rank()), dtype=dtype) cam4 = cam4.to('cuda:{}'.format(rank()), dtype=dtype) world_points1 = cam1.reconstruct(pred_depth1, frame='w') world_points1 = world_points1[0].cpu().numpy() world_points1 = world_points1.reshape((3, -1)).transpose() world_points2 = cam2.reconstruct(pred_depth2, frame='w') world_points2 = world_points2[0].cpu().numpy() world_points2 = world_points2.reshape((3, -1)).transpose() world_points3 = cam3.reconstruct(pred_depth3, frame='w') world_points3 = world_points3[0].cpu().numpy() world_points3 = world_points3.reshape((3, -1)).transpose() world_points4 = cam4.reconstruct(pred_depth4, frame='w') world_points4 = world_points4[0].cpu().numpy() world_points4 = world_points4.reshape((3, -1)).transpose() if hasGTdepth1: gt_depth_file1 = get_depth_file(input_file1) gt_depth1 = np.load(gt_depth_file1)['velodyne_depth'].astype( np.float32) gt_depth1 = torch.from_numpy(gt_depth1).unsqueeze(0).unsqueeze(0) if torch.cuda.is_available(): gt_depth1 = gt_depth1.to('cuda:{}'.format(rank()), dtype=dtype) gt_depth_3d1 = cam1.reconstruct(gt_depth1, frame='w') gt_depth_3d1 = gt_depth_3d1[0].cpu().numpy() gt_depth_3d1 = gt_depth_3d1.reshape((3, -1)).transpose() if hasGTdepth2: gt_depth_file2 = get_depth_file(input_file2) gt_depth2 = np.load(gt_depth_file2)['velodyne_depth'].astype( np.float32) gt_depth2 = torch.from_numpy(gt_depth2).unsqueeze(0).unsqueeze(0) if torch.cuda.is_available(): gt_depth2 = gt_depth2.to('cuda:{}'.format(rank()), dtype=dtype) gt_depth_3d2 = cam2.reconstruct(gt_depth2, frame='w') gt_depth_3d2 = gt_depth_3d2[0].cpu().numpy() gt_depth_3d2 = gt_depth_3d2.reshape((3, -1)).transpose() if hasGTdepth3: gt_depth_file3 = get_depth_file(input_file3) gt_depth3 = np.load(gt_depth_file3)['velodyne_depth'].astype( np.float32) gt_depth3 = torch.from_numpy(gt_depth3).unsqueeze(0).unsqueeze(0) if torch.cuda.is_available(): gt_depth3 = gt_depth3.to('cuda:{}'.format(rank()), dtype=dtype) gt_depth_3d3 = cam3.reconstruct(gt_depth3, frame='w') gt_depth_3d3 = gt_depth_3d3[0].cpu().numpy() gt_depth_3d3 = gt_depth_3d3.reshape((3, -1)).transpose() if hasGTdepth4: gt_depth_file4 = get_depth_file(input_file4) gt_depth4 = np.load(gt_depth_file4)['velodyne_depth'].astype( np.float32) gt_depth4 = torch.from_numpy(gt_depth4).unsqueeze(0).unsqueeze(0) if torch.cuda.is_available(): gt_depth4 = gt_depth4.to('cuda:{}'.format(rank()), dtype=dtype) gt_depth_3d4 = cam4.reconstruct(gt_depth4, frame='w') gt_depth_3d4 = gt_depth_3d4[0].cpu().numpy() gt_depth_3d4 = gt_depth_3d4.reshape((3, -1)).transpose() world_points1 = world_points1[not_masked1] world_points2 = world_points2[not_masked2] world_points3 = world_points3[not_masked3] world_points4 = world_points4[not_masked4] if hasGTdepth1: gt_depth_3d1 = gt_depth_3d1[not_masked1] if hasGTdepth2: gt_depth_3d2 = gt_depth_3d2[not_masked2] if hasGTdepth3: gt_depth_3d3 = gt_depth_3d3[not_masked3] if hasGTdepth4: gt_depth_3d4 = gt_depth_3d4[not_masked4] pcl1 = o3d.geometry.PointCloud() pcl1.points = o3d.utility.Vector3dVector(world_points1) img_numpy1 = image1[0].cpu().numpy() img_numpy1 = img_numpy1.reshape((3, -1)).transpose() pcl2 = o3d.geometry.PointCloud() pcl2.points = o3d.utility.Vector3dVector(world_points2) img_numpy2 = image2[0].cpu().numpy() img_numpy2 = img_numpy2.reshape((3, -1)).transpose() pcl3 = o3d.geometry.PointCloud() pcl3.points = o3d.utility.Vector3dVector(world_points3) img_numpy3 = image3[0].cpu().numpy() img_numpy3 = img_numpy3.reshape((3, -1)).transpose() pcl4 = o3d.geometry.PointCloud() pcl4.points = o3d.utility.Vector3dVector(world_points4) img_numpy4 = image4[0].cpu().numpy() img_numpy4 = img_numpy4.reshape((3, -1)).transpose() img_numpy1 = img_numpy1[not_masked1] pcl1.colors = o3d.utility.Vector3dVector(img_numpy1) img_numpy2 = img_numpy2[not_masked2] pcl2.colors = o3d.utility.Vector3dVector(img_numpy2) img_numpy3 = img_numpy3[not_masked3] pcl3.colors = o3d.utility.Vector3dVector(img_numpy3) img_numpy4 = img_numpy4[not_masked4] pcl4.colors = o3d.utility.Vector3dVector(img_numpy4) #pcl.paint_uniform_color([1.0, 0.0, 0]) #print("Radius outlier removal") #cl, ind = pcl.remove_radius_outlier(nb_points=10, radius=0.5) #display_inlier_outlier(pcl, ind) remove_outliers = True if remove_outliers: cl1, ind1 = pcl1.remove_statistical_outlier(nb_neighbors=10, std_ratio=1.3) inlier_cloud1 = pcl1.select_by_index(ind1) outlier_cloud1 = pcl1.select_by_index(ind1, invert=True) outlier_cloud1.paint_uniform_color([0.0, 0.0, 1.0]) cl2, ind2 = pcl2.remove_statistical_outlier(nb_neighbors=10, std_ratio=1.3) inlier_cloud2 = pcl2.select_by_index(ind2) outlier_cloud2 = pcl2.select_by_index(ind2, invert=True) outlier_cloud2.paint_uniform_color([0.0, 0.0, 1.0]) cl3, ind3 = pcl3.remove_statistical_outlier(nb_neighbors=10, std_ratio=1.3) inlier_cloud3 = pcl3.select_by_index(ind3) outlier_cloud3 = pcl3.select_by_index(ind3, invert=True) outlier_cloud3.paint_uniform_color([0.0, 0.0, 1.0]) cl4, ind4 = pcl4.remove_statistical_outlier(nb_neighbors=10, std_ratio=1.3) inlier_cloud4 = pcl4.select_by_index(ind4) outlier_cloud4 = pcl4.select_by_index(ind4, invert=True) outlier_cloud4.paint_uniform_color([0.0, 0.0, 1.0]) if hasGTdepth1: pcl_gt1 = o3d.geometry.PointCloud() pcl_gt1.points = o3d.utility.Vector3dVector(gt_depth_3d1) pcl_gt1.paint_uniform_color([1.0, 0.0, 0]) if hasGTdepth2: pcl_gt2 = o3d.geometry.PointCloud() pcl_gt2.points = o3d.utility.Vector3dVector(gt_depth_3d2) pcl_gt2.paint_uniform_color([1.0, 0.0, 0]) if hasGTdepth3: pcl_gt3 = o3d.geometry.PointCloud() pcl_gt3.points = o3d.utility.Vector3dVector(gt_depth_3d3) pcl_gt3.paint_uniform_color([1.0, 0.0, 0]) if hasGTdepth4: pcl_gt4 = o3d.geometry.PointCloud() pcl_gt4.points = o3d.utility.Vector3dVector(gt_depth_3d4) pcl_gt4.paint_uniform_color([1.0, 0.0, 0]) if remove_outliers: toPlot = [inlier_cloud1, inlier_cloud2, inlier_cloud3, inlier_cloud4] if hasGTdepth1: toPlot.append(pcl_gt1) if hasGTdepth2: toPlot.append(pcl_gt2) if hasGTdepth3: toPlot.append(pcl_gt3) if hasGTdepth4: toPlot.append(pcl_gt4) toPlotClear = list(toPlot) toPlot.append(outlier_cloud1) toPlot.append(outlier_cloud2) toPlot.append(outlier_cloud3) toPlot.append(outlier_cloud4) o3d.visualization.draw_geometries(toPlot) o3d.visualization.draw_geometries(toPlotClear) toPlot = [pcl1, pcl2, pcl3, pcl4] if hasGTdepth1: toPlot.append(pcl_gt1) if hasGTdepth2: toPlot.append(pcl_gt2) if hasGTdepth3: toPlot.append(pcl_gt3) if hasGTdepth4: toPlot.append(pcl_gt4) o3d.visualization.draw_geometries(toPlot)
bbox = o3d.geometry.AxisAlignedBoundingBox(min_bound=(-1000, -1000, -1),
                                           max_bound=(1000, 1000, 5))
toPlot = [pcl1.crop(bbox), pcl2.crop(bbox), pcl3.crop(bbox), pcl4.crop(bbox)]
if hasGTdepth1:
    toPlot.append(pcl_gt1)
if hasGTdepth2:
    toPlot.append(pcl_gt2)
if hasGTdepth3:
    toPlot.append(pcl_gt3)
if hasGTdepth4:
    toPlot.append(pcl_gt4)
o3d.visualization.draw_geometries(toPlot)

rgb1 = image1[0].permute(1, 2, 0).detach().cpu().numpy() * 255
rgb2 = image2[0].permute(1, 2, 0).detach().cpu().numpy() * 255
rgb3 = image3[0].permute(1, 2, 0).detach().cpu().numpy() * 255
rgb4 = image4[0].permute(1, 2, 0).detach().cpu().numpy() * 255

# Prepare inverse depth visualizations
viz_pred_inv_depth1 = viz_inv_depth(pred_inv_depth1[0]) * 255
viz_pred_inv_depth2 = viz_inv_depth(pred_inv_depth2[0]) * 255
viz_pred_inv_depth3 = viz_inv_depth(pred_inv_depth3[0]) * 255
viz_pred_inv_depth4 = viz_inv_depth(pred_inv_depth4[0]) * 255

# Concatenate RGB and inverse depth vertically
image1 = np.concatenate([rgb1, viz_pred_inv_depth1], 0)
image2 = np.concatenate([rgb2, viz_pred_inv_depth2], 0)
image3 = np.concatenate([rgb3, viz_pred_inv_depth3], 0)
image4 = np.concatenate([rgb4, viz_pred_inv_depth4], 0)

# Save visualization (channels reversed to BGR for the image writer)
print('Saving {} to {}'.format(
    pcolor(input_file1, 'cyan', attrs=['bold']),
    pcolor(output_file1, 'magenta', attrs=['bold'])))
imwrite(output_file1, image1[:, :, ::-1])
print('Saving {} to {}'.format(
    pcolor(input_file2, 'cyan', attrs=['bold']),
    pcolor(output_file2, 'magenta', attrs=['bold'])))
imwrite(output_file2, image2[:, :, ::-1])
print('Saving {} to {}'.format(
    pcolor(input_file3, 'cyan', attrs=['bold']),
    pcolor(output_file3, 'magenta', attrs=['bold'])))
imwrite(output_file3, image3[:, :, ::-1])
print('Saving {} to {}'.format(
    pcolor(input_file4, 'cyan', attrs=['bold']),
    pcolor(output_file4, 'magenta', attrs=['bold'])))
imwrite(output_file4, image4[:, :, ::-1])
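The four per-camera blocks above repeat the same reconstruct / mask / colorize / outlier-removal pipeline. The sketch below factors that pattern into a hypothetical helper (not part of the original script); it assumes the same `cam*`, `pred_depth*`, `image*` and `not_masked*` variables and only uses Open3D calls already used above.

import open3d as o3d


def build_colored_cloud(cam, depth, image, not_masked, nb_neighbors=10, std_ratio=1.3):
    """Hypothetical helper: lift one depth map to a colored world-frame point cloud."""
    # [1,3,H,W] camera output -> (N,3) world points, keeping only non-ego-masked pixels
    points = cam.reconstruct(depth, frame='w')[0].cpu().numpy()
    points = points.reshape((3, -1)).transpose()[not_masked]
    colors = image[0].cpu().numpy().reshape((3, -1)).transpose()[not_masked]

    pcl = o3d.geometry.PointCloud()
    pcl.points = o3d.utility.Vector3dVector(points)
    pcl.colors = o3d.utility.Vector3dVector(colors)

    # Same statistical outlier removal as above; outliers painted blue
    _, ind = pcl.remove_statistical_outlier(nb_neighbors=nb_neighbors, std_ratio=std_ratio)
    inliers = pcl.select_by_index(ind)
    outliers = pcl.select_by_index(ind, invert=True)
    outliers.paint_uniform_color([0.0, 0.0, 1.0])
    return pcl, inliers, outliers


# Usage sketch, e.g. for camera 1:
# pcl1, inlier_cloud1, outlier_cloud1 = build_colored_cloud(
#     cam1, pred_depth1, image1, not_masked1)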
                             np.array([[0.046296, -7.33178]])).float(),
                         scale_factors=torch.from_numpy(np.array([[1.0, 1.0]])).float())
cam_left = CameraFisheye(
    path_to_theta_lut=[
        '/home/data/vbelissen/valeo_multiview/calibrations_theta_lut/fisheye/test/20170320_163113/20170320_163113_cam_0_1280_800.npy'
    ],
    path_to_ego_mask=[''],
    poly_coeffs=torch.from_numpy(
        np.array([[282.85, -27.8671, 114.318, -36.6703]])).float(),
    principal_point=torch.from_numpy(
        np.array([[0.046296, -7.33178]])).float(),
    scale_factors=torch.from_numpy(np.array([[1.0, 1.0]])).float(),
    Tcw=Pose(relative_pose_left_torch))

cam_front = cam_front.to(torch.device('cuda'))
cam_left = cam_left.to(torch.device('cuda'))

# Reconstruct front-camera points in the world frame, project them into the
# left camera, and sample the left image at those coordinates
world_points = cam_front.reconstruct(simulated_depth, frame='w')
ref_coords_left = cam_left.project(world_points, frame='w')
warped_front_left = funct.grid_sample(left_img_torch,
                                      ref_coords_left,
                                      mode='bilinear',
                                      padding_mode='zeros',
                                      align_corners=True)
warped_front_left_PIL = torch.transpose(warped_front_left.unsqueeze(4), 1,
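The last line above is cut off in the source, so the conversion to a saveable image is not shown. One common way to turn the warped [1,3,H,W] tensor into an 8-bit image (a sketch using only standard PyTorch/NumPy/PIL calls, not necessarily what the original code did; the file name is illustrative) is:

import numpy as np
from PIL import Image

# warped_front_left: torch.Tensor of shape [1, 3, H, W], values assumed in [0, 1]
warped_np = (warped_front_left[0]          # [3, H, W]
             .permute(1, 2, 0)             # [H, W, 3]
             .clamp(0, 1)
             .detach().cpu().numpy() * 255).astype(np.uint8)
Image.fromarray(warped_np).save('warped_front_from_left.png')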
import cv2
import numpy as np
import open3d as o3d
import torch
from scipy.spatial.transform import Rotation as R

main_folder = '/home/vbelissen/test_data/valeo_data_ready2train/data/dataset_valeo_cea_2017_2018/'
seq_idx = '20170320_144339'
img_idx = '00011702'

path_to_theta_lut = [
    main_folder + 'images/fisheye/train/' + seq_idx + '/cam_0/theta_tensor_1280_800.npy'
]
poly_coeffs = torch.Tensor([282.85, -27.8671, 114.318, -36.6703]).unsqueeze(0)
principal_point = torch.Tensor([0.046296, -7.33178]).unsqueeze(0)
scale_factors = torch.Tensor([1., 1. / 1.00173]).unsqueeze(0)

Tcw = Pose.identity(len(poly_coeffs))
Twc = Tcw.inverse()
r = R.from_quat([1, 0, 0, 0])

# Load the ground-truth LiDAR depth map as a [1,1,H,W] float tensor
depth_map_valeo = np.zeros((1, 1, 800, 1280))
depth_map_valeo[0, 0, :, :] = \
    np.load(main_folder + 'depth_maps/fisheye/train/' + seq_idx + '/velodyne_0/' +
            seq_idx + '_velodyne_0_' + img_idx + '.npz')['velodyne_depth']
depth_map_valeo = depth_map_valeo.astype('float32')
depth_map_valeo_tensor = torch.from_numpy(depth_map_valeo)


def reconstruct(depth, frame='w'):
    """
    Reconstructs pixel-wise 3D points from a depth map.
def forward(self, batch, return_logs=False, progress=0.0):
    """
    Processes a batch.

    Parameters
    ----------
    batch : dict
        Input batch
    return_logs : bool
        True if logs are stored
    progress :
        Training progress percentage

    Returns
    -------
    output : dict
        Dictionary containing a "loss" scalar and different metrics and predictions
        for logging and downstream usage.
    """
    # Calculate predicted depth and pose output
    output = super().forward(batch, return_logs=return_logs)
    if not self.training:
        # If not training, no need for self-supervised loss
        return output
    else:
        # Otherwise, calculate self-supervised loss
        self_sup_output = self.self_supervised_loss(
            batch['rgb_original'], batch['rgb_context_original'],
            output['inv_depths'], output['poses'], batch['intrinsics'],
            return_logs=return_logs, progress=progress)

        # Pose consistency between the two context poses and the predicted poses
        if len(batch['rgb_context']) == 2 and self.pose_consistency_loss_weight != 0.:
            pose_contexts = self.compute_poses(batch['rgb_context'][0],
                                               [batch['rgb_context'][1]])
            pose_consistency_output = self.pose_consistency_loss(
                invert_pose(output['poses'][0].item()),
                output['poses'][1].item(),
                pose_contexts[0].item())
            pose_consistency_output['loss'] *= self.pose_consistency_loss_weight
            output = merge_outputs(output, pose_consistency_output)

        if self.identity_pose_loss_weight != 0:
            # Identity pose loss: pose(I_t, I_t+1) = pose(I_t+1, I_t)^-1
            pose_vec_minus1_0 = self.pose_net(batch['rgb_context'][0], batch['rgb'])
            pose_vec_minus1_0 = Pose.from_vec(pose_vec_minus1_0, self.rotation_mode)
            identity_pose_output_minus1_0 = self.identity_pose_loss(
                output['poses'][0].item(), invert_pose(pose_vec_minus1_0))
            pose_vec_01 = self.pose_net(batch['rgb_context'][1], batch['rgb'])
            pose_vec_01 = Pose.from_vec(pose_vec_01, self.rotation_mode)
            identity_pose_output_01 = self.identity_pose_loss(
                output['poses'][1].item(), invert_pose(pose_vec_01))
            identity_pose_output_minus1_0['loss'] *= self.identity_pose_loss_weight
            identity_pose_output_01['loss'] *= self.identity_pose_loss_weight
            output = merge_outputs(output, identity_pose_output_minus1_0,
                                   identity_pose_output_01)

        if self.temporal_consistency_loss_weight != 0:
            # Temporal consistency: D_t = proj_on_t(D_t-1) + pose_t-1_t[3,3]
            temporal_consistency_loss = []
            # For each reference (context) image
            for j, (ref_image, pose) in enumerate(
                    zip(batch['rgb_context'], output['poses'])):
                ref_warped_depths = warp_inv_depth(output['inv_depths'],
                                                   batch['intrinsics'],
                                                   batch['intrinsics'], pose)
                # Add the z component from the pose to the warped depth
                ref_warped_depths = [
                    ref_warped_depth + pose[:, 3, 3]
                    for ref_warped_depth in ref_warped_depths
                ]
                ref_inv_depths = self.compute_inv_depths(ref_image)
                ref_depths = [
                    inv2depth(ref_inv_depths[i])
                    for i in range(len(ref_inv_depths))
                ]
                l1_loss = self.temporal_consistency_loss(ref_warped_depths, ref_depths)
                # Reduce the per-scale losses to one scalar per context image
                temporal_consistency_loss.append(
                    torch.stack([l.mean() for l in l1_loss]).mean())
            # Average over context images and apply the loss weight
            temporal_consistency_loss = torch.stack(
                temporal_consistency_loss).mean() * self.temporal_consistency_loss_weight
            output['temporal_consistency_loss'] = temporal_consistency_loss

        # Return loss and metrics
        return {
            'loss': self_sup_output['loss'],
            **merge_outputs(output, self_sup_output),
        }
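The `identity_pose_loss` and `pose_consistency_loss` modules are not shown in this excerpt. As an illustration of the identity constraint pose(I_t, I_t+1) = pose(I_t+1, I_t)^-1, a plausible standalone formulation written directly on [B,4,4] transformation matrices (a sketch, not the project's actual loss) is:

import torch


def identity_pose_l1(pose_forward, pose_backward):
    """L1 penalty encouraging pose_forward to match the inverse of pose_backward.

    Both arguments are [B,4,4] homogeneous transformation matrices.
    """
    return torch.abs(pose_forward - torch.inverse(pose_backward)).mean()


# Usage sketch with two [B,4,4] tensors T_t_to_tp1 and T_tp1_to_t:
# loss = identity_pose_l1(T_t_to_tp1, T_tp1_to_t)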
def photo_loss_2imgs(i_cam1, i_cam2, rot_vect_list, save_pictures, rot_str):
    # Computes the photometric loss between 2 images of adjacent cameras.
    # It reconstructs each image from the adjacent one, using calibration data,
    # the depth prediction model and correction angles alpha, beta, gamma.

    # Re-compute each camera's extrinsics with the extra rotation correction
    for i, i_cam in enumerate([i_cam1, i_cam2]):
        base_folder_str = get_base_folder(input_files[i_cam][i_file])
        split_type_str = get_split_type(input_files[i_cam][i_file])
        seq_name_str = get_sequence_name(input_files[i_cam][i_file])
        camera_str = get_camera_name(input_files[i_cam][i_file])
        calib_data = {}
        calib_data[camera_str] = read_raw_calib_files_camera_valeo(
            base_folder_str, split_type_str, seq_name_str, camera_str)
        pose_matrix = get_extrinsics_pose_matrix_extra_rot_torch(
            input_files[i_cam][i_file], calib_data, rot_vect_list[i]).unsqueeze(0)
        pose_tensor = Pose(pose_matrix).to('cuda:{}'.format(rank()), dtype=dtype)
        CameraFisheye.Twc.fget.cache_clear()
        cams[i_cam].Tcw = pose_tensor

    # Cross-reconstruct each image from the adjacent camera
    world_points1 = cams[i_cam1].reconstruct(pred_depths[i_cam1], frame='w')
    world_points2 = cams[i_cam2].reconstruct(pred_depths[i_cam2], frame='w')
    #depth1_wrt_cam2 = torch.norm(cams[i_cam2].Tcw @ world_points1, dim=1, keepdim=True)
    #depth2_wrt_cam1 = torch.norm(cams[i_cam1].Tcw @ world_points2, dim=1, keepdim=True)
    ref_coords1to2 = cams[i_cam2].project(world_points1, frame='w')
    ref_coords2to1 = cams[i_cam1].project(world_points2, frame='w')
    reconstructedImg2to1 = funct.grid_sample(images[i_cam2] * not_masked[i_cam2],
                                             ref_coords1to2,
                                             mode='bilinear',
                                             padding_mode='zeros',
                                             align_corners=True)
    reconstructedImg1to2 = funct.grid_sample(images[i_cam1] * not_masked[i_cam1],
                                             ref_coords2to1,
                                             mode='bilinear',
                                             padding_mode='zeros',
                                             align_corners=True)
    #print(reconstructedImg2to1)
    # z = reconstructedImg2to1.sum()
    # z.backward()

    if save_pictures:
        if epoch == 0:
            cv2.imwrite(
                '/home/vbelissen/Downloads/cam_' + str(i_cam1) + '_' + str(i_file) + '_orig.png',
                torch.transpose((images[i_cam1][0, :, :, :]).unsqueeze(0).unsqueeze(4),
                                1, 4).squeeze().detach().cpu().numpy() * 255)
            cv2.imwrite(
                '/home/vbelissen/Downloads/cam_' + str(i_cam2) + '_' + str(i_file) + '_orig.png',
                torch.transpose((images[i_cam2][0, :, :, :]).unsqueeze(0).unsqueeze(4),
                                1, 4).squeeze().detach().cpu().numpy() * 255)
        cv2.imwrite(
            '/home/vbelissen/Downloads/cam_' + str(i_cam1) + '_' + str(i_file) +
            '_recons_from_' + str(i_cam2) + '_rot' + rot_str + '.png',
            torch.transpose(((reconstructedImg2to1 * not_masked[i_cam1])[0, :, :, :]
                             ).unsqueeze(0).unsqueeze(4),
                            1, 4).squeeze().detach().cpu().numpy() * 255)
        cv2.imwrite(
            '/home/vbelissen/Downloads/cam_' + str(i_cam2) + '_' + str(i_file) +
            '_recons_from_' + str(i_cam1) + '_rot' + rot_str + '.png',
            torch.transpose(((reconstructedImg1to2 * not_masked[i_cam2])[0, :, :, :]
                             ).unsqueeze(0).unsqueeze(4),
                            1, 4).squeeze().detach().cpu().numpy() * 255)

    #reconstructedDepth2to1 = funct.grid_sample(pred_depths[i_cam2], ref_coords1to2, mode='bilinear', padding_mode='zeros', align_corners=True)
    #reconstructedDepth1to2 = funct.grid_sample(pred_depths[i_cam1], ref_coords2to1, mode='bilinear', padding_mode='zeros', align_corners=True)
    #reconstructedEgo2to1 = funct.grid_sample(not_masked[i_cam2], ref_coords1to2, mode='bilinear', padding_mode='zeros', align_corners=True)
    #reconstructedEgo1to2 = funct.grid_sample(not_masked[i_cam1], ref_coords2to1, mode='bilinear', padding_mode='zeros', align_corners=True)

    # Per-pixel L1 photometric error between each image and its reconstruction
    l1_loss_1 = torch.abs(images[i_cam1] * not_masked[i_cam1] -
                          reconstructedImg2to1 * not_masked[i_cam1])
    l1_loss_2 = torch.abs(images[i_cam2] * not_masked[i_cam2] -
                          reconstructedImg1to2 * not_masked[i_cam2])
    # SSIM loss
    ssim_loss_weight = 0.85
    ssim_loss_1 = SSIM(images[i_cam1] * not_masked[i_cam1],
                       reconstructedImg2to1 * not_masked[i_cam1],
                       C1=1e-4, C2=9e-4, kernel_size=3)
    ssim_loss_2 = SSIM(images[i_cam2] * not_masked[i_cam2],
                       reconstructedImg1to2 * not_masked[i_cam2],
                       C1=1e-4, C2=9e-4, kernel_size=3)
    ssim_loss_1 = torch.clamp((1. - ssim_loss_1) / 2., 0., 1.)
    ssim_loss_2 = torch.clamp((1. - ssim_loss_2) / 2., 0., 1.)

    # Weighted sum: alpha * ssim + (1 - alpha) * l1
    photometric_loss_1 = ssim_loss_weight * ssim_loss_1.mean(1, True) + \
                         (1 - ssim_loss_weight) * l1_loss_1.mean(1, True)
    photometric_loss_2 = ssim_loss_weight * ssim_loss_2.mean(1, True) + \
                         (1 - ssim_loss_weight) * l1_loss_2.mean(1, True)

    # Average only over valid (non-zero) pixels, guarding against empty masks
    mask1 = photometric_loss_1 != 0
    s1 = mask1.sum()
    loss_1 = (photometric_loss_1 * mask1).sum() / s1 if s1 > 0 else 0
    mask2 = photometric_loss_2 != 0
    s2 = mask2.sum()
    loss_2 = (photometric_loss_2 * mask2).sum() / s2 if s2 > 0 else 0
    #valid1 = ...
    #valid2 = ...
    return loss_1 + loss_2
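Because photo_loss_2imgs returns a differentiable scalar, the correction angles can in principle be refined by gradient descent. The surrounding loop is not part of this excerpt; the sketch below is a hypothetical setup (the names rot_vects and n_epochs, the Adam settings, and the choice of adjacent camera pairs are all illustrative assumptions) showing how such a loss could drive the extra-rotation parameters:

import torch

# One (alpha, beta, gamma) correction vector per camera, optimized jointly
rot_vects = [torch.zeros(3, requires_grad=True, device='cuda') for _ in range(4)]
optimizer = torch.optim.Adam(rot_vects, lr=1e-3)

n_epochs = 100
for epoch in range(n_epochs):
    optimizer.zero_grad()
    # Sum the photometric loss over adjacent camera pairs (0-1, 1-2, 2-3, 3-0)
    loss = sum(
        photo_loss_2imgs(i, (i + 1) % 4,
                         [rot_vects[i], rot_vects[(i + 1) % 4]],
                         save_pictures=False, rot_str='')
        for i in range(4))
    loss.backward()
    optimizer.step()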
def warp_ref_image_tensor(self, inv_depths, ref_image, ref_tensor,
                          path_to_theta_lut, path_to_ego_mask, poly_coeffs,
                          principal_point, scale_factors,
                          ref_path_to_theta_lut, ref_path_to_ego_mask,
                          ref_poly_coeffs, ref_principal_point,
                          ref_scale_factors, same_timestamp_as_origin,
                          pose_matrix_context, pose):
    """
    Warps a reference image to produce a reconstruction of the original one.

    Parameters
    ----------
    inv_depths : torch.Tensor [B,1,H,W]
        Inverse depth map of the original image
    ref_image : torch.Tensor [B,3,H,W]
        Reference RGB image
    ref_tensor : torch.Tensor
        Additional reference tensor (warped with nearest-neighbor sampling)
    path_to_theta_lut, path_to_ego_mask, poly_coeffs, principal_point, scale_factors
        Fisheye intrinsics of the original camera
    ref_path_to_theta_lut, ref_path_to_ego_mask, ref_poly_coeffs, ref_principal_point, ref_scale_factors
        Fisheye intrinsics of the reference camera
    same_timestamp_as_origin : torch.Tensor [B]
        Per-sample flag; when True, the predicted pose is composed with the context extrinsics
    pose_matrix_context : torch.Tensor [B,4,4]
        Context camera extrinsics used for same-timestamp references
    pose : Pose
        Original -> Reference camera transformation

    Returns
    -------
    ref_warped : list of torch.Tensor [B,3,H,W]
        Warped reference images, one per scale (reconstructing the original one)
    ref_tensors_warped : list of torch.Tensor
        Warped reference tensors, one per scale
    """
    B, _, H, W = ref_image.shape
    device = ref_image.get_device()

    # Generate cameras for all scales
    cams, ref_cams = [], []
    pose_matrix = torch.zeros(B, 4, 4)
    for b in range(B):
        if same_timestamp_as_origin[b]:
            # Compose the predicted rotation with the known context extrinsics
            pose_matrix[b, :3, 3] = pose.mat[b, :3, :3] @ pose_matrix_context[b, :3, 3]
            pose_matrix[b, 3, 3] = 1
            pose_matrix[b, :3, :3] = pose.mat[b, :3, :3] @ pose_matrix_context[b, :3, :3]
        else:
            pose_matrix[b, :, :] = pose.mat[b, :, :]
    # pose_matrix = torch.zeros(B, 4, 4)
    # for b in range(B):
    #     if not same_timestamp_as_origin[b]:
    #         pose_matrix[b, :, :] = pose.mat[b, :, :]
    #     else:
    #         pose_matrix[b, :, :] = pose_matrix_context[b, :, :]
    pose_matrix = Pose(pose_matrix)

    for i in range(self.n):
        _, _, DH, DW = inv_depths[i].shape
        scale_factor = DW / float(W)
        cams.append(
            CameraFisheye(path_to_theta_lut=path_to_theta_lut,
                          path_to_ego_mask=path_to_ego_mask,
                          poly_coeffs=poly_coeffs.float(),
                          principal_point=principal_point.float(),
                          scale_factors=scale_factors.float()
                          ).scaled(scale_factor).to(device))
        ref_cams.append(
            CameraFisheye(path_to_theta_lut=ref_path_to_theta_lut,
                          path_to_ego_mask=ref_path_to_ego_mask,
                          poly_coeffs=ref_poly_coeffs.float(),
                          principal_point=ref_principal_point.float(),
                          scale_factors=ref_scale_factors.float(),
                          Tcw=pose_matrix).scaled(scale_factor).to(device))

    # View synthesis
    depths = [inv2depth(inv_depths[i]) for i in range(self.n)]
    ref_images = match_scales(ref_image, inv_depths, self.n)
    ref_warped = [
        view_synthesis(ref_images[i], depths[i], ref_cams[i], cams[i],
                       padding_mode=self.padding_mode, mode='bilinear')
        for i in range(self.n)
    ]
    ref_tensors = match_scales(ref_tensor, inv_depths, self.n,
                               mode='nearest', align_corners=None)
    ref_tensors_warped = [
        view_synthesis(ref_tensors[i], depths[i], ref_cams[i], cams[i],
                       padding_mode=self.padding_mode, mode='nearest',
                       align_corners=None)
        for i in range(self.n)
    ]
    #print(ref_tensors[0][:, :, ::40, ::40])
    #print(ref_tensors_warped[0][:, :, ::40, ::40])

    # Return warped reference image and tensors
    return ref_warped, ref_tensors_warped
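The per-sample loop that builds pose_matrix can also be expressed with batched matrix products. The sketch below (an equivalent formulation shown for clarity rather than a drop-in replacement, and assuming same_timestamp_as_origin is a bool tensor of shape [B]) performs the same composition without the Python loop:

import torch


def compose_context_pose(pose_mat, pose_matrix_context, same_timestamp_as_origin):
    """Batched version of the per-sample pose composition above.

    pose_mat, pose_matrix_context: [B,4,4]; same_timestamp_as_origin: bool tensor [B].
    """
    composed = torch.zeros_like(pose_mat)
    # Rotation and translation composed with the context extrinsics
    composed[:, :3, :3] = pose_mat[:, :3, :3] @ pose_matrix_context[:, :3, :3]
    composed[:, :3, 3] = (pose_mat[:, :3, :3] @ pose_matrix_context[:, :3, 3:]).squeeze(-1)
    composed[:, 3, 3] = 1.0
    # Same-timestamp samples use the composed pose, others keep the predicted pose
    keep = same_timestamp_as_origin.view(-1, 1, 1)
    return torch.where(keep, composed, pose_mat)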