def test_perspective(self):
    cameras = PerspectiveCameras()
    P = cameras.get_projection_transform()
    vertices = torch.randn([3, 4, 3], dtype=torch.float32)
    v1 = P.transform_points(vertices)
    v2 = sfm_perspective_project_naive(vertices)
    self.assertClose(v1, v2)
def test_perspective_kwargs(self):
    cameras = PerspectiveCameras(focal_length=5.0, principal_point=((2.5, 2.5),))
    P = cameras.get_projection_transform(
        focal_length=2.0, principal_point=((2.5, 3.5),)
    )
    vertices = torch.randn([3, 4, 3], dtype=torch.float32)
    v1 = P.transform_points(vertices)
    v2 = sfm_perspective_project_naive(vertices, fx=2.0, fy=2.0, p0x=2.5, p0y=3.5)
    self.assertClose(v1, v2, atol=1e-6)
def test_getitem(self):
    R_matrix = torch.randn((6, 3, 3))
    principal_point = torch.randn((6, 2, 1))
    focal_length = 5.0
    cam = PerspectiveCameras(
        R=R_matrix,
        focal_length=focal_length,
        principal_point=principal_point,
    )

    # Check get item returns an instance of the same class
    # with all the same keys
    c0 = cam[0]
    self.assertTrue(isinstance(c0, PerspectiveCameras))
    self.assertEqual(cam.__dict__.keys(), c0.__dict__.keys())

    # Check torch.LongTensor index
    index = torch.tensor([1, 3, 5], dtype=torch.int64)
    c135 = cam[index]
    self.assertEqual(len(c135), 3)
    self.assertClose(c135.focal_length, torch.tensor([[5.0, 5.0]] * 3))
    self.assertClose(c135.R, R_matrix[[1, 3, 5], ...])
    self.assertClose(c135.principal_point, principal_point[[1, 3, 5], ...])

    # Check in_ndc is handled correctly
    self.assertEqual(cam._in_ndc, c0._in_ndc)
def _get_pytorch3d_camera(
    self,
    entry: types.FrameAnnotation,
    scale: float,
    clamp_bbox_xyxy: Optional[torch.Tensor],
) -> PerspectiveCameras:
    entry_viewpoint = entry.viewpoint
    assert entry_viewpoint is not None
    # principal point and focal length
    principal_point = torch.tensor(
        entry_viewpoint.principal_point, dtype=torch.float
    )
    focal_length = torch.tensor(entry_viewpoint.focal_length, dtype=torch.float)

    half_image_size_wh_orig = (
        torch.tensor(list(reversed(entry.image.size)), dtype=torch.float) / 2.0
    )

    # first, we convert from the dataset's NDC convention to pixels
    format = entry_viewpoint.intrinsics_format
    if format.lower() == "ndc_norm_image_bounds":
        # this is e.g. currently used in CO3D for storing intrinsics
        rescale = half_image_size_wh_orig
    elif format.lower() == "ndc_isotropic":
        rescale = half_image_size_wh_orig.min()
    else:
        raise ValueError(f"Unknown intrinsics format: {format}")

    # principal point and focal length in pixels
    principal_point_px = half_image_size_wh_orig - principal_point * rescale
    focal_length_px = focal_length * rescale
    if self.box_crop:
        assert clamp_bbox_xyxy is not None
        principal_point_px -= clamp_bbox_xyxy[:2]

    # now, convert from pixels to PyTorch3D v0.5+ NDC convention
    if self.image_height is None or self.image_width is None:
        out_size = list(reversed(entry.image.size))
    else:
        out_size = [self.image_width, self.image_height]

    half_image_size_output = torch.tensor(out_size, dtype=torch.float) / 2.0
    half_min_image_size_output = half_image_size_output.min()

    # rescaled principal point and focal length in ndc
    principal_point = (
        half_image_size_output - principal_point_px * scale
    ) / half_min_image_size_output
    focal_length = focal_length_px * scale / half_min_image_size_output

    return PerspectiveCameras(
        focal_length=focal_length[None],
        principal_point=principal_point[None],
        R=torch.tensor(entry_viewpoint.R, dtype=torch.float)[None],
        T=torch.tensor(entry_viewpoint.T, dtype=torch.float)[None],
    )
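# A minimal numeric sketch of the intrinsics conversion performed above
# (dataset NDC -> pixels -> PyTorch3D v0.5+ NDC). The values are assumptions
# for illustration only: a hypothetical 800 x 600 (W x H) source image stored
# with the "ndc_norm_image_bounds" convention, no box crop, no resize
# (scale = 1.0), and made-up principal point / focal length.
import torch

half_wh = torch.tensor([800.0, 600.0]) / 2.0        # (W/2, H/2) in pixels
pp_ndc = torch.tensor([0.05, -0.1])                  # assumed dataset principal point
f_ndc = torch.tensor([2.0, 2.0])                     # assumed dataset focal length

# dataset NDC -> pixels (rescale == half_wh for "ndc_norm_image_bounds")
pp_px = half_wh - pp_ndc * half_wh                   # tensor([380., 330.])
f_px = f_ndc * half_wh                               # tensor([800., 600.])

# pixels -> PyTorch3D NDC (normalized by half of the smaller image side)
half_out = half_wh                                   # no resize in this sketch
pp_p3d = (half_out - pp_px * 1.0) / half_out.min()   # tensor([ 0.0667, -0.1000])
f_p3d = f_px * 1.0 / half_out.min()                  # tensor([2.6667, 2.0000])
print(pp_p3d, f_p3d)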
def test_perspective_scaled(self):
    focal_length_x = 10.0
    focal_length_y = 15.0
    p0x = 15.0
    p0y = 30.0

    cameras = PerspectiveCameras(
        focal_length=((focal_length_x, focal_length_y),),
        principal_point=((p0x, p0y),),
    )
    P = cameras.get_projection_transform()
    vertices = torch.randn([3, 4, 3], dtype=torch.float32)
    v1 = P.transform_points(vertices)
    v2 = sfm_perspective_project_naive(
        vertices, fx=focal_length_x, fy=focal_length_y, p0x=p0x, p0y=p0y
    )
    v3 = cameras.transform_points(vertices)
    self.assertClose(v1, v2)
    self.assertClose(v3[..., :2], v2[..., :2])
def test_gm(self):
    # Simple test of a forward pass of the default GenericModel.
    device = torch.device("cuda:1")
    expand_args_fields(GenericModel)
    model = GenericModel()
    model.to(device)

    n_train_cameras = 2
    R, T = look_at_view_transform(azim=torch.rand(n_train_cameras) * 360)
    cameras = PerspectiveCameras(R=R, T=T, device=device)

    # TODO: make these default to None?
    defaulted_args = {
        "fg_probability": None,
        "depth_map": None,
        "mask_crop": None,
        "sequence_name": None,
    }

    with self.assertWarnsRegex(UserWarning, "No main objective found"):
        model(
            camera=cameras,
            evaluation_mode=EvaluationMode.TRAINING,
            **defaulted_args,
            image_rgb=None,
        )
    target_image_rgb = torch.rand(
        (n_train_cameras, 3, model.render_image_height, model.render_image_width),
        device=device,
    )
    train_preds = model(
        camera=cameras,
        evaluation_mode=EvaluationMode.TRAINING,
        image_rgb=target_image_rgb,
        **defaulted_args,
    )
    self.assertGreater(train_preds["objective"].item(), 0)

    model.eval()
    with torch.no_grad():
        # TODO: perhaps this warning should be skipped in eval mode?
        with self.assertWarnsRegex(UserWarning, "No main objective found"):
            eval_preds = model(
                camera=cameras[0],
                **defaulted_args,
                image_rgb=None,
            )
    self.assertEqual(
        eval_preds["images_render"].shape,
        (1, 3, model.render_image_height, model.render_image_width),
    )
def __init__(self):
    super(SceneModel, self).__init__()
    self.gamma = 1.0
    # Points.
    torch.manual_seed(1)
    vert_pos = torch.rand(N_POINTS, 3, dtype=torch.float32, device=DEVICE) * 10.0
    vert_pos[:, 2] += 25.0
    vert_pos[:, :2] -= 5.0
    self.register_parameter(
        "vert_pos", nn.Parameter(vert_pos, requires_grad=True)
    )
    self.register_parameter(
        "vert_col",
        nn.Parameter(
            torch.ones(N_POINTS, 3, dtype=torch.float32, device=DEVICE) * 0.5,
            requires_grad=True,
        ),
    )
    self.register_parameter(
        "vert_rad",
        nn.Parameter(
            torch.ones(N_POINTS, dtype=torch.float32) * 0.3, requires_grad=True
        ),
    )
    self.register_buffer(
        "cam_params",
        torch.tensor(
            [0.0, 0.0, 0.0, 0.0, math.pi, 0.0, 5.0, 2.0], dtype=torch.float32
        ),
    )
    self.cameras = PerspectiveCameras(
        # The focal length must be double the size for PyTorch3D because of the NDC
        # coordinates spanning a range of two - and they must be normalized by the
        # sensor width (see the pulsar example). This means we need here
        # 5.0 * 2.0 / 2.0 to get the equivalent results as in pulsar.
        focal_length=5.0,
        R=torch.eye(3, dtype=torch.float32, device=DEVICE)[None, ...],
        T=torch.zeros((1, 3), dtype=torch.float32, device=DEVICE),
        image_size=((HEIGHT, WIDTH),),
        device=DEVICE,
    )
    raster_settings = PointsRasterizationSettings(
        image_size=(HEIGHT, WIDTH),
        radius=self.vert_rad,
    )
    rasterizer = PointsRasterizer(
        cameras=self.cameras, raster_settings=raster_settings
    )
    self.renderer = PulsarPointsRenderer(rasterizer=rasterizer, n_track=32)
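# A small numeric sketch of the focal-length conversion described in the
# comment above, assuming a pulsar focal length of 5.0 world units and a
# sensor width of 2.0 (the last two entries of cam_params). PyTorch3D's NDC
# range spans two units, so the equivalent NDC focal length is
# focal * 2.0 / sensor_width.
focal_pulsar = 5.0
sensor_width = 2.0
focal_ndc = focal_pulsar * 2.0 / sensor_width  # = 5.0, the value passed above
print(focal_ndc)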
def test_simple_sphere_pulsar(self):
    for device in [torch.device("cpu"), torch.device("cuda")]:
        sphere_mesh = ico_sphere(1, device)
        verts_padded = sphere_mesh.verts_padded()
        # Shift vertices to check coordinate frames are correct.
        verts_padded[..., 1] += 0.2
        verts_padded[..., 0] += 0.2
        pointclouds = Pointclouds(
            points=verts_padded, features=torch.ones_like(verts_padded)
        )
        for azimuth in [0.0, 90.0]:
            R, T = look_at_view_transform(2.7, 0.0, azimuth)
            for camera_name, cameras in [
                ("fovperspective", FoVPerspectiveCameras(device=device, R=R, T=T)),
                (
                    "fovorthographic",
                    FoVOrthographicCameras(device=device, R=R, T=T),
                ),
                ("perspective", PerspectiveCameras(device=device, R=R, T=T)),
                ("orthographic", OrthographicCameras(device=device, R=R, T=T)),
            ]:
                raster_settings = PointsRasterizationSettings(
                    image_size=256, radius=5e-2, points_per_pixel=1
                )
                rasterizer = PointsRasterizer(
                    cameras=cameras, raster_settings=raster_settings
                )
                renderer = PulsarPointsRenderer(rasterizer=rasterizer).to(device)
                # Load reference image
                filename = (
                    "pulsar_simple_pointcloud_sphere_"
                    f"azimuth{azimuth}_{camera_name}.png"
                )
                image_ref = load_rgb_image("test_%s" % filename, DATA_DIR)
                images = renderer(
                    pointclouds, gamma=(1e-3,), znear=(1.0,), zfar=(100.0,)
                )
                rgb = images[0, ..., :3].squeeze().cpu()
                if DEBUG:
                    filename = "DEBUG_%s" % filename
                    Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(
                        DATA_DIR / filename
                    )
                self.assertClose(rgb, image_ref, rtol=7e-3, atol=5e-3)
def test_join_cameras_as_batch_errors(self):
    cam0 = PerspectiveCameras(device="cuda:0")
    cam1 = OrthographicCameras(device="cuda:0")

    # Cameras not of the same type
    with self.assertRaisesRegex(ValueError, "same type"):
        join_cameras_as_batch([cam0, cam1])

    cam2 = OrthographicCameras(device="cpu")
    # Cameras not on the same device
    with self.assertRaisesRegex(ValueError, "same device"):
        join_cameras_as_batch([cam1, cam2])

    cam3 = OrthographicCameras(in_ndc=False, device="cuda:0")
    # Different coordinate systems -- all should be in ndc or in screen
    with self.assertRaisesRegex(
        ValueError, "Attribute _in_ndc is not constant across inputs"
    ):
        join_cameras_as_batch([cam1, cam3])
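# For contrast with the error cases above, a minimal sketch of a join that
# succeeds, assuming join_cameras_as_batch is importable from
# pytorch3d.renderer.camera_utils: both cameras share the camera type, device
# and NDC convention, so their parameters are concatenated into one batch.
import torch
from pytorch3d.renderer import PerspectiveCameras
from pytorch3d.renderer.camera_utils import join_cameras_as_batch

cam_a = PerspectiveCameras(focal_length=1.0)
cam_b = PerspectiveCameras(focal_length=2.0)
batch = join_cameras_as_batch([cam_a, cam_b])
print(len(batch))  # 2 -- a single batched PerspectiveCameras holding both cameras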
def test_invert_eye_at_up(self):
    # Generate random cameras and check we can reconstruct their eye, at,
    # and up vectors.
    N = 13
    eye = torch.rand(N, 3)
    at = torch.rand(N, 3)
    up = torch.rand(N, 3)

    R, T = look_at_view_transform(eye=eye, at=at, up=up)
    cameras = PerspectiveCameras(R=R, T=T)

    eye2, at2, up2 = camera_to_eye_at_up(cameras.get_world_to_view_transform())

    # The retrieved eye matches
    self.assertClose(eye, eye2, atol=1e-5)
    self.assertClose(cameras.get_camera_center(), eye)

    # at-eye as retrieved must be a vector in the same direction as
    # the original.
    self.assertClose(normalize(at - eye), normalize(at2 - eye2))

    # The up vector as retrieved should be rotated the same amount
    # around at-eye as the original. The component in the at-eye
    # direction is unimportant, as is the length.
    # So check that (up x (at-eye)) as retrieved is in the same
    # direction as its original value.
    up_check = torch.cross(up, at - eye, dim=-1)
    up_check2 = torch.cross(up2, at - eye, dim=-1)
    self.assertClose(normalize(up_check), normalize(up_check2))

    # Master check that we get the same camera if we reinitialise.
    R2, T2 = look_at_view_transform(eye=eye2, at=at2, up=up2)
    cameras2 = PerspectiveCameras(R=R2, T=T2)
    cam_trans = cameras.get_world_to_view_transform()
    cam_trans2 = cameras2.get_world_to_view_transform()

    self.assertClose(cam_trans.get_matrix(), cam_trans2.get_matrix(), atol=1e-5)
def test_to(self):
    cpu_device = torch.device("cpu")
    cuda_device = torch.device("cuda:0")

    R, T = look_at_view_transform()

    shader_classes = [
        HardFlatShader,
        HardGouraudShader,
        HardPhongShader,
        SoftPhongShader,
    ]

    for shader_class in shader_classes:
        for cameras_class in (None, PerspectiveCameras):
            if cameras_class is None:
                cameras = None
            else:
                cameras = PerspectiveCameras(device=cpu_device, R=R, T=T)

            cpu_shader = shader_class(device=cpu_device, cameras=cameras)
            if cameras is None:
                self.assertIsNone(cpu_shader.cameras)
            else:
                self.assertEqual(cpu_device, cpu_shader.cameras.device)
            self.assertEqual(cpu_device, cpu_shader.materials.device)
            self.assertEqual(cpu_device, cpu_shader.lights.device)

            cuda_shader = cpu_shader.to(cuda_device)
            self.assertIs(cpu_shader, cuda_shader)
            if cameras is None:
                self.assertIsNone(cuda_shader.cameras)
            else:
                self.assertEqual(cuda_device, cuda_shader.cameras.device)
            self.assertEqual(cuda_device, cuda_shader.materials.device)
            self.assertEqual(cuda_device, cuda_shader.lights.device)
def test_perspective_type(self):
    cam = PerspectiveCameras(focal_length=5.0, principal_point=((2.5, 2.5),))
    self.assertTrue(cam.is_perspective())
    self.assertEqual(cam.get_znear(), None)
def test_unified_inputs_pulsar(self):
    # Test data on different devices.
    for device in [torch.device("cpu"), torch.device("cuda")]:
        sphere_mesh = ico_sphere(1, device)
        verts_padded = sphere_mesh.verts_padded()
        pointclouds = Pointclouds(
            points=verts_padded, features=torch.ones_like(verts_padded)
        )
        R, T = look_at_view_transform(2.7, 0.0, 0.0)
        # Test the different camera types.
        for _, cameras in [
            ("fovperspective", FoVPerspectiveCameras(device=device, R=R, T=T)),
            (
                "fovorthographic",
                FoVOrthographicCameras(device=device, R=R, T=T),
            ),
            ("perspective", PerspectiveCameras(device=device, R=R, T=T)),
            ("orthographic", OrthographicCameras(device=device, R=R, T=T)),
        ]:
            # Test different ways for image size specification.
            for image_size in (256, (256, 256)):
                raster_settings = PointsRasterizationSettings(
                    image_size=image_size, radius=5e-2, points_per_pixel=1
                )
                rasterizer = PointsRasterizer(
                    cameras=cameras, raster_settings=raster_settings
                )
                # Test that the compositor can be provided. Its value is ignored,
                # so use a dummy.
                _ = PulsarPointsRenderer(rasterizer=rasterizer, compositor=1).to(
                    device
                )
                # Constructor without compositor.
                _ = PulsarPointsRenderer(rasterizer=rasterizer).to(device)
                # Constructor with n_channels.
                _ = PulsarPointsRenderer(rasterizer=rasterizer, n_channels=3).to(
                    device
                )
                # Constructor with max_num_spheres.
                renderer = PulsarPointsRenderer(
                    rasterizer=rasterizer, max_num_spheres=1000
                ).to(device)
                # Test the forward function.
                if isinstance(cameras, (PerspectiveCameras, OrthographicCameras)):
                    # znear and zfar are required in this case.
                    self.assertRaises(
                        ValueError,
                        lambda: renderer.forward(
                            point_clouds=pointclouds, gamma=(1e-4,)
                        ),
                    )
                    renderer.forward(
                        point_clouds=pointclouds,
                        gamma=(1e-4,),
                        znear=(1.0,),
                        zfar=(2.0,),
                    )
                    # znear and zfar must be batched.
                    self.assertRaises(
                        TypeError,
                        lambda: renderer.forward(
                            point_clouds=pointclouds,
                            gamma=(1e-4,),
                            znear=1.0,
                            zfar=(2.0,),
                        ),
                    )
                    self.assertRaises(
                        TypeError,
                        lambda: renderer.forward(
                            point_clouds=pointclouds,
                            gamma=(1e-4,),
                            znear=(1.0,),
                            zfar=2.0,
                        ),
                    )
                else:
                    # gamma must be batched.
                    self.assertRaises(
                        TypeError,
                        lambda: renderer.forward(
                            point_clouds=pointclouds, gamma=1e-4
                        ),
                    )
                    renderer.forward(point_clouds=pointclouds, gamma=(1e-4,))
                # rasterizer width and height change.
                renderer.rasterizer.raster_settings.image_size = 0
                self.assertRaises(
                    ValueError,
                    lambda: renderer.forward(
                        point_clouds=pointclouds, gamma=(1e-4,)
                    ),
                )
def __init__(self):
    super(SceneModel, self).__init__()
    self.gamma = 0.1
    # Points.
    torch.manual_seed(1)
    vert_pos = torch.rand(N_POINTS, 3, dtype=torch.float32) * 10.0
    vert_pos[:, 2] += 25.0
    vert_pos[:, :2] -= 5.0
    self.register_parameter(
        "vert_pos", nn.Parameter(vert_pos, requires_grad=False)
    )
    self.register_parameter(
        "vert_col",
        nn.Parameter(
            torch.rand(N_POINTS, 3, dtype=torch.float32),
            requires_grad=False,
        ),
    )
    self.register_parameter(
        "vert_rad",
        nn.Parameter(
            torch.rand(N_POINTS, dtype=torch.float32),
            requires_grad=False,
        ),
    )
    self.register_parameter(
        "cam_pos",
        nn.Parameter(
            torch.tensor([0.1, 0.1, 0.0], dtype=torch.float32),
            requires_grad=True,
        ),
    )
    self.register_parameter(
        "cam_rot",
        # We're using the 6D rot. representation for better gradients.
        nn.Parameter(
            axis_angle_to_matrix(
                torch.tensor(
                    [
                        [0.02, 0.02, 0.01],
                    ],
                    dtype=torch.float32,
                )
            )[0],
            requires_grad=True,
        ),
    )
    self.register_parameter(
        "focal_length",
        nn.Parameter(
            torch.tensor(
                [
                    4.8 * 2.0 / 2.0,
                ],
                dtype=torch.float32,
            ),
            requires_grad=True,
        ),
    )
    self.cameras = PerspectiveCameras(
        # The focal length must be double the size for PyTorch3D because of the NDC
        # coordinates spanning a range of two - and they must be normalized by the
        # sensor width (see the pulsar example). This means we need here
        # 5.0 * 2.0 / 2.0 to get the equivalent results as in pulsar.
        #
        # R, T and f are provided here, but will be provided again at every call
        # to the forward method. The reason is a problem with PyTorch that makes
        # device placement for gradients problematic for tensors which are
        # themselves on a 'gradient path' but are not leaves in the computation
        # tree. This will be addressed by an architectural change in PyTorch3D in
        # the future. Until then, this workaround is recommended.
        focal_length=self.focal_length,
        R=self.cam_rot[None, ...],
        T=self.cam_pos[None, ...],
        image_size=((HEIGHT, WIDTH),),
        device=DEVICE,
    )
    raster_settings = PointsRasterizationSettings(
        image_size=(HEIGHT, WIDTH),
        radius=self.vert_rad,
    )
    rasterizer = PointsRasterizer(
        cameras=self.cameras, raster_settings=raster_settings
    )
    self.renderer = PulsarPointsRenderer(rasterizer=rasterizer)
def main(args):
    # set for reproducibility
    torch.manual_seed(42)
    if args.dtype == "float":
        args.dtype = torch.float32
    elif args.dtype == "double":
        args.dtype = torch.float64

    # ## 1. Set up Cameras and load ground truth positions

    # load the SE3 graph of relative/absolute camera positions
    if (args.input_folder / "images.bin").is_file():
        ext = '.bin'
    elif (args.input_folder / "images.txt").is_file():
        ext = '.txt'
    else:
        print(f"error: no images.bin or images.txt found in {args.input_folder}")
        return
    cameras, images, points3D = read_model(args.input_folder, ext)
    images_df = pd.DataFrame.from_dict(images, orient="index").set_index("id")
    cameras_df = pd.DataFrame.from_dict(cameras, orient="index").set_index("id")
    points_df = pd.DataFrame.from_dict(points3D, orient="index").set_index("id")
    print(points_df)
    print(images_df)
    print(cameras_df)
    ref_pointcloud = PyntCloud.from_file(args.ply)
    ref_pointcloud = torch.from_numpy(ref_pointcloud.xyz).to(device, dtype=args.dtype)

    points_3d = np.stack(points_df["xyz"].values)
    points_3d = torch.from_numpy(points_3d).to(device, dtype=args.dtype)

    cameras_R = np.stack(
        [qvec2rotmat(q) for _, q in images_df["qvec"].iteritems()])
    cameras_R = torch.from_numpy(cameras_R).to(device, dtype=args.dtype).transpose(
        1, 2)
    cameras_T = torch.from_numpy(np.stack(images_df["tvec"].values)).to(
        device, dtype=args.dtype)

    cameras_params = torch.from_numpy(np.stack(
        cameras_df["params"].values)).to(device, dtype=args.dtype)
    cameras_params = cameras_params[:, :4]
    print(cameras_params)

    # Construct the visibility map: True at (frame, point) if the point is visible
    # in the frame, False otherwise.
    # Thus, we can ignore reprojection errors for invisible points.
    visibility = np.full((cameras_R.shape[0], points_3d.shape[0]), False)
    visibility = pd.DataFrame(visibility,
                              index=images_df.index,
                              columns=points_df.index)

    points_2D_gt = []
    for idx, (pts_ids, xy) in images_df[["point3D_ids", "xys"]].iterrows():
        pts_ids_clean = pts_ids[pts_ids != -1]
        pts_2D = pd.DataFrame(xy[pts_ids != -1], index=pts_ids_clean)
        pts_2D = pts_2D[~pts_2D.index.duplicated(keep=False)].reindex(
            points_df.index).dropna()
        points_2D_gt.append(pts_2D.values)
        visibility.loc[idx, pts_2D.index] = True

    print(visibility)
    visibility = torch.from_numpy(visibility.values).to(device)

    eps = 1e-3
    # The visibility map is very sparse, so we can use PyTorch3D's padding utility
    # to reduce points_2D_gt to shape (num_frames, max points seen by a frame).
    points_2D_gt = list_to_padded([torch.from_numpy(p) for p in points_2D_gt],
                                  pad_value=eps).to(device, dtype=args.dtype)
    print(points_2D_gt)

    cameras_df["raw_id"] = np.arange(len(cameras_df))
    cameras_id_per_image = torch.from_numpy(
        cameras_df["raw_id"][images_df["camera_id"]].values).to(device)
    # the number of absolute camera positions
    N = len(images_df)
    nonzer = (points_2D_gt != eps).all(dim=-1)
    # print(padded)
    # print(points_2D_gt, points_2D_gt.shape)

    # ## 2. Define optimization functions
    #
    # ### Relative cameras and camera distance
    # We now define two functions crucial for the optimization.
    #
    # **`calc_camera_distance`** compares a pair of cameras.
    # This function is important as it defines the loss that we are minimizing.
    # The method utilizes the `so3_relative_angle` function from the SO3 API.
    #
    # **`get_relative_camera`** computes the parameters of a relative camera
    # that maps between a pair of absolute cameras. Here we utilize the `compose`
    # and `inverse` class methods from the PyTorch3D Transforms API.

    def calc_camera_distance(cam_1, cam_2):
        """
        Calculates the divergence of a batch of pairs of cameras cam_1, cam_2.
        The distance is composed of the cosine of the relative angle between
        the rotation components of the camera extrinsics and the l2 distance
        between the translation vectors.
        """
        # rotation distance
        R_distance = (
            1. - so3_relative_angle(cam_1.R, cam_2.R, cos_angle=True)).mean()
        # translation distance
        T_distance = ((cam_1.T - cam_2.T)**2).sum(1).mean()
        # the final distance is the sum
        return R_distance + T_distance

    # ## 3. Optimization
    # Finally, we start the optimization of the absolute cameras.
    #
    # We use SGD with momentum and optimize over `log_R_absolute` and `T_absolute`.
    #
    # As mentioned earlier, `log_R_absolute` is the axis angle representation of the
    # rotation part of our absolute cameras. We can obtain the 3x3 rotation matrix
    # `R_absolute` that corresponds to `log_R_absolute` with:
    #
    # `R_absolute = so3_exponential_map(log_R_absolute)`
    #

    fxfyu0v0 = cameras_params[cameras_id_per_image]
    cameras_absolute_gt = PerspectiveCameras(
        focal_length=fxfyu0v0[:, :2],
        principal_point=fxfyu0v0[:, 2:],
        R=cameras_R,
        T=cameras_T,
        device=device,
    )

    # Normally, the points_2d are the ones we should use to minimize reprojection
    # errors. But we have been dealing with instability, so we can reproject the
    # 3D points instead and use their reprojection, since we assume Colmap's
    # bundle adjuster to have converged on its own beforehand.
    use_3d_points = True
    if use_3d_points:
        with torch.no_grad():
            padded_points = list_to_padded(
                [points_3d[visibility[c]] for c in range(N)], pad_value=1e-3)
            points_2D_gt = cameras_absolute_gt.transform_points(
                padded_points, eps=1e-4)[:, :, :2]
            relative_points_gt = padded_points @ cameras_R + cameras_T[:, None]

    # The starting point is normally points_3d, camera_R and camera_T.
    # For a stability test, you can try to add noise and see if the optimization
    # gets back to the initial state (spoiler alert: it's complicated).
    # Set noise and shift to 0 for a normal starting point.
    noise = 0
    shift = 0.1
    points_init = points_3d + noise * torch.randn(
        points_3d.shape, dtype=torch.float32, device=device) + shift
    log_R_init = so3_log_map(cameras_R) + noise * torch.randn(
        N, 3, dtype=torch.float32, device=device)
    T_init = cameras_T + noise * torch.randn(
        cameras_T.shape, dtype=torch.float32, device=device) - shift
    cams_init = cameras_params  # + noise * torch.randn(cameras_params.shape, dtype=torch.float32, device=device)

    # instantiate a copy of the initialization of log_R / T
    log_R = log_R_init.clone().detach()
    log_R.requires_grad = True
    T = T_init.clone().detach()
    T.requires_grad = True
    cams_params = cams_init.clone().detach()
    cams_params.requires_grad = True
    points = points_init.clone().detach()
    points.requires_grad = True

    # init the optimizer
    # Different learning rates per parameter? By intuition I'd say that it should
    # be higher for T and lower for log_R.
    # Params could be optimized as well but it's unlikely to be interesting.
    param_groups = [{
        'params': points,
        'lr': args.lr
    }, {
        'params': log_R,
        'lr': 0.1 * args.lr
    }, {
        'params': T,
        'lr': 2 * args.lr
    }, {
        'params': cams_params,
        'lr': 0
    }]
    optimizer = torch.optim.SGD(param_groups, lr=args.lr, momentum=0.9)

    # run the optimization
    n_iter = 200000  # fix the number of iterations

    # Compute inliers
    # In the model, some 3d points have their reprojection way off compared to the
    # target 2d point. It is potentially a great source of instability. inliers
    # keeps track of those problematic points to discard them from optimization.
    discard_outliers = True
    if discard_outliers:
        with torch.no_grad():
            padded_points = list_to_padded(
                [points_3d[visibility[c]] for c in range(N)], pad_value=1e-3)
            projected_points = cameras_absolute_gt.transform_points(
                padded_points, eps=1e-4)[:, :, :2]
            points_distance = ((projected_points[nonzer] -
                                points_2D_gt[nonzer])**2).sum(dim=1)
            inliers = (points_distance < 100).clone().detach()
            print(inliers)
    else:
        inliers = points_2D_gt[nonzer] == points_2D_gt[
            nonzer]  # All true, except NaNs
    loss_log = []
    cam_dist_log = []
    pts_dist_log = []
    for it in range(n_iter):
        # re-init the optimizer gradients
        optimizer.zero_grad()
        R = so3_exponential_map(log_R)
        fxfyu0v0 = cams_params[cameras_id_per_image]
        # get the current absolute cameras
        cameras_absolute = PerspectiveCameras(
            focal_length=fxfyu0v0[:, :2],
            principal_point=fxfyu0v0[:, 2:],
            R=R,
            T=T,
            device=device,
        )

        padded_points = list_to_padded(
            [points[visibility[c]] for c in range(N)], pad_value=1e-3)

        # Two ways of optimizing:
        # 1) minimize the 2d projection error. Potentially unstable, especially
        #    with very close points. This is problematic as close points are the
        #    ones for which we want the pose modification to be small, but
        #    gradient descent gives them the largest step size. We could maybe
        #    use Adam, but the instability remains.
        # 2) minimize the 3d relative position error (the initial 3d relative
        #    position is considered ground truth). No more instability for very
        #    close points, but the 2d reprojection error is not guaranteed to be
        #    minimized.
        minimize_2d = True
        chamfer_weight = 1e3
        verbose = True

        chamfer_dist = chamfer_distance(ref_pointcloud[None], points[None])[0]
        if minimize_2d:
            projected_points_3D = cameras_absolute.transform_points(
                padded_points, eps=1e-4)
            projected_points = projected_points_3D[:, :, :2]
            # Discard points with a depth < 0 (theoretically impossible)
            inliers = inliers & (projected_points_3D[:, :, 2][nonzer] > 0)

            # Plot point distances for the first image
            # distances = (projected_points[0] - points_2D_gt[0]).norm(dim=-1).detach().cpu().numpy()
            # from matplotlib import pyplot as plt
            # plt.plot(distances[:(visibility[0]).sum()])

            # Different loss functions for reprojection error minimization
            # points_distance = smooth_l1_loss(projected_points, points_2D_gt)
            # points_distance = (smooth_l1_loss(projected_points, points_2D_gt, reduction='none')[nonzer]).sum(dim=1)
            proj_error = ((projected_points[nonzer] -
                           points_2D_gt[nonzer])**2).sum(dim=1)
            proj_error_filtered = proj_error[inliers]
        else:
            projected_points_3D = padded_points @ R + T[:, None]

            # Plot point distances for the first image
            # distances = (projected_points_3D[0] - relative_points_gt[0]).norm(dim=-1).detach().cpu().numpy()
            # from matplotlib import pyplot as plt
            # plt.plot(distances[:(visibility[0]).sum()])

            # Different loss functions for reprojection error minimization
            # points_distance = smooth_l1_loss(projected_points, points_2D_gt)
            # points_distance = (smooth_l1_loss(projected_points, points_2D_gt, reduction='none')[nonzer]).sum(dim=1)
            proj_error = ((projected_points_3D[nonzer] -
                           relative_points_gt[nonzer])**2).sum(dim=1)
            proj_error_filtered = proj_error[inliers]

        loss = proj_error_filtered.mean() + chamfer_weight * chamfer_dist
        loss.backward()

        if verbose:
            print("faulty elements (with nan grad):")
            faulty_points = torch.arange(
                points.shape[0])[points.grad[:, 0] != points.grad[:, 0]]
            faulty_images = torch.arange(
                log_R.shape[0])[log_R.grad[:, 0] != log_R.grad[:, 0]]
            faulty_cams = torch.arange(cams_params.shape[0])[
                cams_params.grad[:, 0] != cams_params.grad[:, 0]]
            faulty_projected_points = torch.arange(
                projected_points.shape[1])[torch.isnan(
                    projected_points.grad).any(dim=2)[0]]

            # Print the tensors that would become NaN, should the gradient be applied
            print("Faulty Rotation (log) and translation")
            print(faulty_images)
            print(log_R[faulty_images])
            print(T[faulty_images])
            print("Faulty 3D colmap points")
            print(faulty_points)
            print(points[faulty_points])
            print("Faulty Cameras")
            print(faulty_cams)
            print(cams_params[faulty_cams])
            print("Faulty 2D points")
            print(projected_points[faulty_projected_points])
            first_faulty_point = points_df.iloc[int(faulty_points[0])]
            related_faulty_images = images_df.loc[
                first_faulty_point["image_ids"][0]]
            print("First faulty point, and images where it is seen")
            print(first_faulty_point)
            print(related_faulty_images)

        # apply the gradients
        optimizer.step()

        # plot and print status message
        if it % 2000 == 0 or it == n_iter - 1:
            camera_distance = calc_camera_distance(cameras_absolute,
                                                   cameras_absolute_gt)
            print('iteration = {}; loss = {}, chamfer distance = {}, camera_distance = {}'
                  .format(it, loss, chamfer_dist, camera_distance))
            loss_log.append(loss.item())
            pts_dist_log.append(chamfer_dist.item())
            cam_dist_log.append(camera_distance.item())
        if it % 20000 == 0 or it == n_iter - 1:
            with torch.no_grad():
                from matplotlib import pyplot as plt
                plt.hist(
                    torch.sqrt(proj_error_filtered).detach().cpu().numpy())
        if it % 200000 == 0 or it == n_iter - 1:
            plt.figure()
            plt.plot(loss_log)
            plt.figure()
            plt.plot(pts_dist_log, label="chamfer_dist")
            plt.plot(cam_dist_log, label="cam_dist")
            plt.legend()
            plot_camera_scene(
                cameras_absolute, cameras_absolute_gt, points, ref_pointcloud,
                'iteration={}; chamfer distance={}'.format(it, chamfer_dist))

    print('Optimization finished.')
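# A minimal sketch of the rotation parameterization used above: rotations are
# stored as axis-angle vectors (log_R) and mapped back to 3x3 matrices with the
# SO(3) exponential map before building the cameras. This assumes the
# so3_exp_map / so3_log_map names available in recent PyTorch3D (the script
# uses the older alias so3_exponential_map); the values are made up.
import torch
from pytorch3d.transforms import so3_exp_map, so3_log_map

log_R = 0.1 * torch.randn(4, 3)    # assumed small axis-angle parameters
R = so3_exp_map(log_R)             # (4, 3, 3) rotation matrices for the cameras
log_R_back = so3_log_map(R)        # recover the axis-angle parameters
print(torch.allclose(log_R, log_R_back, atol=1e-5))  # True (up to numerics)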
def show_depth(
    self, depth_loss: float, name_postfix: str, loss_mask_now: torch.Tensor
):
    self._viz.images(
        torch.cat(
            (
                make_depth_image(self.depth_render, loss_mask_now),
                make_depth_image(self.depth_map, loss_mask_now),
            ),
            dim=3,
        ),
        env=self.visdom_env,
        win="depth_abs" + name_postfix,
        opts={"title": f"depth_abs_{name_postfix}_{depth_loss:1.2f}"},
    )
    self._viz.images(
        loss_mask_now,
        env=self.visdom_env,
        win="depth_abs" + name_postfix + "_mask",
        opts={"title": f"depth_abs_{name_postfix}_{depth_loss:1.2f}_mask"},
    )
    self._viz.images(
        self.depth_mask,
        env=self.visdom_env,
        win="depth_abs" + name_postfix + "_maskd",
        opts={"title": f"depth_abs_{name_postfix}_{depth_loss:1.2f}_maskd"},
    )

    # show the 3D plot
    # pyre-fixme[9]: viewpoint_trivial has type `PerspectiveCameras`; used as
    #  `TensorProperties`.
    viewpoint_trivial: PerspectiveCameras = PerspectiveCameras().to(
        loss_mask_now.device
    )
    pcl_pred = get_rgbd_point_cloud(
        viewpoint_trivial,
        self.image_render,
        self.depth_render,
        # mask_crop,
        torch.ones_like(self.depth_render),
        # loss_mask_now,
    )
    pcl_gt = get_rgbd_point_cloud(
        viewpoint_trivial,
        self.image_rgb_masked,
        self.depth_map,
        # mask_crop,
        torch.ones_like(self.depth_map),
        # loss_mask_now,
    )
    _pcls = {
        pn: p
        for pn, p in zip(("pred_depth", "gt_depth"), (pcl_pred, pcl_gt))
        if int(p.num_points_per_cloud()) > 0
    }
    plotlyplot = plot_scene(
        {f"pcl{name_postfix}": _pcls},
        camera_scale=1.0,
        pointcloud_max_points=10000,
        pointcloud_marker_size=1,
    )
    self._viz.plotlyplot(
        plotlyplot,
        env=self.visdom_env,
        win=f"pcl{name_postfix}",
    )