Example #1
    def test_perspective(self):
        cameras = PerspectiveCameras()
        P = cameras.get_projection_transform()

        vertices = torch.randn([3, 4, 3], dtype=torch.float32)
        v1 = P.transform_points(vertices)
        v2 = sfm_perspective_project_naive(vertices)
        self.assertClose(v1, v2)
Example #2
 def test_perspective_kwargs(self):
     cameras = PerspectiveCameras(focal_length=5.0, principal_point=((2.5, 2.5),))
     P = cameras.get_projection_transform(
         focal_length=2.0, principal_point=((2.5, 3.5),)
     )
     vertices = torch.randn([3, 4, 3], dtype=torch.float32)
     v1 = P.transform_points(vertices)
     v2 = sfm_perspective_project_naive(vertices, fx=2.0, fy=2.0, p0x=2.5, p0y=3.5)
     self.assertClose(v1, v2, atol=1e-6)
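Both tests above compare PerspectiveCameras.get_projection_transform() against a plain pinhole projection written by hand. A minimal sketch of what such a reference helper could look like (the real sfm_perspective_project_naive lives in PyTorch3D's test utilities and may differ in details):

import torch

def sfm_perspective_project_naive(points, fx=1.0, fy=1.0, p0x=0.0, p0y=0.0):
    # points: (N, V, 3). Pinhole projection in the SfM convention:
    #   x' = fx * x / z + p0x,  y' = fy * y / z + p0y,
    # with 1/z kept in the third channel.
    z = points[..., 2]
    x = points[..., 0] * fx / z + p0x
    y = points[..., 1] * fy / z + p0y
    return torch.stack((x, y, 1.0 / z), dim=-1)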
Example #3
    def test_getitem(self):
        R_matrix = torch.randn((6, 3, 3))
        principal_point = torch.randn((6, 2, 1))
        focal_length = 5.0
        cam = PerspectiveCameras(
            R=R_matrix,
            focal_length=focal_length,
            principal_point=principal_point,
        )

        # Check get item returns an instance of the same class
        # with all the same keys
        c0 = cam[0]
        self.assertTrue(isinstance(c0, PerspectiveCameras))
        self.assertEqual(cam.__dict__.keys(), c0.__dict__.keys())

        # Check torch.LongTensor index
        index = torch.tensor([1, 3, 5], dtype=torch.int64)
        c135 = cam[index]
        self.assertEqual(len(c135), 3)
        self.assertClose(c135.focal_length, torch.tensor([[5.0, 5.0]] * 3))
        self.assertClose(c135.R, R_matrix[[1, 3, 5], ...])
        self.assertClose(c135.principal_point, principal_point[[1, 3, 5], ...])

        # Check in_ndc is handled correctly
        self.assertEqual(cam._in_ndc, c0._in_ndc)
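A compact usage sketch of the same indexing behaviour on a small camera batch (only the index forms exercised by the test; support for other index types may vary by PyTorch3D version):

import torch
from pytorch3d.renderer import PerspectiveCameras

cam = PerspectiveCameras(R=torch.randn(6, 3, 3), focal_length=5.0)
c0 = cam[0]                          # still a PerspectiveCameras instance
c135 = cam[torch.tensor([1, 3, 5])]  # LongTensor index keeps three cameras
assert isinstance(c0, PerspectiveCameras) and len(c135) == 3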
Example #4
    def _get_pytorch3d_camera(
        self,
        entry: types.FrameAnnotation,
        scale: float,
        clamp_bbox_xyxy: Optional[torch.Tensor],
    ) -> PerspectiveCameras:
        entry_viewpoint = entry.viewpoint
        assert entry_viewpoint is not None
        # principal point and focal length
        principal_point = torch.tensor(entry_viewpoint.principal_point,
                                       dtype=torch.float)
        focal_length = torch.tensor(entry_viewpoint.focal_length,
                                    dtype=torch.float)

        half_image_size_wh_orig = (
            torch.tensor(list(reversed(entry.image.size)), dtype=torch.float) /
            2.0)

        # first, we convert from the dataset's NDC convention to pixels
        format = entry_viewpoint.intrinsics_format
        if format.lower() == "ndc_norm_image_bounds":
            # this is e.g. currently used in CO3D for storing intrinsics
            rescale = half_image_size_wh_orig
        elif format.lower() == "ndc_isotropic":
            rescale = half_image_size_wh_orig.min()
        else:
            raise ValueError(f"Unknown intrinsics format: {format}")

        # principal point and focal length in pixels
        principal_point_px = half_image_size_wh_orig - principal_point * rescale
        focal_length_px = focal_length * rescale
        if self.box_crop:
            assert clamp_bbox_xyxy is not None
            principal_point_px -= clamp_bbox_xyxy[:2]

        # now, convert from pixels to PyTorch3D v0.5+ NDC convention
        if self.image_height is None or self.image_width is None:
            out_size = list(reversed(entry.image.size))
        else:
            out_size = [self.image_width, self.image_height]

        half_image_size_output = torch.tensor(out_size,
                                              dtype=torch.float) / 2.0
        half_min_image_size_output = half_image_size_output.min()

        # rescaled principal point and focal length in ndc
        principal_point = (half_image_size_output - principal_point_px *
                           scale) / half_min_image_size_output
        focal_length = focal_length_px * scale / half_min_image_size_output

        return PerspectiveCameras(
            focal_length=focal_length[None],
            principal_point=principal_point[None],
            R=torch.tensor(entry_viewpoint.R, dtype=torch.float)[None],
            T=torch.tensor(entry_viewpoint.T, dtype=torch.float)[None],
        )
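The intrinsics conversion above can be followed with a small numeric sketch (made-up values, "ndc_norm_image_bounds" convention, no crop and scale == 1.0):

import torch

# entry.image.size is (H, W) = (600, 800) in this made-up example
half_wh = torch.tensor([400.0, 300.0])            # half image size, (W, H) order
principal_point_ndc = torch.tensor([0.05, -0.10]) # dataset NDC convention
focal_length_ndc = torch.tensor([2.0, 2.0])

# dataset NDC -> pixels (per-axis rescale for ndc_norm_image_bounds)
principal_point_px = half_wh - principal_point_ndc * half_wh
focal_length_px = focal_length_ndc * half_wh

# pixels -> PyTorch3D v0.5+ NDC (normalise by half of the smaller output side)
half_min = half_wh.min()
principal_point = (half_wh - principal_point_px) / half_min
focal_length = focal_length_px / half_min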
Example #5
    def test_perspective_scaled(self):
        focal_length_x = 10.0
        focal_length_y = 15.0
        p0x = 15.0
        p0y = 30.0

        cameras = PerspectiveCameras(
            focal_length=((focal_length_x, focal_length_y),),
            principal_point=((p0x, p0y),),
        )
        P = cameras.get_projection_transform()

        vertices = torch.randn([3, 4, 3], dtype=torch.float32)
        v1 = P.transform_points(vertices)
        v2 = sfm_perspective_project_naive(
            vertices, fx=focal_length_x, fy=focal_length_y, p0x=p0x, p0y=p0y
        )
        v3 = cameras.transform_points(vertices)
        self.assertClose(v1, v2)
        self.assertClose(v3[..., :2], v2[..., :2])
Example #6
    def test_gm(self):
        # Simple test of a forward pass of the default GenericModel.
        device = torch.device("cuda:1")
        expand_args_fields(GenericModel)
        model = GenericModel()
        model.to(device)

        n_train_cameras = 2
        R, T = look_at_view_transform(azim=torch.rand(n_train_cameras) * 360)
        cameras = PerspectiveCameras(R=R, T=T, device=device)

        # TODO: make these default to None?
        defaulted_args = {
            "fg_probability": None,
            "depth_map": None,
            "mask_crop": None,
            "sequence_name": None,
        }

        with self.assertWarnsRegex(UserWarning, "No main objective found"):
            model(
                camera=cameras,
                evaluation_mode=EvaluationMode.TRAINING,
                **defaulted_args,
                image_rgb=None,
            )
        target_image_rgb = torch.rand(
            (n_train_cameras, 3, model.render_image_height, model.render_image_width),
            device=device,
        )
        train_preds = model(
            camera=cameras,
            evaluation_mode=EvaluationMode.TRAINING,
            image_rgb=target_image_rgb,
            **defaulted_args,
        )
        self.assertGreater(train_preds["objective"].item(), 0)

        model.eval()
        with torch.no_grad():
            # TODO: perhaps this warning should be skipped in eval mode?
            with self.assertWarnsRegex(UserWarning, "No main objective found"):
                eval_preds = model(
                    camera=cameras[0],
                    **defaulted_args,
                    image_rgb=None,
                )
        self.assertEqual(
            eval_preds["images_render"].shape,
            (1, 3, model.render_image_height, model.render_image_width),
        )
Example #7
 def __init__(self):
     super(SceneModel, self).__init__()
     self.gamma = 1.0
     # Points.
     torch.manual_seed(1)
     vert_pos = torch.rand(N_POINTS, 3, dtype=torch.float32, device=DEVICE) * 10.0
     vert_pos[:, 2] += 25.0
     vert_pos[:, :2] -= 5.0
     self.register_parameter("vert_pos", nn.Parameter(vert_pos, requires_grad=True))
     self.register_parameter(
         "vert_col",
         nn.Parameter(
             torch.ones(N_POINTS, 3, dtype=torch.float32, device=DEVICE) * 0.5,
             requires_grad=True,
         ),
     )
     self.register_parameter(
         "vert_rad",
         nn.Parameter(
             torch.ones(N_POINTS, dtype=torch.float32) * 0.3, requires_grad=True
         ),
     )
     self.register_buffer(
         "cam_params",
         torch.tensor(
             [0.0, 0.0, 0.0, 0.0, math.pi, 0.0, 5.0, 2.0], dtype=torch.float32
         ),
     )
     self.cameras = PerspectiveCameras(
         # The focal length must be doubled for PyTorch3D because the NDC
         # coordinates span a range of two, and it must be normalized by the
         # sensor width (see the pulsar example). This means we need
         # 5.0 * 2.0 / 2.0 here to get results equivalent to pulsar.
         focal_length=5.0,
         R=torch.eye(3, dtype=torch.float32, device=DEVICE)[None, ...],
         T=torch.zeros((1, 3), dtype=torch.float32, device=DEVICE),
         image_size=((HEIGHT, WIDTH),),
         device=DEVICE,
     )
     raster_settings = PointsRasterizationSettings(
         image_size=(HEIGHT, WIDTH),
         radius=self.vert_rad,
     )
     rasterizer = PointsRasterizer(
         cameras=self.cameras, raster_settings=raster_settings
     )
     self.renderer = PulsarPointsRenderer(rasterizer=rasterizer, n_track=32)
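The focal-length comment in __init__ boils down to one conversion: pulsar works with a focal length and sensor width in the same units, while PyTorch3D NDC spans a range of two, so the equivalent NDC focal length is f * 2.0 / sensor_width. With the values stored in cam_params above:

f_pulsar, sensor_width = 5.0, 2.0       # last two entries of cam_params
f_ndc = f_pulsar * 2.0 / sensor_width   # == 5.0, the value passed to PerspectiveCameras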
Example #8
 def test_simple_sphere_pulsar(self):
     for device in [torch.device("cpu"), torch.device("cuda")]:
         sphere_mesh = ico_sphere(1, device)
         verts_padded = sphere_mesh.verts_padded()
         # Shift vertices to check coordinate frames are correct.
         verts_padded[..., 1] += 0.2
         verts_padded[..., 0] += 0.2
         pointclouds = Pointclouds(
             points=verts_padded, features=torch.ones_like(verts_padded)
         )
         for azimuth in [0.0, 90.0]:
             R, T = look_at_view_transform(2.7, 0.0, azimuth)
             for camera_name, cameras in [
                 ("fovperspective", FoVPerspectiveCameras(device=device, R=R, T=T)),
                 (
                     "fovorthographic",
                     FoVOrthographicCameras(device=device, R=R, T=T),
                 ),
                 ("perspective", PerspectiveCameras(device=device, R=R, T=T)),
                 ("orthographic", OrthographicCameras(device=device, R=R, T=T)),
             ]:
                 raster_settings = PointsRasterizationSettings(
                     image_size=256, radius=5e-2, points_per_pixel=1
                 )
                 rasterizer = PointsRasterizer(
                     cameras=cameras, raster_settings=raster_settings
                 )
                 renderer = PulsarPointsRenderer(rasterizer=rasterizer).to(device)
                 # Load reference image
                 filename = (
                     "pulsar_simple_pointcloud_sphere_"
                     f"azimuth{azimuth}_{camera_name}.png"
                 )
                 image_ref = load_rgb_image("test_%s" % filename, DATA_DIR)
                 images = renderer(
                     pointclouds, gamma=(1e-3,), znear=(1.0,), zfar=(100.0,)
                 )
                 rgb = images[0, ..., :3].squeeze().cpu()
                 if DEBUG:
                     filename = "DEBUG_%s" % filename
                     Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(
                         DATA_DIR / filename
                     )
                 self.assertClose(rgb, image_ref, rtol=7e-3, atol=5e-3)
Example #9
    def test_join_cameras_as_batch_errors(self):
        cam0 = PerspectiveCameras(device="cuda:0")
        cam1 = OrthographicCameras(device="cuda:0")

        # Cameras not of the same type
        with self.assertRaisesRegex(ValueError, "same type"):
            join_cameras_as_batch([cam0, cam1])

        cam2 = OrthographicCameras(device="cpu")
        # Cameras not on the same device
        with self.assertRaisesRegex(ValueError, "same device"):
            join_cameras_as_batch([cam1, cam2])

        cam3 = OrthographicCameras(in_ndc=False, device="cuda:0")
        # Different coordinate systems -- all should be in ndc or in screen
        with self.assertRaisesRegex(
            ValueError, "Attribute _in_ndc is not constant across inputs"
        ):
            join_cameras_as_batch([cam1, cam3])
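For contrast, a minimal sketch of a join that succeeds (same camera type, same device, both in NDC), assuming join_cameras_as_batch is imported as in the test above (it is defined in pytorch3d.renderer.camera_utils):

cam_a = PerspectiveCameras(focal_length=1.0)
cam_b = PerspectiveCameras(focal_length=2.0)
batch = join_cameras_as_batch([cam_a, cam_b])
assert isinstance(batch, PerspectiveCameras) and len(batch) == 2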
Example #10
    def test_invert_eye_at_up(self):
        # Generate random cameras and check we can reconstruct their eye, at,
        # and up vectors.
        N = 13
        eye = torch.rand(N, 3)
        at = torch.rand(N, 3)
        up = torch.rand(N, 3)

        R, T = look_at_view_transform(eye=eye, at=at, up=up)
        cameras = PerspectiveCameras(R=R, T=T)

        eye2, at2, up2 = camera_to_eye_at_up(
            cameras.get_world_to_view_transform())

        # The retrieved eye matches
        self.assertClose(eye, eye2, atol=1e-5)
        self.assertClose(cameras.get_camera_center(), eye)

        # at-eye as retrieved must be a vector in the same direction as
        # the original.
        self.assertClose(normalize(at - eye), normalize(at2 - eye2))

        # The up vector as retrieved should be rotated the same amount
        # around at-eye as the original. The component in the at-eye
        # direction is unimportant, as is the length.
        # So check that (up x (at-eye)) as retrieved is in the same
        # direction as its original value.
        up_check = torch.cross(up, at - eye, dim=-1)
        up_check2 = torch.cross(up2, at - eye, dim=-1)
        self.assertClose(normalize(up_check), normalize(up_check2))

        # Master check that we get the same camera if we reinitialise.
        R2, T2 = look_at_view_transform(eye=eye2, at=at2, up=up2)
        cameras2 = PerspectiveCameras(R=R2, T=T2)
        cam_trans = cameras.get_world_to_view_transform()
        cam_trans2 = cameras2.get_world_to_view_transform()

        self.assertClose(cam_trans.get_matrix(),
                         cam_trans2.get_matrix(),
                         atol=1e-5)
Example #11
    def test_to(self):
        cpu_device = torch.device("cpu")
        cuda_device = torch.device("cuda:0")

        R, T = look_at_view_transform()

        shader_classes = [
            HardFlatShader,
            HardGouraudShader,
            HardPhongShader,
            SoftPhongShader,
        ]

        for shader_class in shader_classes:
            for cameras_class in (None, PerspectiveCameras):
                if cameras_class is None:
                    cameras = None
                else:
                    cameras = PerspectiveCameras(device=cpu_device, R=R, T=T)

                cpu_shader = shader_class(device=cpu_device, cameras=cameras)
                if cameras is None:
                    self.assertIsNone(cpu_shader.cameras)
                else:
                    self.assertEqual(cpu_device, cpu_shader.cameras.device)
                self.assertEqual(cpu_device, cpu_shader.materials.device)
                self.assertEqual(cpu_device, cpu_shader.lights.device)

                cuda_shader = cpu_shader.to(cuda_device)
                self.assertIs(cpu_shader, cuda_shader)
                if cameras is None:
                    self.assertIsNone(cuda_shader.cameras)
                else:
                    self.assertEqual(cuda_device, cuda_shader.cameras.device)
                self.assertEqual(cuda_device, cuda_shader.materials.device)
                self.assertEqual(cuda_device, cuda_shader.lights.device)
Example #12
 def test_perspective_type(self):
     cam = PerspectiveCameras(focal_length=5.0,
                              principal_point=((2.5, 2.5), ))
     self.assertTrue(cam.is_perspective())
     self.assertEqual(cam.get_znear(), None)
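For comparison, a short sketch of the same checks on a FoV camera, which does carry clipping planes (the default znear for FoVPerspectiveCameras is 1.0 at the time of writing):

from pytorch3d.renderer import FoVPerspectiveCameras

fov_cam = FoVPerspectiveCameras()
assert fov_cam.is_perspective()
assert fov_cam.get_znear() == 1.0  # SfM-style PerspectiveCameras return None instead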
Example #13
 def test_unified_inputs_pulsar(self):
     # Test data on different devices.
     for device in [torch.device("cpu"), torch.device("cuda")]:
         sphere_mesh = ico_sphere(1, device)
         verts_padded = sphere_mesh.verts_padded()
         pointclouds = Pointclouds(
             points=verts_padded, features=torch.ones_like(verts_padded)
         )
         R, T = look_at_view_transform(2.7, 0.0, 0.0)
         # Test the different camera types.
         for _, cameras in [
             ("fovperspective", FoVPerspectiveCameras(device=device, R=R, T=T)),
             (
                 "fovorthographic",
                 FoVOrthographicCameras(device=device, R=R, T=T),
             ),
             ("perspective", PerspectiveCameras(device=device, R=R, T=T)),
             ("orthographic", OrthographicCameras(device=device, R=R, T=T)),
         ]:
             # Test different ways for image size specification.
             for image_size in (256, (256, 256)):
                 raster_settings = PointsRasterizationSettings(
                     image_size=image_size, radius=5e-2, points_per_pixel=1
                 )
                 rasterizer = PointsRasterizer(
                     cameras=cameras, raster_settings=raster_settings
                 )
                 # Test that the compositor can be provided. Its value is ignored,
                 # so use a dummy.
                 _ = PulsarPointsRenderer(rasterizer=rasterizer, compositor=1).to(
                     device
                 )
                 # Constructor without compositor.
                 _ = PulsarPointsRenderer(rasterizer=rasterizer).to(device)
                 # Constructor with n_channels.
                 _ = PulsarPointsRenderer(rasterizer=rasterizer, n_channels=3).to(
                     device
                 )
                 # Constructor with max_num_spheres.
                 renderer = PulsarPointsRenderer(
                     rasterizer=rasterizer, max_num_spheres=1000
                 ).to(device)
                 # Test the forward function.
                 if isinstance(cameras, (PerspectiveCameras, OrthographicCameras)):
                     # znear and zfar are required in this case.
                     self.assertRaises(
                         ValueError,
                         lambda: renderer.forward(
                             point_clouds=pointclouds, gamma=(1e-4,)
                         ),
                     )
                     renderer.forward(
                         point_clouds=pointclouds,
                         gamma=(1e-4,),
                         znear=(1.0,),
                         zfar=(2.0,),
                     )
                     # znear and zfar must be batched.
                     self.assertRaises(
                         TypeError,
                         lambda: renderer.forward(
                             point_clouds=pointclouds,
                             gamma=(1e-4,),
                             znear=1.0,
                             zfar=(2.0,),
                         ),
                     )
                     self.assertRaises(
                         TypeError,
                         lambda: renderer.forward(
                             point_clouds=pointclouds,
                             gamma=(1e-4,),
                             znear=(1.0,),
                             zfar=2.0,
                         ),
                     )
                 else:
                     # gamma must be batched.
                     self.assertRaises(
                         TypeError,
                         lambda: renderer.forward(
                             point_clouds=pointclouds, gamma=1e-4
                         ),
                     )
                     renderer.forward(point_clouds=pointclouds, gamma=(1e-4,))
                     # rasterizer width and height change.
                     renderer.rasterizer.raster_settings.image_size = 0
                     self.assertRaises(
                         ValueError,
                         lambda: renderer.forward(
                             point_clouds=pointclouds, gamma=(1e-4,)
                         ),
                     )
Example #14
 def __init__(self):
     super(SceneModel, self).__init__()
     self.gamma = 0.1
     # Points.
     torch.manual_seed(1)
     vert_pos = torch.rand(N_POINTS, 3, dtype=torch.float32) * 10.0
     vert_pos[:, 2] += 25.0
     vert_pos[:, :2] -= 5.0
     self.register_parameter("vert_pos",
                             nn.Parameter(vert_pos, requires_grad=False))
     self.register_parameter(
         "vert_col",
         nn.Parameter(
             torch.rand(N_POINTS, 3, dtype=torch.float32),
             requires_grad=False,
         ),
     )
     self.register_parameter(
         "vert_rad",
         nn.Parameter(
             torch.rand(N_POINTS, dtype=torch.float32),
             requires_grad=False,
         ),
     )
     self.register_parameter(
         "cam_pos",
         nn.Parameter(
             torch.tensor([0.1, 0.1, 0.0], dtype=torch.float32),
             requires_grad=True,
         ),
     )
     self.register_parameter(
         "cam_rot",
         # The rotation is stored here as a full 3x3 matrix, initialised from an axis-angle vector.
         nn.Parameter(
             axis_angle_to_matrix(
                 torch.tensor(
                     [
                         [0.02, 0.02, 0.01],
                     ],
                     dtype=torch.float32,
                 ))[0],
             requires_grad=True,
         ),
     )
     self.register_parameter(
         "focal_length",
         nn.Parameter(
             torch.tensor(
                 [
                     4.8 * 2.0 / 2.0,
                 ],
                 dtype=torch.float32,
             ),
             requires_grad=True,
         ),
     )
     self.cameras = PerspectiveCameras(
         # The focal length must be doubled for PyTorch3D because the NDC
         # coordinates span a range of two, and it must be normalized by the
         # sensor width (see the pulsar example). This means we need
         # 4.8 * 2.0 / 2.0 here to get results equivalent to pulsar.
         #
         # R, T and f are provided here, but will be provided again
         # at every call to the forward method. The reason is a PyTorch
         # limitation that makes device placement of gradients problematic
         # for tensors which are on a 'gradient path' but are not leaves
         # of the computation graph. This will be addressed by an architectural
         # change in PyTorch3D in the future. Until then, this workaround is
         # recommended.
         focal_length=self.focal_length,
         R=self.cam_rot[None, ...],
         T=self.cam_pos[None, ...],
         image_size=((HEIGHT, WIDTH), ),
         device=DEVICE,
     )
     raster_settings = PointsRasterizationSettings(
         image_size=(HEIGHT, WIDTH),
         radius=self.vert_rad,
     )
     rasterizer = PointsRasterizer(cameras=self.cameras,
                                   raster_settings=raster_settings)
     self.renderer = PulsarPointsRenderer(rasterizer=rasterizer)
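The long comment above says that R, T and the focal length are passed again on every call to forward(). A hypothetical sketch of such a forward, re-supplying the registered parameters through the renderer call (keyword names mirror the PerspectiveCameras arguments; the exact set accepted by PulsarPointsRenderer.forward may differ between PyTorch3D versions):

 def forward(self):
     # Re-supply the camera parameters so their gradients stay on the right
     # device, as explained in the __init__ comment. The clipping planes and
     # zfar value here are placeholders.
     return self.renderer(
         Pointclouds(points=self.vert_pos[None], features=self.vert_col[None]),
         gamma=(self.gamma,),
         znear=(1.0,),
         zfar=(45.0,),
         focal_length=self.focal_length,
         R=self.cam_rot[None, ...],
         T=self.cam_pos[None, ...],
     )[0]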
Example #15
def main(args):
    # set for reproducibility
    torch.manual_seed(42)
    if args.dtype == "float":
        args.dtype = torch.float32
    elif args.dtype == "double":
        args.dtype = torch.float64

    # ## 1. Set up Cameras and load ground truth positions

    # load the SE3 graph of relative/absolute camera positions
    if (args.input_folder / "images.bin").isfile():
        ext = '.bin'
    elif (args.input_folder / "images.txt").isfile():
        ext = '.txt'
    else:
        print('error')
        return
    cameras, images, points3D = read_model(args.input_folder, ext)

    images_df = pd.DataFrame.from_dict(images, orient="index").set_index("id")
    cameras_df = pd.DataFrame.from_dict(cameras,
                                        orient="index").set_index("id")
    points_df = pd.DataFrame.from_dict(points3D,
                                       orient="index").set_index("id")
    print(points_df)
    print(images_df)
    print(cameras_df)

    ref_pointcloud = PyntCloud.from_file(args.ply)
    ref_pointcloud = torch.from_numpy(ref_pointcloud.xyz).to(device,
                                                             dtype=args.dtype)

    points_3d = np.stack(points_df["xyz"].values)
    points_3d = torch.from_numpy(points_3d).to(device, dtype=args.dtype)

    cameras_R = np.stack(
        [qvec2rotmat(q) for _, q in images_df["qvec"].iteritems()])
    cameras_R = torch.from_numpy(cameras_R).to(device,
                                               dtype=args.dtype).transpose(
                                                   1, 2)

    cameras_T = torch.from_numpy(np.stack(images_df["tvec"].values)).to(
        device, dtype=args.dtype)

    cameras_params = torch.from_numpy(np.stack(
        cameras_df["params"].values)).to(device, dtype=args.dtype)
    cameras_params = cameras_params[:, :4]
    print(cameras_params)

    # Construct the visibility map: True at (frame, point) if the point is visible
    # in the frame, False otherwise. This lets us ignore reprojection errors for
    # invisible points.
    visibility = np.full((cameras_R.shape[0], points_3d.shape[0]), False)
    visibility = pd.DataFrame(visibility,
                              index=images_df.index,
                              columns=points_df.index)

    points_2D_gt = []
    for idx, (pts_ids, xy) in images_df[["point3D_ids", "xys"]].iterrows():
        pts_ids_clean = pts_ids[pts_ids != -1]
        pts_2D = pd.DataFrame(xy[pts_ids != -1], index=pts_ids_clean)
        pts_2D = pts_2D[~pts_2D.index.duplicated(keep=False)].reindex(
            points_df.index).dropna()
        points_2D_gt.append(pts_2D.values)
        visibility.loc[idx, pts_2D.index] = True

    print(visibility)

    visibility = torch.from_numpy(visibility.values).to(device)
    eps = 1e-3
    # The visibility map is very sparse, so we can use PyTorch3D's list_to_padded
    # to reduce points_2D to shape (num_frames, max points seen by a frame)
    points_2D_gt = list_to_padded([torch.from_numpy(p) for p in points_2D_gt],
                                  pad_value=eps).to(device, dtype=args.dtype)
    print(points_2D_gt)

    cameras_df["raw_id"] = np.arange(len(cameras_df))
    cameras_id_per_image = torch.from_numpy(
        cameras_df["raw_id"][images_df["camera_id"]].values).to(device)
    # the number of absolute camera positions
    N = len(images_df)
    nonzer = (points_2D_gt != eps).all(dim=-1)

    # print(padded)
    # print(points_2D_gt, points_2D_gt.shape)

    # ## 2. Define optimization functions
    #
    # ### Relative cameras and camera distance
    # We now define two functions crucial for the optimization.
    #
    # **`calc_camera_distance`** compares a pair of cameras.
    # This function is important as it defines the loss that we are minimizing.
    # The method utilizes the `so3_relative_angle` function from the SO3 API.
    #
    # **`get_relative_camera`** computes the parameters of a relative camera
    # that maps between a pair of absolute cameras. Here we utilize the `compose`
    # and `inverse` class methods from the PyTorch3D Transforms API.

    def calc_camera_distance(cam_1, cam_2):
        """
        Calculates the divergence of a batch of pairs of cameras cam_1, cam_2.
        The distance is composed of the cosine of the relative angle between
        the rotation components of the camera extrinsics and the l2 distance
        between the translation vectors.
        """
        # rotation distance
        R_distance = (
            1. - so3_relative_angle(cam_1.R, cam_2.R, cos_angle=True)).mean()
        # translation distance
        T_distance = ((cam_1.T - cam_2.T)**2).sum(1).mean()
        # the final distance is the sum
        return R_distance + T_distance
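    # The comment above also describes `get_relative_camera`, which is not used in
    # the rest of this script; a rough sketch of it, following the PyTorch3D bundle
    # adjustment tutorial this section is adapted from, is kept here for reference.
    def get_relative_camera(cams, edges):
        """
        For each pair of indices (i, j) in `edges`, build the camera that maps
        from the coordinate frame of cams[i] to the frame of cams[j].
        """
        # world-to-view transforms of each camera in the pair
        trans_i, trans_j = [
            PerspectiveCameras(
                R=cams.R[edges[:, i]], T=cams.T[edges[:, i]], device=device
            ).get_world_to_view_transform()
            for i in (0, 1)
        ]
        # relative transform g_i^-1 g_j, then read R and T back off the 4x4 matrix
        trans_rel = trans_i.inverse().compose(trans_j)
        matrix_rel = trans_rel.get_matrix()
        return PerspectiveCameras(
            R=matrix_rel[:, :3, :3], T=matrix_rel[:, 3, :3], device=device
        )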

    # ## 3. Optimization
    # Finally, we start the optimization of the absolute cameras.
    #
    # We use SGD with momentum and optimize over `log_R_absolute` and `T_absolute`.
    #
    # As mentioned earlier, `log_R_absolute` is the axis angle representation of the
    # rotation part of our absolute cameras. We can obtain the 3x3 rotation matrix
    # `R_absolute` that corresponds to `log_R_absolute` with:
    #
    # `R_absolute = so3_exponential_map(log_R_absolute)`
    #

    fxfyu0v0 = cameras_params[cameras_id_per_image]
    cameras_absolute_gt = PerspectiveCameras(
        focal_length=fxfyu0v0[:, :2],
        principal_point=fxfyu0v0[:, 2:],
        R=cameras_R,
        T=cameras_T,
        device=device,
    )

    # Normally, the 2D points are the ones we should use to minimize reprojection
    # errors. But we have been dealing with instability, so we reproject the 3D points
    # instead and use that reprojection as the target, since we assume COLMAP's
    # bundle adjuster already converged on its own.
    use_3d_points = True
    if use_3d_points:
        with torch.no_grad():
            padded_points = list_to_padded(
                [points_3d[visibility[c]] for c in range(N)], pad_value=1e-3)
            points_2D_gt = cameras_absolute_gt.transform_points(
                padded_points, eps=1e-4)[:, :, :2]
            relative_points_gt = padded_points @ cameras_R + cameras_T[:, None]

    # The starting point is normally points_3d, cameras_R and cameras_T.
    # For a stability test, you can try to add noise and see if the optimization
    # gets back to the initial state (spoiler alert: it's complicated).
    # Set noise and shift to 0 for a normal starting point.
    noise = 0
    shift = 0.1
    points_init = points_3d + noise * torch.randn(
        points_3d.shape, dtype=torch.float32, device=device) + shift

    log_R_init = so3_log_map(cameras_R) + noise * torch.randn(
        N, 3, dtype=torch.float32, device=device)
    T_init = cameras_T + noise * torch.randn(
        cameras_T.shape, dtype=torch.float32, device=device) - shift
    cams_init = cameras_params  # + noise * torch.randn(cameras_params.shape, dtype=torch.float32, device=device)

    # instantiate a copy of the initialization of log_R / T
    log_R = log_R_init.clone().detach()
    log_R.requires_grad = True
    T = T_init.clone().detach()
    T.requires_grad = True

    cams_params = cams_init.clone().detach()
    cams_params.requires_grad = True

    points = points_init.clone().detach()
    points.requires_grad = True

    # init the optimizer
    # Different learning rates per parameter? Intuitively, the rate should be higher
    # for T and lower for log_R. The camera intrinsics could be optimized as well,
    # but it's unlikely to be interesting.
    param_groups = [{
        'params': points,
        'lr': args.lr
    }, {
        'params': log_R,
        'lr': 0.1 * args.lr
    }, {
        'params': T,
        'lr': 2 * args.lr
    }, {
        'params': cams_params,
        'lr': 0
    }]
    optimizer = torch.optim.SGD(param_groups, lr=args.lr, momentum=0.9)

    # run the optimization
    n_iter = 200000  # fix the number of iterations
    # Compute inliers:
    # in the model, some 3D points have their reprojection way off compared to the
    # target 2D point. This is potentially a great source of instability. `inliers`
    # keeps track of those problematic points so they can be discarded from the
    # optimization.
    discard_outliers = True
    if discard_outliers:
        with torch.no_grad():
            padded_points = list_to_padded(
                [points_3d[visibility[c]] for c in range(N)], pad_value=1e-3)
            projected_points = cameras_absolute_gt.transform_points(
                padded_points, eps=1e-4)[:, :, :2]
            points_distance = ((projected_points[nonzer] -
                                points_2D_gt[nonzer])**2).sum(dim=1)
            inliers = (points_distance < 100).clone().detach()
            print(inliers)
    else:
        inliers = points_2D_gt[nonzer] == points_2D_gt[
            nonzer]  # All true, except NaNs
    loss_log = []
    cam_dist_log = []
    pts_dist_log = []
    for it in range(n_iter):
        # re-init the optimizer gradients
        optimizer.zero_grad()
        R = so3_exponential_map(log_R)

        fxfyu0v0 = cams_params[cameras_id_per_image]
        # get the current absolute cameras
        cameras_absolute = PerspectiveCameras(
            focal_length=fxfyu0v0[:, :2],
            principal_point=fxfyu0v0[:, 2:],
            R=R,
            T=T,
            device=device,
        )

        padded_points = list_to_padded(
            [points[visibility[c]] for c in range(N)], pad_value=1e-3)

        # two ways of optimizing:
        # 1) minimize the 2D projection error. Potentially unstable, especially with
        # very close points: those are exactly the points for which we want the pose
        # update to be small, yet gradient descent gives them the largest step size.
        # We could use Adam instead, but the instability remains.
        #
        # 2) minimize the 3D relative position error (the initial 3D relative positions
        # are treated as ground truth). No more instability for very close points,
        # although the 2D reprojection error is then not guaranteed to be minimized.

        minimize_2d = True
        chamfer_weight = 1e3
        verbose = True

        chamfer_dist = chamfer_distance(ref_pointcloud[None], points[None])[0]
        if minimize_2d:
            projected_points_3D = cameras_absolute.transform_points(
                padded_points, eps=1e-4)
            projected_points = projected_points_3D[:, :, :2]
            # Discard points with a depth < 0 (theoretically impossible)
            inliers = inliers & (projected_points_3D[:, :, 2][nonzer] > 0)

            # Plot point distances for the first image
            # distances = (projected_points[0] - points_2D_gt[0]).norm(dim=-1).detach().cpu().numpy()
            # from matplotlib import pyplot as plt
            # plt.plot(distances[:(visibility[0]).sum()])

            # Different loss functions for reprojection error minimization
            # points_distance = smooth_l1_loss(projected_points, points_2D_gt)
            # points_distance = (smooth_l1_loss(projected_points, points_2D_gt, reduction='none')[nonzer]).sum(dim=1)
            proj_error = ((projected_points[nonzer] -
                           points_2D_gt[nonzer])**2).sum(dim=1)
            proj_error_filtered = proj_error[inliers]
        else:
            projected_points_3D = padded_points @ R + T

            # Plot point distances for the first image
            # distances = (projected_points_3D[0] - relative_points_gt[0]).norm(dim=-1).detach().cpu().numpy()
            # from matplotlib import pyplot as plt
            # plt.plot(distances[:(visibility[0]).sum()])

            # Different loss functions for reprojection error minimization
            # points_distance = smooth_l1_loss(projected_points, points_2D_gt)
            # points_distance = (smooth_l1_loss(projected_points, points_2D_gt, reduction='none')[nonzer]).sum(dim=1)
            proj_error = ((projected_points_3D[nonzer] -
                           relative_points_gt[nonzer])**2).sum(dim=1)
            proj_error_filtered = proj_error[inliers]

        loss = proj_error_filtered.mean() + chamfer_weight * chamfer_dist
        loss.backward()

        if verbose:
            print("faulty elements (with nan grad) :")
            faulty_points = torch.arange(
                points.shape[0])[points.grad[:, 0] != points.grad[:, 0]]
            faulty_images = torch.arange(
                log_R.shape[0])[log_R.grad[:, 0] != log_R.grad[:, 0]]
            faulty_cams = torch.arange(cams_params.shape[0])[
                cams_params.grad[:, 0] != cams_params.grad[:, 0]]
            faulty_projected_points = torch.arange(
                projected_points.shape[1])[torch.isnan(
                    projected_points.grad).any(dim=2)[0]]

            # Print Tensor that would become NaN, should the gradient be applied
            print("Faulty Rotation (log) and translation")
            print(faulty_images)
            print(log_R[faulty_images])
            print(T[faulty_images])
            print("Faulty 3D colmap points")
            print(faulty_points)
            print(points[faulty_points])
            print("Faulty Cameras")
            print(faulty_cams)
            print(cams_params[faulty_cams])
            print("Faulty 2D points")
            print(projected_points[faulty_projected_points])
            first_faulty_point = points_df.iloc[int(faulty_points[0])]
            related_faulty_images = images_df.loc[
                first_faulty_point["image_ids"][0]]

            print("First faulty point, and images where it is seen")
            print(first_faulty_point)
            print(related_faulty_images)

        # apply the gradients
        optimizer.step()

        # plot and print status message
        if it % 2000 == 0 or it == n_iter - 1:
            camera_distance = calc_camera_distance(cameras_absolute,
                                                   cameras_absolute_gt)
            print(
                'iteration = {}; loss = {}, chamfer distance = {}, camera_distance = {}'
                .format(it, loss, chamfer_dist, camera_distance))
            loss_log.append(loss.item())
            pts_dist_log.append(chamfer_dist.item())
            cam_dist_log.append(camera_distance.item())
        if it % 20000 == 0 or it == n_iter - 1:
            with torch.no_grad():
                from matplotlib import pyplot as plt
                plt.hist(
                    torch.sqrt(proj_error_filtered).detach().cpu().numpy())
        if it % 200000 == 0 or it == n_iter - 1:
            plt.figure()
            plt.plot(loss_log)
            plt.figure()
            plt.plot(pts_dist_log, label="chamfer_dist")
            plt.plot(cam_dist_log, label="cam_dist")
            plt.legend()
            plot_camera_scene(
                cameras_absolute, cameras_absolute_gt, points, ref_pointcloud,
                'iteration={}; chamfer distance={}'.format(it, chamfer_dist))

    print('Optimization finished.')
Example #16
    def show_depth(self, depth_loss: float, name_postfix: str,
                   loss_mask_now: torch.Tensor):
        self._viz.images(
            torch.cat(
                (
                    make_depth_image(self.depth_render, loss_mask_now),
                    make_depth_image(self.depth_map, loss_mask_now),
                ),
                dim=3,
            ),
            env=self.visdom_env,
            win="depth_abs" + name_postfix,
            opts={"title": f"depth_abs_{name_postfix}_{depth_loss:1.2f}"},
        )
        self._viz.images(
            loss_mask_now,
            env=self.visdom_env,
            win="depth_abs" + name_postfix + "_mask",
            opts={"title": f"depth_abs_{name_postfix}_{depth_loss:1.2f}_mask"},
        )
        self._viz.images(
            self.depth_mask,
            env=self.visdom_env,
            win="depth_abs" + name_postfix + "_maskd",
            opts={
                "title": f"depth_abs_{name_postfix}_{depth_loss:1.2f}_maskd"
            },
        )

        # show the 3D plot
        # pyre-fixme[9]: viewpoint_trivial has type `PerspectiveCameras`; used as
        #  `TensorProperties`.
        viewpoint_trivial: PerspectiveCameras = PerspectiveCameras().to(
            loss_mask_now.device)
        pcl_pred = get_rgbd_point_cloud(
            viewpoint_trivial,
            self.image_render,
            self.depth_render,
            # mask_crop,
            torch.ones_like(self.depth_render),
            # loss_mask_now,
        )
        pcl_gt = get_rgbd_point_cloud(
            viewpoint_trivial,
            self.image_rgb_masked,
            self.depth_map,
            # mask_crop,
            torch.ones_like(self.depth_map),
            # loss_mask_now,
        )
        _pcls = {
            pn: p
            for pn, p in zip(("pred_depth", "gt_depth"), (pcl_pred, pcl_gt))
            if int(p.num_points_per_cloud()) > 0
        }
        plotlyplot = plot_scene(
            {f"pcl{name_postfix}": _pcls},
            camera_scale=1.0,
            pointcloud_max_points=10000,
            pointcloud_marker_size=1,
        )
        self._viz.plotlyplot(
            plotlyplot,
            env=self.visdom_env,
            win=f"pcl{name_postfix}",
        )