def test_rotate_on_spot_yaw(self):
    N = 14
    eye = torch.rand(N, 3)
    at = torch.rand(N, 3)
    up = torch.rand(N, 3)
    R, T = look_at_view_transform(eye=eye, at=at, up=up)

    # Rotating around the y axis looks left.
    angles = torch.FloatTensor([0, -radians(10), 0])
    rotation = axis_angle_to_matrix(angles)
    R_rot, T_rot = rotate_on_spot(R, T, rotation)

    eye_rot, at_rot, up_rot = camera_to_eye_at_up(
        get_world_to_view_transform(R=R_rot, T=T_rot)
    )
    self.assertClose(eye, eye_rot, atol=1e-5)

    # Make vectors pointing exactly left and up.
    left = torch.cross(up, at - eye, dim=-1)
    left_rot = torch.cross(up_rot, at_rot - eye_rot, dim=-1)
    fully_up = torch.cross(at - eye, left, dim=-1)
    fully_up_rot = torch.cross(at_rot - eye_rot, left_rot, dim=-1)

    # The up direction is unchanged.
    self.assertClose(normalize(fully_up), normalize(fully_up_rot), atol=1e-5)

    # The camera has turned left.
    agree = _batched_dotprod(torch.cross(left, left_rot, dim=1), fully_up)
    self.assertGreater(agree.min(), 0)

    # Batch dimension for rotation.
    R_rot2, T_rot2 = rotate_on_spot(R, T, rotation.expand(N, 3, 3))
    self.assertClose(R_rot, R_rot2)
    self.assertClose(T_rot, T_rot2)

    # No batch dimension for either.
    R_rot3, T_rot3 = rotate_on_spot(R[0], T[0], rotation)
    self.assertClose(R_rot[:1], R_rot3)
    self.assertClose(T_rot[:1], T_rot3)

    # No batch dimension for R, T.
    R_rot4, T_rot4 = rotate_on_spot(R[0], T[0], rotation.expand(N, 3, 3))
    self.assertClose(R_rot[:1].expand(N, 3, 3), R_rot4)
    self.assertClose(T_rot[:1].expand(N, 3), T_rot4)
def test_rotate_on_spot_roll(self):
    N = 14
    eye = torch.rand(N, 3)
    at = torch.rand(N, 3)
    up = torch.rand(N, 3)
    R, T = look_at_view_transform(eye=eye, at=at, up=up)

    # Moving around the z axis rotates the image.
    angles = torch.FloatTensor([0, 0, -radians(10)])
    rotation = axis_angle_to_matrix(angles)
    R_rot, T_rot = rotate_on_spot(R, T, rotation)

    eye_rot, at_rot, up_rot = camera_to_eye_at_up(
        get_world_to_view_transform(R=R_rot, T=T_rot)
    )
    self.assertClose(eye, eye_rot, atol=1e-5)
    self.assertClose(normalize(at - eye), normalize(at_rot - eye), atol=1e-5)

    # The camera has moved clockwise.
    agree = _batched_dotprod(torch.cross(up, up_rot, dim=1), at - eye)
    self.assertGreater(agree.min(), 0)
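# A minimal sketch (not part of the test suite) of the pattern the two tests
# above exercise, assuming the same imports: `rotate_on_spot` keeps the camera
# position fixed and turns only its viewing direction, with the rotation given
# in camera coordinates. The function name below is hypothetical.
def _example_rotate_on_spot():
    # Camera at (0, 0, -3) looking at the origin.
    R, T = look_at_view_transform(eye=((0.0, 0.0, -3.0),))
    # A negative rotation about the camera y axis turns the view to the left.
    turn_left = axis_angle_to_matrix(torch.FloatTensor([0.0, -radians(10), 0.0]))
    # The camera stays in place; only R and T change to encode the new direction.
    R_rot, T_rot = rotate_on_spot(R, T, turn_left)
    return R_rot, T_rot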
def _transform_cam_params(
    cam_params: torch.Tensor,
    width: int,
    height: int,
    orthogonal: bool,
    right_handed: bool,
    first_R_then_T: bool = False,
) -> Tuple[
    torch.Tensor,
    torch.Tensor,
    torch.Tensor,
    torch.Tensor,
    torch.Tensor,
    torch.Tensor,
]:
    """
    Transform 8 component camera parameter vector(s) to the internal camera
    representation.

    The input vectors consist of:
    * 3 components for camera position,
    * 3 components for camera rotation (three rotation angles) or
      6 components as described in "On the Continuity of Rotation
      Representations in Neural Networks" (Zhou et al.),
    * focal length,
    * the sensor width in world coordinates,
    * [optional] the principal point offsets in x and y.

    The sensor height is inferred from the sensor width and the pixel size
    so that pixels are square.

    Args:
        * cam_params: [Bx]{8, 10, 11, 13}, input tensors as described above.
        * width: number of pixels in x direction.
        * height: number of pixels in y direction.
        * orthogonal: bool, whether an orthogonal projection is used
          (does not use the focal length).
        * right_handed: bool, whether to use a right handed system
          (negative z in camera direction).
        * first_R_then_T: bool, whether to first rotate, then translate
          the camera (PyTorch3D convention).

    Returns:
        * pos_vec: the position vector in 3D,
        * pixel_0_0_center: the center of the upper left pixel in world
          coordinates,
        * pixel_vec_x: the step to move one pixel on the image x axis
          in world coordinates,
        * pixel_vec_y: the step to move one pixel on the image y axis
          in world coordinates,
        * focal_length: the focal lengths,
        * principal_point_offsets: the principal point offsets in x, y.
    """
    global AXANGLE_WARNING_EMITTED
    # Set up all direction vectors, i.e., the sensor direction of all axes.
    assert width > 0
    assert height > 0
    batch_processing = True
    if cam_params.ndimension() == 1:
        batch_processing = False
        cam_params = cam_params[None, :]
    batch_size = cam_params.size(0)
    continuous_rep = True
    if cam_params.shape[1] in [8, 10]:
        if cam_params.requires_grad and not AXANGLE_WARNING_EMITTED:
            warnings.warn(
                "Using an axis angle representation for camera rotations. "
                "This has discontinuities and should not be used for "
                "optimization. Alternatively, use a six-component "
                "representation as described in 'On the Continuity of "
                "Rotation Representations in Neural Networks' (Zhou et al.). "
                "The `pytorch3d.transforms` module provides facilities for "
                "using this representation."
            )
            AXANGLE_WARNING_EMITTED = True
        continuous_rep = False
    else:
        assert cam_params.shape[1] in [11, 13]
    pos_vec: torch.Tensor = cam_params[:, :3]
    principal_point_offsets: torch.Tensor = torch.zeros(
        (cam_params.shape[0], 2), dtype=torch.int32, device=cam_params.device
    )
    if continuous_rep:
        rot_vec = cam_params[:, 3:9]
        focal_length: torch.Tensor = cam_params[:, 9:10]
        sensor_size_x = cam_params[:, 10:11]
        if cam_params.shape[1] == 13:
            principal_point_offsets = cam_params[:, 11:13].to(torch.int32)
    else:
        rot_vec = cam_params[:, 3:6]
        focal_length = cam_params[:, 6:7]
        sensor_size_x = cam_params[:, 7:8]
        if cam_params.shape[1] == 10:
            principal_point_offsets = cam_params[:, 8:10].to(torch.int32)
    # Always get square pixels.
    pixel_size_x = sensor_size_x / float(width)
    sensor_size_y = height * pixel_size_x
    if continuous_rep:
        rot_mat = rotation_6d_to_matrix(rot_vec)
    else:
        rot_mat = axis_angle_to_matrix(rot_vec)
    if first_R_then_T:
        pos_vec = torch.matmul(rot_mat, pos_vec[..., None])[:, :, 0]
    LOGGER.debug(
        "Camera position: %s, rotation: %s. Focal length: %s.",
        str(pos_vec),
        str(rot_vec),
        str(focal_length),
    )
    sensor_dir_x = torch.matmul(
        rot_mat,
        torch.tensor(
            [1.0, 0.0, 0.0], dtype=torch.float32, device=rot_mat.device
        ).repeat(batch_size, 1)[:, :, None],
    )[:, :, 0]
    sensor_dir_y = torch.matmul(
        rot_mat,
        torch.tensor(
            [0.0, -1.0, 0.0], dtype=torch.float32, device=rot_mat.device
        ).repeat(batch_size, 1)[:, :, None],
    )[:, :, 0]
    sensor_dir_z = torch.matmul(
        rot_mat,
        torch.tensor(
            [0.0, 0.0, 1.0], dtype=torch.float32, device=rot_mat.device
        ).repeat(batch_size, 1)[:, :, None],
    )[:, :, 0]
    if right_handed:
        sensor_dir_z *= -1
    LOGGER.debug(
        "Sensor direction vectors: %s, %s, %s.",
        str(sensor_dir_x),
        str(sensor_dir_y),
        str(sensor_dir_z),
    )
    if orthogonal:
        sensor_center = pos_vec
    else:
        sensor_center = pos_vec + focal_length * sensor_dir_z
    LOGGER.debug("Sensor center: %s.", str(sensor_center))
    sensor_luc = (  # Sensor left upper corner.
        sensor_center
        - sensor_dir_x * (sensor_size_x / 2.0)
        - sensor_dir_y * (sensor_size_y / 2.0)
    )
    LOGGER.debug("Sensor luc: %s.", str(sensor_luc))
    pixel_size_y = sensor_size_y / float(height)
    LOGGER.debug(
        "Pixel sizes (x): %s, (y): %s.", str(pixel_size_x), str(pixel_size_y)
    )
    pixel_vec_x: torch.Tensor = sensor_dir_x * pixel_size_x
    pixel_vec_y: torch.Tensor = sensor_dir_y * pixel_size_y
    pixel_0_0_center = sensor_luc + 0.5 * pixel_vec_x + 0.5 * pixel_vec_y
    LOGGER.debug(
        "Pixel 0 centers: %s, vec x: %s, vec y: %s.",
        str(pixel_0_0_center),
        str(pixel_vec_x),
        str(pixel_vec_y),
    )
    if not orthogonal:
        LOGGER.debug(
            "Camera horizontal fovs: %s deg.",
            str(2.0 * torch.atan(0.5 * sensor_size_x / focal_length) / math.pi * 180.0),
        )
        LOGGER.debug(
            "Camera vertical fovs: %s deg.",
            str(2.0 * torch.atan(0.5 * sensor_size_y / focal_length) / math.pi * 180.0),
        )
    # Reduce dimension.
    focal_length = focal_length[:, 0]
    if batch_processing:
        return (
            pos_vec,
            pixel_0_0_center,
            pixel_vec_x,
            pixel_vec_y,
            focal_length,
            principal_point_offsets,
        )
    else:
        return (
            pos_vec[0],
            pixel_0_0_center[0],
            pixel_vec_x[0],
            pixel_vec_y[0],
            focal_length[0],
            principal_point_offsets[0],
        )
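# A minimal sketch (not part of the module) of calling _transform_cam_params
# with the 8-component layout from the docstring: position (3), axis-angle
# rotation (3), focal length (1) and sensor width (1). The concrete values
# and the function name below are illustrative assumptions.
def _example_transform_cam_params():
    cam_params = torch.tensor(
        # [pos_x, pos_y, pos_z, rot_x, rot_y, rot_z, focal_length, sensor_width]
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 2.0],
        dtype=torch.float32,
    )
    (
        pos_vec,  # (3,) camera position.
        pixel_0_0_center,  # (3,) world coordinates of the upper left pixel center.
        pixel_vec_x,  # (3,) world-space step for one pixel in image x.
        pixel_vec_y,  # (3,) world-space step for one pixel in image y.
        focal_length,  # Scalar tensor, the focal length.
        principal_point_offsets,  # (2,) int32, zeros since none were given.
    ) = _transform_cam_params(
        cam_params, width=640, height=480, orthogonal=False, right_handed=True
    )
    # The world position of pixel (y, x) then follows as
    # pixel_0_0_center + x * pixel_vec_x + y * pixel_vec_y.
    return pos_vec, pixel_0_0_center, pixel_vec_x, pixel_vec_y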
def __init__(self):
    super(SceneModel, self).__init__()
    self.gamma = 0.1
    # Points.
    torch.manual_seed(1)
    vert_pos = torch.rand(N_POINTS, 3, dtype=torch.float32) * 10.0
    vert_pos[:, 2] += 25.0
    vert_pos[:, :2] -= 5.0
    self.register_parameter(
        "vert_pos", nn.Parameter(vert_pos, requires_grad=False)
    )
    self.register_parameter(
        "vert_col",
        nn.Parameter(
            torch.rand(N_POINTS, 3, dtype=torch.float32),
            requires_grad=False,
        ),
    )
    self.register_parameter(
        "vert_rad",
        nn.Parameter(
            torch.rand(N_POINTS, dtype=torch.float32),
            requires_grad=False,
        ),
    )
    self.register_parameter(
        "cam_pos",
        nn.Parameter(
            torch.tensor([0.1, 0.1, 0.0], dtype=torch.float32),
            requires_grad=True,
        ),
    )
    self.register_parameter(
        "cam_rot",
        # We're using the 6D rot. representation for better gradients.
        nn.Parameter(
            axis_angle_to_matrix(
                torch.tensor(
                    [[0.02, 0.02, 0.01]],
                    dtype=torch.float32,
                )
            )[0],
            requires_grad=True,
        ),
    )
    self.register_parameter(
        "focal_length",
        nn.Parameter(
            torch.tensor([4.8 * 2.0 / 2.0], dtype=torch.float32),
            requires_grad=True,
        ),
    )
    self.cameras = PerspectiveCameras(
        # The focal length must be doubled for PyTorch3D because the NDC
        # coordinates span a range of two, and it must be normalized by the
        # sensor width (see the pulsar example). This means 5.0 * 2.0 / 2.0
        # is needed here to get results equivalent to pulsar.
        #
        # R, T and f are provided here, but will be provided again at every
        # call to the forward method. The reason is a PyTorch issue that
        # makes device placement of gradients problematic for tensors which
        # are themselves on a 'gradient path' but are not leaves in the
        # computation graph. This will be addressed by an architectural
        # change in PyTorch3D in the future. Until then, this workaround is
        # recommended.
        focal_length=self.focal_length,
        R=self.cam_rot[None, ...],
        T=self.cam_pos[None, ...],
        image_size=((HEIGHT, WIDTH),),
        device=DEVICE,
    )
    raster_settings = PointsRasterizationSettings(
        image_size=(HEIGHT, WIDTH),
        radius=self.vert_rad,
    )
    rasterizer = PointsRasterizer(
        cameras=self.cameras, raster_settings=raster_settings
    )
    self.renderer = PulsarPointsRenderer(rasterizer=rasterizer)
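# A minimal optimization-loop sketch for the model above; this is an
# assumption about how it would be driven, not code from the example.
# `SceneModel.forward` (not shown here) is expected to render the point cloud
# through `self.renderer`; since only `cam_pos`, `cam_rot` and `focal_length`
# require gradients, minimizing a pixel loss against a reference image
# optimizes just the camera. The function name and hyperparameters are
# hypothetical.
def _example_optimize_camera(target_image: torch.Tensor, n_steps: int = 300):
    model = SceneModel().to(DEVICE)
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)
    for _ in range(n_steps):
        optimizer.zero_grad()
        image = model()  # Expected shape: (HEIGHT, WIDTH, 3).
        loss = ((image - target_image) ** 2).mean()
        loss.backward()
        optimizer.step()
    return model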