def test_mat34_pose_inverse(self):
    """Checks mat34_pose_inverse against known (pose, inverse) pairs.

    Each pair is verified in both directions, for single matrices as well as
    batched inputs. The product of every pose with its computed inverse is
    also compared against the identity pose.
    """
    identity = [[1.0, 0.0, 0.0, 0.0],
                [0.0, 1.0, 0.0, 0.0],
                [0.0, 0.0, 1.0, 0.0]]
    translate = [[1.0, 0.0, 0.0, 1.0],
                 [0.0, 1.0, 0.0, 2.0],
                 [0.0, 0.0, 1.0, 3.0]]
    untranslate = [[1.0, 0.0, 0.0, -1.0],
                   [0.0, 1.0, 0.0, -2.0],
                   [0.0, 0.0, 1.0, -3.0]]

    # A rotation about the Y axis combined with a translation along X.
    cos_a = math.cos(1.0)
    sin_a = math.sin(1.0)
    rotate = [[cos_a, 0.0, -sin_a, 10.0],
              [0.0, 1.0, 0.0, 0.0],
              [sin_a, 0.0, cos_a, 0.0]]
    unrotate = [[cos_a, 0.0, sin_a, -10.0 * cos_a],
                [0.0, 1.0, 0.0, 0.0],
                [-sin_a, 0.0, cos_a, 10 * sin_a]]

    # (pose, inverse) pairs; each one is checked in both directions.
    cases = [
        (identity, identity),
        (translate, untranslate),
        (rotate, unrotate),
        # Batched inputs, with one and with two leading batch axes.
        ([identity, translate, rotate],
         [identity, untranslate, unrotate]),
        ([[identity, translate], [rotate, untranslate]],
         [[identity, untranslate], [unrotate, translate]]),
    ]

    for pose, expected_inverse in cases:
        forward = geometry.mat34_pose_inverse(tf.constant(pose))
        self.assertAllClose(expected_inverse, forward)

        backward = geometry.mat34_pose_inverse(tf.constant(expected_inverse))
        self.assertAllClose(pose, backward)

        # A pose composed with its own inverse must give the identity.
        pose_tensor = tf.constant(pose)
        pose_inverse = geometry.mat34_pose_inverse(tf.constant(pose))
        product = geometry.mat34_product(pose_tensor, pose_inverse)
        _, identities = utils.broadcast_to_match(
            product, tf.constant(identity))
        self.assertAllClose(product, identities)
def mat34_transform_planes(m, p):
    """Apply a 3x4 pose matrix to a set of 3d planes.

    Args:
      m: [..., 3, 4] matrix taking source space to target space.
      p: [..., N, 4] set of N planes expressed in source space.

    Returns:
      The planes p' expressed in target space: whenever a point x lies on a
      plane p, the point Mx lies on the corresponding plane p'. The leading
      "..." parts of the two shapes must agree, either exactly or through
      broadcasting.

    Raises:
      ValueError: if inputs are the wrong shape.
    """
    check_input_m34('m', m)
    check_input_shape('p', p, -1, 4)
    m, p = utils.broadcast_to_match(m, p, ignore_axes=2)

    # A point x lies on plane p when p . x = 0, and we need p' such that
    # p' . (M x) = 0. With T for transpose and i for inverse this reads
    # p'T M x = 0, hence p'T = pT Mi.
    # Planes are laid out as (N * 4), not (4 * N), so p already holds pT and
    # right-multiplying by the 4x4 inverse pose gives p'T directly.
    inverse_pose = mat34_pose_inverse(m)
    inverse_pose_44 = mat34_to_mat44(inverse_pose)
    return tf.matmul(p, inverse_pose_44)
def mat34_transform(m, v):
    """Apply a 3x4 pose matrix to a set of 3d points.

    Args:
      m: [..., 3, 4] matrix
      v: [..., N, 3] set of N 3d points.

    Returns:
      The transformed points mv. The result is what you would get by
      appending a coefficient of 1.0 to every point, multiplying by the
      matrix, and dropping the extra coefficient afterwards. The leading
      "..." parts of the two shapes must agree, either exactly or through
      broadcasting.

    Raises:
      ValueError: if inputs are the wrong shape.
    """
    check_input_m34('m', m)
    check_input_shape('v', v, -1, 3)
    m, v = utils.broadcast_to_match(m, v, ignore_axes=2)

    # First three columns: the 3x3 part of the pose.
    rotation = m[..., :3]
    # The last column is taken in two separate slicing steps on purpose; see
    # b/116203395 for why this is not written as the single slice
    # m[..., tf.newaxis, :, 3]. Resulting shape is [..., 1, 3].
    translation = m[..., 3][..., tf.newaxis, :]

    # Points are laid out as (N * 3), not (3 * N), so the product is taken in
    # reverse order (v times the transposed 3x3 part) instead of transposing v.
    rotated_points = tf.matmul(v, rotation, transpose_b=True)
    return rotated_points + translation
def mat34_product(a, b):
    """Compose two 3x4 matrices.

    Args:
      a: [..., 3, 4] matrix
      b: [..., 3, 4] matrix

    Returns:
      The product ab, computed as though each matrix had an extra bottom row
      [0, 0, 0, 1], the resulting 4x4 matrices were multiplied, and the
      extra row was then removed. The shapes of a and b must agree, either
      exactly or through broadcasting.

    Raises:
      ValueError: if a or b are not 3x4 matrices.
    """
    check_input_m34('a', a)
    check_input_m34('b', b)
    a, b = utils.broadcast_to_match(a, b, ignore_axes=2)

    # Separate each matrix into its 3x3 part and its final (translation)
    # column.
    a_linear, a_offset = tf.split(a, [3, 1], axis=-1)
    b_linear, b_offset = tf.split(b, [3, 1], axis=-1)

    # The 3x3 parts multiply directly; b's translation is carried through
    # a's 3x3 part and then added to a's own translation.
    product_linear = tf.matmul(a_linear, b_linear)
    carried_offset = tf.matmul(a_linear, b_offset)
    product_offset = a_offset + carried_offset

    # Put the two pieces back together as a [..., 3, 4] matrix.
    return tf.concat([product_linear, product_offset], axis=-1)
def homography_warp(image, homography, height=None, width=None, clamp=True):
    """Warp an image using an inverse homography.

    Args:
      image: [..., H, W, C] input image
      homography: [..., 3, 3] homography mapping output to input
      height: desired output height (or None to use input height)
      width: desired output width (or None to use input width)
      clamp: whether to clamp image coordinates (see sample_image doc)

    Returns:
      [..., height, width, C] warped image.
    """
    image, homography = utils.broadcast_to_match(
        image, homography, ignore_axes=(3, 2))

    # Fall back to the input's static shape for any size not supplied. The
    # shape is only queried when a size is actually missing.
    height = image.shape.as_list()[-3] if height is None else height
    width = image.shape.as_list()[-2] if width is None else width

    # Map each output pixel centre through the homography to find where to
    # sample the input image.
    output_grid = pixel_center_grid(height, width)
    sample_positions = apply_homography(homography, output_grid)
    return sample_image(image, sample_positions, clamp=clamp)
def broadcasting_matmul(a, b, **kwargs):
    """Matrix-multiply a and b after broadcasting them against each other.

    Args:
      a: left operand.
      b: right operand.
      **kwargs: forwarded unchanged to tf.matmul.

    Returns:
      The result of tf.matmul applied to the broadcast operands.
    """
    # NOTE(review): ignore_axes=2 presumably excludes the trailing two
    # (matrix) axes from broadcasting -- confirm against
    # utils.broadcast_to_match.
    lhs, rhs = utils.broadcast_to_match(a, b, ignore_axes=2)
    return tf.matmul(lhs, rhs, **kwargs)
def render_layers(layers,
                  depths,
                  pose,
                  intrinsics,
                  target_pose,
                  target_intrinsics,
                  height=None,
                  width=None,
                  clamp=True):
    """Warp the layers of an MPI into a target view.

    Args:
      layers: [..., L, H, W, C] MPI layers, back to front.
      depths: [..., L] MPI plane depths, back to front.
      pose: [..., 3, 4] reference camera pose.
      intrinsics: [..., 4] reference intrinsics.
      target_pose: [..., 3, 4] target camera pose.
      target_intrinsics: [..., 4] target intrinsics.
      height: height to render to in pixels (or None for input height).
      width: width to render to in pixels (or None for input width).
      clamp: whether to clamp image coordinates (see geometry.sample_image
        doc), i.e. extending the image beyond its size or not.

    Returns:
      [..., L, height, width, C] The layers warped to the target view by
      applying an appropriate homography to each one.
    """
    # The relative pose is computed from the poses as passed in, before any
    # per-layer axis is added below.
    source_to_target_pose = geometry.mat34_product(
        target_pose, geometry.mat34_pose_inverse(pose))

    # Insert an axis that lines up with L in the poses and intrinsics.
    pose_per_layer = pose[..., tf.newaxis, :, :]  # [..., 1, 3, 4]
    target_pose_per_layer = target_pose[..., tf.newaxis, :, :]  # [..., 1, 3, 4]
    intrinsics_per_layer = intrinsics[..., tf.newaxis, :]  # [..., 1, 4]
    target_intrinsics_per_layer = (
        target_intrinsics[..., tf.newaxis, :])  # [..., 1, 4]

    # Equations of the fronto-parallel planes at the given depths, expressed
    # in the reference camera's frame: normal (0, 0, 1), offset -depth.
    unit_z = tf.constant([0.0, 0.0, 1.0], shape=[1, 3])
    plane_offsets = -depths[..., tf.newaxis]  # [..., L, 1]
    unit_z, plane_offsets = utils.broadcast_to_match(
        unit_z, plane_offsets, ignore_axes=1)
    planes = tf.concat([unit_z, plane_offsets], axis=-1)  # [..., L, 4]

    homographies = geometry.inverse_homography(
        pose_per_layer, intrinsics_per_layer, target_pose_per_layer,
        target_intrinsics_per_layer, planes)  # [..., L, 3, 3]

    # Each of the [..., L] homographies inverse-warps its own layer image
    # onto the (height, width) grid of the target view.
    warped_layers = geometry.homography_warp(
        layers, homographies, height=height, width=width, clamp=clamp)

    # Back-face culling.
    #
    # Content behind the camera must not be rendered (otherwise upside-down
    # images of the layers could show up). A typical graphics approach tests
    # every pixel of every layer against a near-plane and discards the ones
    # in front of it. Something cheaper is done here: if the target camera
    # sees the "back" of a layer, that whole layer's alpha is set to zero.
    # This is simple and sufficient in practice to avoid nasty artefacts.

    # Planes expressed in target camera space: [..., L, 4].
    target_planes = geometry.mat34_transform_planes(
        source_to_target_pose, planes)

    # The fourth plane coordinate is the negative distance in front of the
    # camera, so a negative value marks a visible layer. Shape is [..., L].
    faces_camera = target_planes[..., -1] < 0.0
    visibility = tf.cast(faces_camera, dtype=tf.float32)
    # Reshape to [..., L, 1, 1, 1] so it broadcasts over H, W and C.
    alpha_scale = visibility[..., tf.newaxis, tf.newaxis, tf.newaxis]

    # Scale only the last (alpha) channel; other channels pass through.
    colour_channels = warped_layers[..., :-1]
    culled_alpha = warped_layers[..., -1:] * alpha_scale
    return tf.concat([colour_channels, culled_alpha], axis=-1)