Example #1
# Imports assumed by this snippet; `geometry` and `math_utils` are
# project-local helper modules, and the TF1-style tf.name_scope
# signature is used below.
import tensorflow as tf
import tensorflow_addons as tfa

import geometry
import math_utils

def rotate_image_on_pano(images, rotations, fov, output_shape):
    """Transform perspective images to equirectangular images after rotations.

  Return equirectangular panoramic images in which the input perspective images
  embedded in after the rotation R from the input images' frame to the target
  frame. The image with the field of view "fov" centered at camera's look-at -Z
  axis is projected onto the pano. The -Z axis corresponds to the spherical
  coordinates (pi/2, pi/2) which is (HEIGHT/2, WIDTH/4) on the pano.

  Args:
    images: [BATCH, HEIGHT, WIDTH, CHANNEL] perspective view images.
    rotations: [BATCH, 3, 3] rotations matrices.
    fov: (float) images' field of view in degrees.
    output_shape: a 2-D list of output dimension [height, width].

  Returns:
    equirectangular images [BATCH, height, width, CHANNELS].
  """
    with tf.name_scope(None, 'rotate_image_on_pano',
                       [images, rotations, fov, output_shape]):
        if len(images.shape) != 4:
            raise ValueError("'images' has the wrong dimensions.")
        if rotations.shape[-2:] != [3, 3]:
            raise ValueError("'rotations' must have 3x3 dimensions.")

        shape = images.shape.as_list()
        batch, height, width = shape[0], shape[1], shape[2]
        # Generate a mesh grid on a sphere.
        spherical = geometry.generate_equirectangular_grid(output_shape)
        cartesian = geometry.spherical_to_cartesian(spherical)
        cartesian = tf.tile(cartesian[tf.newaxis, :, :, :, tf.newaxis],
                            [batch, 1, 1, 1, 1])
        # Change of basis between the spherical grid's axes and the camera
        # axes (an axis permutation with a sign flip).
        axis_convert = tf.constant([[0., -1., 0.], [0., 0., 1.], [1., 0., 0.]])
        cartesian = tf.matmul(axis_convert, cartesian)
        cartesian = tf.squeeze(
            tf.matmul(rotations[:, tf.newaxis, tf.newaxis], cartesian), -1)
        # Keep only the hemisphere in front of the camera (its -Z look-at
        # direction, i.e. points with z < 0).
        hemisphere_mask = tf.cast(cartesian[:, :, :, -1:] < 0, tf.float32)
        # Perspective-divide onto the image plane.
        image_coordinates = cartesian[:, :, :, :2] / cartesian[:, :, :, -1:]
        x, y = tf.split(image_coordinates, [1, 1], -1)
        # Map pixels on equirectangular pano to perspective image.
        nx = -x * width / (2 * tf.tan(math_utils.degrees_to_radians(
            fov / 2))) + width / 2 - 0.5
        ny = y * height / (2 * tf.tan(math_utils.degrees_to_radians(
            fov / 2))) + height / 2 - 0.5
        transformed = hemisphere_mask * tfa.image.resampler(
            images, tf.concat([nx, ny], -1))
        return transformed
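
A minimal usage sketch for the function above, assuming its module dependencies are importable and that tensors are evaluated eagerly (in a TF1 graph-mode setup the result would still need a session run). The image size, identity rotation, 90-degree fov, and 512x1024 pano shape are illustrative values, not taken from the original repo:

images = tf.zeros([1, 256, 256, 3], dtype=tf.float32)  # [BATCH, H, W, C]
rotations = tf.eye(3, batch_shape=[1])                 # identity rotation
pano = rotate_image_on_pano(images, rotations, fov=90.0,
                            output_shape=[512, 1024])  # -> [1, 512, 1024, 3]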
Example #2
# Imports assumed by this snippet; `geometry` and `math_utils` are
# project-local helper modules.
import tensorflow as tf
import tensorflow_addons as tfa

import geometry
import math_utils

def rotate_image_in_3d(images, input_rotations, input_fov, output_fov,
                       output_shape):
    """Return reprojected perspective view images given a rotated camera.

  This function applies a homography H = K_output * R^T * K_input' where
  K_output and K_input are the output and input camera intrinsics, R is the
  rotation from the input images' frame to the target frame.

  Args:
    images: [BATCH, HEIGHT, WIDTH, CHANNEL] perspective view images.
    input_rotations: [BATCH, 3, 3] rotations matrices from current camera frame
      to target camera frame.
    input_fov: [BATCH] a 1-D tensor (float32) of input field of view in degrees.
    output_fov: (float) output field of view in degrees.
    output_shape: a 2-D list of output dimension [height, width].

  Returns:
    reprojected images [BATCH, height, width, CHANNELS].
  """
    with tf.name_scope(None, 'rotate_image_in_3d',
                       [images, input_rotations, input_fov, output_fov,
                        output_shape]):
        if len(images.shape) != 4:
            raise ValueError("'images' has the wrong dimensions.")
        if input_rotations.shape[-2:] != [3, 3]:
            raise ValueError("'input_rotations' must have 3x3 dimensions.")

        shape = images.shape.as_list()
        batch, height, width = shape[0], shape[1], shape[2]
        # Generate rays through the output pixel centres (output camera frame).
        cartesian = geometry.generate_cartesian_grid(output_shape, output_fov)
        cartesian = tf.tile(cartesian[tf.newaxis, :, :, :, tf.newaxis],
                            [batch, 1, 1, 1, 1])
        # Broadcast the rotations over the output pixel grid.
        input_rotations = tf.tile(
            input_rotations[:, tf.newaxis, tf.newaxis, :],
            [1] + output_shape + [1, 1])
        # Rotate the rays into the input camera frame (R^T, since
        # input_rotations map the input frame to the target frame).
        cartesian = tf.squeeze(
            tf.matmul(input_rotations, cartesian, transpose_a=True), -1)
        # Perspective-divide onto the input image plane at z = -1.
        image_coordinates = -cartesian[:, :, :, :2] / cartesian[:, :, :, -1:]
        x, y = tf.split(image_coordinates, [1, 1], -1)
        # Extent of the input image plane, derived from the per-image input
        # fov (the same fov is used for both axes).
        w = 2 * tf.tan(math_utils.degrees_to_radians(input_fov / 2))
        h = 2 * tf.tan(math_utils.degrees_to_radians(input_fov / 2))
        w = w[:, tf.newaxis, tf.newaxis, tf.newaxis]
        h = h[:, tf.newaxis, tf.newaxis, tf.newaxis]
        # Map to input pixel coordinates (half-integer pixel centres).
        nx = x * width / w + width / 2 - 0.5
        ny = -y * height / h + height / 2 - 0.5
        return tfa.image.resampler(images, tf.concat([nx, ny], -1))
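
A similar hedged sketch for this function; note that input_fov is batched per image while output_fov is a single scalar (all values below are illustrative):

images = tf.zeros([2, 128, 128, 3], dtype=tf.float32)
rotations = tf.eye(3, batch_shape=[2])                 # identity rotations
input_fov = tf.constant([60.0, 60.0])                  # per-image fov, degrees
out = rotate_image_in_3d(images, rotations, input_fov,
                         output_fov=90.0, output_shape=[128, 128])

With identity rotations the homography reduces to a pure change of intrinsics, i.e. a zoom from the 60-degree input fov out to the 90-degree output fov.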
Example #3
# Imports assumed by this snippet (tf.lin_space is the TF1-style name).
import math

import tensorflow as tf

def generate_cartesian_grid(resolution, fov):
    """Get (x, y, z) coordinates of all pixel centres in the image.

  The image plane lies at z=-1 and the image center is (0, 0, -1).
  Args:
    resolution: a 2-D list containing the resolution (height, width)
                of the desired output.
    fov: (float) camera's horizontal field of view in degrees.

  Returns:
    3-D tensor of shape `[HEIGHT, WIDTH, 3]`

  Raises:
    ValueError: 'resolution' is not valid.
  """
    with tf.name_scope(None, 'generate_cartesian_grid', [resolution, fov]):
        if not isinstance(resolution, list) or len(resolution) != 2:
            raise ValueError("'resolution' is not valid.")

        fov = fov / 180 * math.pi
        width = 2 * tf.tan(fov / 2)
        height = width * resolution[0] / resolution[1]
        pixel_size = width / resolution[1]
        x_range = width - pixel_size
        y_range = height - pixel_size
        # x increases from left to right while y increases from bottom to top.
        # Use half-integer pixel centre convention, and generate the coordinates
        # for the centres of the pixels.
        # For example, a 2x3 grid with pixel_size=1 (height=2, width=3) should have
        # [(-1.0,  0.5), (0.0,  0.5), (1.0,  0.5),
        #  (-1.0, -0.5), (0.0, -0.5), (1.0, -0.5)]
        xx, yy = tf.meshgrid(
            tf.lin_space(-x_range / 2, x_range / 2, resolution[1]),
            tf.lin_space(y_range / 2, -y_range / 2, resolution[0]))
        grid = tf.stack([xx, yy, -tf.ones_like(xx)], axis=-1)
        return grid
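
A small worked check of the pixel-centre comment above; the fov value is chosen (for illustration only) so that the image plane is 3 units wide, making pixel_size exactly 1 on a 2x3 grid:

fov = 2 * math.degrees(math.atan(1.5))   # ~112.62 deg -> width 2*tan(fov/2) = 3
grid = generate_cartesian_grid([2, 3], fov)
# grid[..., :2] evaluates (up to float error) to
#   [[(-1.0,  0.5), (0.0,  0.5), (1.0,  0.5)],
#    [(-1.0, -0.5), (0.0, -0.5), (1.0, -0.5)]]
# and grid[..., 2] is -1.0 everywhere (image plane at z = -1).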