def render_channels(channels, disparity, source_pose, source_intrinsics,
                    target_pose, target_intrinsics):
    """Render channels from a new target viewpoint, given disparity.

    Args:
      channels: [B, H, W, C] Channels to render.
      disparity: [B, H, W, 1] Inverse depth.
      source_pose: [B, 3, 4] reference camera pose.
      source_intrinsics: [B, 4] reference intrinsics.
      target_pose: [B, 3, 4] target camera pose.
      target_intrinsics: [B, 4] target intrinsics.

    Returns:
      A tuple of:
        [B, H, W, C] Rendered channels at the target view.
        [B, H, W, 1] Rendered disparity at the target view, clipped to the
          range [0.01, 100.0].
    """
    (batch_size, height, width, channel_count) = channels.get_shape().as_list()

    # Relative pose maps source camera space to target camera space.
    relative_pose = geometry.mat34_product(
        target_pose, geometry.mat34_pose_inverse(source_pose))

    # Un-project every source pixel into a 3D mesh vertex.
    vertices = render_utils.create_vertices_intrinsics(disparity[Ellipsis, 0],
                                                       source_intrinsics)

    # Depth of each point from the target camera: the last coordinate of the
    # points transformed into the target frame.
    target_depths = geometry.mat34_transform(relative_pose, vertices)[Ellipsis,
                                                                      -1:]

    # Add target-view depths as an extra vertex attribute, so disparity can be
    # recovered from the rasterizer output alongside the channels.
    attributes = tf.reshape(channels,
                            (batch_size, width * height, channel_count))
    attributes = tf.concat([attributes, target_depths], -1)

    # Triangulate the pixel grid.
    triangles = render_utils.create_triangles(height, width)
    triangles = tf.convert_to_tensor(triangles, tf.int32)

    # Full projection: perspective projection composed with the relative pose.
    target_perspective = render_utils.perspective_from_intrinsics(
        target_intrinsics)
    relative_pose = geometry.mat34_to_mat44(relative_pose)
    proj_matrix = tf.matmul(target_perspective, relative_pose)

    # Zero background value for channels, large background value for depth.
    background = [0.0] * channel_count + [1000.0]

    # Render with the mesh_renderer library.
    output = rasterize_triangles.rasterize(vertices, attributes, triangles,
                                           proj_matrix, width, height,
                                           background)
    output_channels, output_depths = tf.split(output, [channel_count, 1],
                                              axis=-1)
    # Clamp depth to [0.01, 100.0] before inverting so background pixels map
    # to a small finite disparity instead of infinity.
    output_disparity = tf.math.divide_no_nan(
        1.0, tf.clip_by_value(output_depths, 1.0 / 100.0, 1.0 / 0.01))

    return (output_channels, output_disparity)
def camera_with_look_direction(position, look_direction, down_direction):
  """A camera pose specified by where it is and what direction it looks in.

  Args:
    position: [..., 3] position of camera in world.
    look_direction: [..., 3] direction of optical axis (need not be normalised).
    down_direction: [..., 3] a direction that should project to down (+ve Y).

  Returns:
    [..., 3, 4] Camera pose.
  """
  # Build an orthonormal camera frame expressed in world coordinates. The
  # optical axis plays the role of Z and the down direction that of Y, so by
  # the right-hand rule X = Y cross Z.
  z_axis = tf.math.l2_normalize(look_direction, axis=-1)
  x_axis = tf.math.l2_normalize(
      tf.linalg.cross(down_direction, z_axis), axis=-1)
  y_axis = tf.linalg.cross(z_axis, x_axis)
  # The frame axes plus the camera position form the columns of the
  # camera-to-world matrix; invert it to get the world-to-camera pose.
  camera_to_world = tf.stack([x_axis, y_axis, z_axis, position], axis=-1)
  return geometry.mat34_pose_inverse(camera_to_world)
def fly_dynamic(intrinsics,
                initial_pose,
                speed=0.2,
                lerp=0.05,
                movelerp=0.05,
                horizon=0.3,
                near_fraction=0.2,
                meander_x_period=100,
                meander_x_magnitude=0.0,
                meander_y_period=100,
                meander_y_magnitude=0.0,
                turn_function=None):
    """Return a function for flying a camera heuristically.

    The returned function inspects the disparity of each frame to decide
    whether to look more up/down or left/right, and whether to steer the
    camera further from or nearer to the ground.

    Args:
      intrinsics: [4] Camera intrinsics.
      initial_pose: [3, 4] Initial camera pose.
      speed: How far to move per step (negative flies backwards).
      lerp: How fast to converge look direction to target.
      movelerp: How fast to converge movement to target.
      horizon: What fraction of the image should lie above the horizon.
      near_fraction: Passed through to skyline_balance.
      meander_x_period: Number of frames for one cycle of horizontal meander.
      meander_x_magnitude: How far to meander horizontally.
      meander_y_period: Number of frames for one cycle of vertical meander.
      meander_y_magnitude: How far to meander vertically.
      turn_function: A function which returns an x, y position to turn towards.

    Returns:
      A function fly_step which takes an rgbd image and returns the pose for
      the next camera. Call fly_step repeatedly to generate a series of poses.
      This is a stateful function and will internally keep track of camera
      position and velocity. Can only operate in eager mode.
    """
    # Closure state: current orientation, heading, position and frame index.
    cam_to_world = geometry.mat34_pose_inverse(initial_pose)
    gaze_dir = cam_to_world[:, 2]
    heading = gaze_dir  # Begin by moving forwards.
    down_axis = cam_to_world[:, 1]
    cam_position = cam_to_world[:, 3]
    frame = 0

    flying_backwards = (speed < 0)

    def fly_step(rgbd):
        nonlocal cam_to_world
        nonlocal gaze_dir
        nonlocal heading
        nonlocal down_axis
        nonlocal cam_position
        nonlocal frame

        # Optional externally-driven turning plus cyclic meander offsets.
        (xoff, yoff) = turn_function(frame) if turn_function else (0.0, 0.0)
        xoff += math.sin(
            frame * 2.0 * math.pi / meander_x_period) * meander_x_magnitude
        yoff += math.sin(
            frame * 2.0 * math.pi / meander_y_period) * meander_y_magnitude
        frame = frame + 1

        down_axis = cam_to_world[:, 1]  # Comment this out for fixed down
        disparity = rgbd[Ellipsis, 3:]
        x, y, h = skyline_balance(
            disparity, horizon=horizon, near_fraction=near_fraction)
        if flying_backwards:
            h = 1.0 - h
            x = 1.0 - x
        look_uv = tf.stack([x + xoff, y + yoff])
        move_uv = tf.stack([0.5, h])
        uvs = tf.stack([look_uv, move_uv], axis=0)

        # Lift the two texture points into world space.
        world_points = geometry.mat34_transform(
            cam_to_world,
            geometry.texture_to_camera_coordinates(uvs, intrinsics))
        target_gaze = tf.math.l2_normalize(world_points[0] - cam_position)
        target_heading = tf.math.l2_normalize(world_points[1] - cam_position)

        # Very simple exponential smoothing towards the targets, then advance.
        gaze_dir = gaze_dir * (1.0 - lerp) + target_gaze * lerp
        heading = heading * (1.0 - movelerp) + target_heading * movelerp
        cam_position = cam_position + heading * speed

        # Pose for the next frame.
        pose = camera_with_look_direction(cam_position, gaze_dir, down_axis)
        cam_to_world = geometry.mat34_pose_inverse(pose)
        return pose

    return fly_step