def render_channels(channels, disparity, source_pose, source_intrinsics,
                    target_pose, target_intrinsics):
  """Render channels from new target position, given disparity.

  Args:
    channels: [B, H, W, C] Channels to render.
    disparity: [B, H, W, 1] Inverse depth at the source view.
    source_pose: [B, 3, 4] reference camera pose.
    source_intrinsics: [B, 4] reference intrinsics.
    target_pose: [B, 3, 4] target camera pose.
    target_intrinsics: [B, 4] target intrinsics.

  Returns:
    [B, H, W, C] Rendered channels at the target view.
    [B, H, W, 1] Rendered disparity at the target view.
  """
  batch_size, height, width, channel_count = channels.get_shape().as_list()

  # Relative pose maps source camera space to target camera space.
  relative_pose = geometry.mat34_product(
      target_pose, geometry.mat34_pose_inverse(source_pose))

  # Un-project the source image into a 3D mesh (one vertex per pixel).
  vertices = render_utils.create_vertices_intrinsics(
      disparity[Ellipsis, 0], source_intrinsics)

  # Depth of each vertex from the target camera (last coordinate after
  # transforming into target camera space). Carried as an extra vertex
  # attribute so the rasterizer interpolates it for us.
  target_depths = geometry.mat34_transform(
      relative_pose, vertices)[Ellipsis, -1:]
  attributes = tf.reshape(
      channels, (batch_size, width * height, channel_count))
  attributes = tf.concat([attributes, target_depths], -1)

  # Mesh topology. (Fixed: the original also computed an unused
  # `num_triangles` local.)
  triangles = tf.convert_to_tensor(
      render_utils.create_triangles(height, width), tf.int32)

  # Full projection: relative camera motion followed by target perspective.
  target_perspective = render_utils.perspective_from_intrinsics(
      target_intrinsics)
  proj_matrix = tf.matmul(
      target_perspective, geometry.mat34_to_mat44(relative_pose))

  # Zero background value for channels, large background value for depth.
  background = [0.0] * channel_count + [1000.0]

  # Render with mesh_renderer library.
  output = rasterize_triangles.rasterize(
      vertices, attributes, triangles, proj_matrix, width, height, background)
  output_channels, output_depths = tf.split(
      output, [channel_count, 1], axis=-1)
  # Convert interpolated depth back to disparity, clamping depth to
  # [0.01, 100] so the reciprocal stays finite and bounded.
  output_disparity = tf.math.divide_no_nan(
      1.0, tf.clip_by_value(output_depths, 1.0 / 100.0, 1.0 / 0.01))
  return (output_channels, output_disparity)
# NOTE(review): this `fly_step` appears to be a stray duplicate of the
# closure defined inside `fly_dynamic` below. Because it uses `nonlocal`,
# it is only valid when nested inside an enclosing function that binds
# `camera_to_world`, `look_dir`, `move_dir`, `down`, `position` and `t`
# (and supplies `turn_function`, the meander/lerp parameters, `intrinsics`,
# `speed`, `horizon`, `near_fraction` and `reverse`). At module level it is
# a SyntaxError — consider deleting this copy.
def fly_step(rgbd):
  """Consume one rgbd frame and return the next heuristic camera pose.

  Args:
    rgbd: image tensor whose channels from index 3 onward are disparity
      (presumably [H, W, 4] RGB + disparity — TODO confirm with caller).

  Returns:
    [3, 4] camera pose for the next frame.
  """
  nonlocal camera_to_world
  nonlocal look_dir
  nonlocal move_dir
  nonlocal down
  nonlocal position
  nonlocal t
  # Per-frame look offsets: optional external turn control...
  if turn_function:
    (xoff, yoff) = turn_function(t)
  else:
    (xoff, yoff) = (0.0, 0.0)
  # ...plus a sinusoidal meander in each axis.
  xoff += math.sin(
      t * 2.0 * math.pi / meander_x_period) * meander_x_magnitude
  yoff += math.sin(
      t * 2.0 * math.pi / meander_y_period) * meander_y_magnitude
  t = t + 1
  down = camera_to_world[:, 1]  # Comment this out for fixed down
  disparity = rgbd[Ellipsis, 3:]
  # Heuristic look target (x, y) and height target h from the disparity.
  x, y, h = skyline_balance(
      disparity, horizon=horizon, near_fraction=near_fraction)
  if reverse:
    # Flying backwards: mirror the horizontal and height targets.
    h = 1.0 - h
    x = 1.0 - x
  look_uv = tf.stack([x + xoff, y + yoff])
  move_uv = tf.stack([0.5, h])
  uvs = tf.stack([look_uv, move_uv], axis=0)
  # Points in world
  points = geometry.mat34_transform(
      camera_to_world,
      geometry.texture_to_camera_coordinates(uvs, intrinsics))
  new_look_dir = tf.math.l2_normalize(points[0] - position)
  new_move_dir = tf.math.l2_normalize(points[1] - position)
  # Very simple smoothing
  look_dir = look_dir * (1.0 - lerp) + new_look_dir * lerp
  move_dir = move_dir * (1.0 - movelerp) + new_move_dir * movelerp
  position = position + move_dir * speed
  # Next pose
  pose = camera_with_look_direction(position, look_dir, down)
  camera_to_world = geometry.mat34_pose_inverse(pose)
  return pose
def camera_with_look_direction(position, look_direction, down_direction):
  """A camera pose specified by where it is and what direction it looks in.

  Args:
    position: [..., 3] position of camera in world.
    look_direction: [..., 3] direction of optical axis (need not be
      normalised).
    down_direction: [..., 3] a direction that should project to down (+ve Y).

  Returns:
    [..., 3, 4] Camera pose.
  """
  # Build an orthonormal camera basis expressed in world coordinates.
  # The optical axis plays the role of Z and the down direction that of Y;
  # the right-hand rule then gives X = Y cross Z.
  z_axis = tf.math.l2_normalize(look_direction, axis=-1)
  x_axis = tf.math.l2_normalize(
      tf.linalg.cross(down_direction, z_axis), axis=-1)
  y_axis = tf.linalg.cross(z_axis, x_axis)
  # Stacking the basis vectors and the position as columns yields the
  # camera-to-world matrix; inverting it gives the camera pose.
  camera_to_world = tf.stack([x_axis, y_axis, z_axis, position], axis=-1)
  return geometry.mat34_pose_inverse(camera_to_world)
def fly_dynamic(intrinsics,
                initial_pose,
                speed=0.2,
                lerp=0.05,
                movelerp=0.05,
                horizon=0.3,
                near_fraction=0.2,
                meander_x_period=100,
                meander_x_magnitude=0.0,
                meander_y_period=100,
                meander_y_magnitude=0.0,
                turn_function=None):
  """Return a function for flying a camera heuristically.

  This flying function looks at the disparity as it goes and decides whether
  to look more up/down or left/right, and also whether to try to fly further
  away from or nearer to the ground.

  Args:
    intrinsics: [4] Camera intrinsics.
    initial_pose: [3, 4] Initial camera pose.
    speed: How far to move per step. A negative speed flies in reverse.
    lerp: How fast to converge look direction to target.
    movelerp: How fast to converge movement to target.
    horizon: What fraction of the image should lie above the horizon.
    near_fraction: Passed to skyline_balance; presumably the target fraction
      of the image that should be near the camera — TODO confirm.
    meander_x_period: Number of frames to produce a cyclic meander in the
      horizontal direction.
    meander_x_magnitude: How far to meander horizontally.
    meander_y_period: Number of frames to produce a cyclic meander in the
      vertical direction.
    meander_y_magnitude: How far to meander vertically.
    turn_function: A function which returns an x, y position to turn towards.

  Returns:
    a function fly_step which takes an rgbd image and returns the pose for
    the next camera. Call fly_step repeatedly to generate a series of poses.
    This is a stateful function and will internally keep track of camera
    position and velocity. Can only operate in eager mode.
  """
  # Closure state, mutated by fly_step on every call.
  # Where is the camera looking, and which way is down:
  camera_to_world = geometry.mat34_pose_inverse(initial_pose)
  look_dir = camera_to_world[:, 2]
  move_dir = look_dir  # Begin by moving forwards.
  down = camera_to_world[:, 1]
  position = camera_to_world[:, 3]
  t = 0
  reverse = (speed < 0)

  def fly_step(rgbd):
    """Consume one rgbd frame and return the next camera pose."""
    nonlocal camera_to_world
    nonlocal look_dir
    nonlocal move_dir
    nonlocal down
    nonlocal position
    nonlocal t
    # Per-frame look offsets: optional external turn control...
    if turn_function:
      (xoff, yoff) = turn_function(t)
    else:
      (xoff, yoff) = (0.0, 0.0)
    # ...plus a sinusoidal meander in each axis.
    xoff += math.sin(
        t * 2.0 * math.pi / meander_x_period) * meander_x_magnitude
    yoff += math.sin(
        t * 2.0 * math.pi / meander_y_period) * meander_y_magnitude
    t = t + 1
    down = camera_to_world[:, 1]  # Comment this out for fixed down
    disparity = rgbd[Ellipsis, 3:]
    # Heuristic look target (x, y) and height target h from the disparity.
    x, y, h = skyline_balance(
        disparity, horizon=horizon, near_fraction=near_fraction)
    if reverse:
      # Flying backwards: mirror the horizontal and height targets.
      h = 1.0 - h
      x = 1.0 - x
    look_uv = tf.stack([x + xoff, y + yoff])
    move_uv = tf.stack([0.5, h])
    uvs = tf.stack([look_uv, move_uv], axis=0)
    # Points in world
    points = geometry.mat34_transform(
        camera_to_world,
        geometry.texture_to_camera_coordinates(uvs, intrinsics))
    new_look_dir = tf.math.l2_normalize(points[0] - position)
    new_move_dir = tf.math.l2_normalize(points[1] - position)
    # Very simple smoothing
    look_dir = look_dir * (1.0 - lerp) + new_look_dir * lerp
    move_dir = move_dir * (1.0 - movelerp) + new_move_dir * movelerp
    position = position + move_dir * speed
    # Next pose
    pose = camera_with_look_direction(position, look_dir, down)
    camera_to_world = geometry.mat34_pose_inverse(pose)
    return pose

  return fly_step