    def _test_op_backward_pass(self, on_gpu, dtype, tol):
        np.random.seed(13)
        data_width = 5
        data_height = 4
        data_channels = 3
        warp_width = 2
        warp_height = 6
        batch_size = 3

        warp = _make_warp(batch_size, warp_height, warp_width,
                          dtype.as_numpy_dtype)
        data_shape = (batch_size, data_height, data_width, data_channels)
        data = np.random.rand(*data_shape).astype(dtype.as_numpy_dtype)

        with self.test_session(use_gpu=on_gpu, force_gpu=False):
            data_tensor = constant_op.constant(data)
            warp_tensor = constant_op.constant(warp)
            output_tensor = resampler.resampler(data=data_tensor,
                                                warp=warp_tensor)

            grads = test.compute_gradient([data_tensor, warp_tensor], [
                data_tensor.get_shape().as_list(),
                warp_tensor.get_shape().as_list()
            ], output_tensor,
                                          output_tensor.get_shape().as_list(),
                                          [data, warp])

            if not on_gpu:
                # On CPU we perform numerical differentiation at the best available
                # precision, and compare against that. This is necessary for the
                # test to pass for float16.
                data_tensor_64 = constant_op.constant(data,
                                                      dtype=dtypes.float64)
                warp_tensor_64 = constant_op.constant(warp,
                                                      dtype=dtypes.float64)
                output_tensor_64 = resampler.resampler(data=data_tensor_64,
                                                       warp=warp_tensor_64)
                grads_64 = test.compute_gradient(
                    [data_tensor_64, warp_tensor_64], [
                        data_tensor.get_shape().as_list(),
                        warp_tensor.get_shape().as_list()
                    ], output_tensor_64,
                    output_tensor.get_shape().as_list(), [data, warp])

                for g, g_64 in zip(grads, grads_64):
                    self.assertLess(np.fabs(g[0] - g_64[1]).max(), tol)

            else:
                for g in grads:
                    self.assertLess(np.fabs(g[0] - g[1]).max(), tol)

    def _test_op_forward_pass(self, on_gpu, dtype, tol):
        np.random.seed(0)
        data_width = 7
        data_height = 9
        data_channels = 5
        warp_width = 4
        warp_height = 8
        batch_size = 10

        warp = _make_warp(batch_size, warp_height, warp_width,
                          dtype.as_numpy_dtype)
        data_shape = (batch_size, data_height, data_width, data_channels)
        data = np.random.rand(*data_shape).astype(dtype.as_numpy_dtype)

        with self.test_session(use_gpu=on_gpu, force_gpu=False) as sess:
            data_ph = array_ops.placeholder(dtype,
                                            shape=(None, ) + data.shape[1:])
            warp_ph = array_ops.placeholder(dtype,
                                            shape=(None, ) + warp.shape[1:])
            outputs = resampler.resampler(data=data_ph, warp=warp_ph)
            self.assertEqual(outputs.get_shape().as_list(),
                             [None, warp_height, warp_width, data_channels])
            out = sess.run(outputs, feed_dict={data_ph: data, warp_ph: warp})

        # Generate reference output via bilinear interpolation in numpy
        reference_output = np.zeros_like(out)
        for batch in xrange(batch_size):
            for c in xrange(data_channels):
                reference_output[batch, :, :, c] = _bilinearly_interpolate(
                    data[batch, :, :, c], warp[batch, :, :, 0],
                    warp[batch, :, :, 1])

        self.assertAllClose(out, reference_output, rtol=tol, atol=tol)
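
Both tests above reference two module-level helpers, _make_warp and _bilinearly_interpolate, that are not included in this listing. The sketch below shows what such helpers could plausibly look like; the bodies are assumptions for illustration, not the originals, and _bilinearly_interpolate assumes the sampling coordinates stay inside the image.

import numpy as np


def _make_warp(batch_size, warp_height, warp_width, dtype):
    # Regular (x, y) pixel grid of shape [batch, height, width, 2], plus a
    # small random jitter so samples land at fractional positions.
    x, y = np.meshgrid(np.arange(warp_width), np.arange(warp_height))
    warp = np.stack([x, y], axis=-1).astype(np.float64)
    warp = np.tile(warp[np.newaxis], [batch_size, 1, 1, 1])
    warp += np.random.uniform(-0.5, 0.5, size=warp.shape)
    return warp.astype(dtype)


def _bilinearly_interpolate(data, x, y):
    # Reference bilinear interpolation of a single-channel image `data` at
    # fractional coordinates (x, y), with coordinates assumed in range.
    x0 = np.clip(np.floor(x).astype(int), 0, data.shape[1] - 2)
    y0 = np.clip(np.floor(y).astype(int), 0, data.shape[0] - 2)
    dx, dy = x - x0, y - y0
    top = data[y0, x0] * (1 - dx) + data[y0, x0 + 1] * dx
    bottom = data[y0 + 1, x0] * (1 - dx) + data[y0 + 1, x0 + 1] * dx
    return top * (1 - dy) + bottom * dy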
Example No. 3
def bilinear_wrapper(imgs, coords):
    """Wrapper around bilinear sampling function, handles arbitrary input sizes.

  Args:
    imgs: images of shape [..., H, W, C]; any leading batch dimensions are
      collapsed into a single batch dimension before sampling.
    coords: [..., H, W, 2] coordinates of the source pixels to copy from.
  Returns:
    [..., H, W, C] images after bilinear sampling from the input.
  """
    # the bilinear sampling code only handles 4D input, so we'll need to reshape
    init_dims = tf.shape(imgs)[:-3]
    end_dims_img = tf.shape(imgs)[-3:]
    end_dims_coords = tf.shape(coords)[-3:]

    prod_init_dims = tf.reduce_prod(init_dims)

    imgs = tf.reshape(
        imgs, tf.concat([prod_init_dims[tf.newaxis], end_dims_img], axis=0))
    coords = tf.reshape(
        coords, tf.concat([prod_init_dims[tf.newaxis], end_dims_coords],
                          axis=0))
    imgs_sampled = contrib_resampler.resampler(imgs, coords)
    imgs_sampled = tf.reshape(
        imgs_sampled,
        tf.concat([init_dims, tf.shape(imgs_sampled)[-3:]], axis=0))
    return imgs_sampled
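
A usage sketch for the wrapper above (shapes are illustrative assumptions): the extra leading dimension is folded into the batch axis before calling the resampler and restored afterwards, so a 5-D stack of images works directly.

imgs = tf.zeros([2, 3, 32, 32, 3])        # [B, S, H, W, C]
coords = tf.zeros([2, 3, 32, 32, 2])      # [B, S, H, W, 2], (x, y) pixel coords
sampled = bilinear_wrapper(imgs, coords)  # -> [2, 3, 32, 32, 3]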
Example No. 4
  def _test_op_forward_pass(self, on_gpu, dtype, tol):
    np.random.seed(0)
    data_width = 7
    data_height = 9
    data_channels = 5
    warp_width = 4
    warp_height = 8
    batch_size = 10

    warp = _make_warp(batch_size, warp_height, warp_width, dtype.as_numpy_dtype)
    data_shape = (batch_size, data_height, data_width, data_channels)
    data = np.random.rand(*data_shape).astype(dtype.as_numpy_dtype)

    with self.test_session(use_gpu=on_gpu, force_gpu=False) as sess:
      data_ph = array_ops.placeholder(dtype, shape=(None,) + data.shape[1:])
      warp_ph = array_ops.placeholder(dtype, shape=(None,) + warp.shape[1:])
      outputs = resampler.resampler(data=data_ph, warp=warp_ph)
      self.assertEqual(outputs.get_shape().as_list(),
                       [None, warp_height, warp_width, data_channels])
      out = sess.run(outputs, feed_dict={data_ph: data, warp_ph: warp})

    # Generate reference output via bilinear interpolation in numpy
    reference_output = np.zeros_like(out)
    for batch in xrange(batch_size):
      for c in xrange(data_channels):
        reference_output[batch, :, :, c] = _bilinearly_interpolate(
            data[batch, :, :, c],
            warp[batch, :, :, 0],
            warp[batch, :, :, 1])

    self.assertAllClose(out, reference_output, rtol=tol, atol=tol)
Example No. 5
def flow_gather(source_images, flows):
    """Gather from a tensor of images.

  Args:
    source_images: 5D tensor of images [B, H, W, D, 3]
    flows: 5D tensor of x/y offsets to gather for each slice (pixel offsets)
  Returns:
    warped_imgs_reshape: 5D tensor of gathered (warped) images [B, H, W, D, 3]
  """
    batchsize = tf.shape(source_images)[0]
    height = tf.shape(source_images)[1]
    width = tf.shape(source_images)[2]
    num_depths = tf.shape(source_images)[3]
    source_images_reshape = tf.reshape(
        tf.transpose(source_images, [0, 3, 1, 2, 4]),
        [batchsize * num_depths, height, width, 3])
    flows_reshape = tf.reshape(tf.transpose(flows, [0, 3, 1, 2, 4]),
                               [batchsize * num_depths, height, width, 2])
    _, h, w = tf.meshgrid(tf.range(tf.to_float(batchsize * num_depths),
                                   dtype=tf.float32),
                          tf.range(tf.to_float(height), dtype=tf.float32),
                          tf.range(tf.to_float(width), dtype=tf.float32),
                          indexing='ij')
    coords_y = tf.clip_by_value(h + flows_reshape[Ellipsis, 0], 0.0,
                                tf.to_float(height))
    coords_x = tf.clip_by_value(w + flows_reshape[Ellipsis, 1], 0.0,
                                tf.to_float(width))
    sampling_coords = tf.stack([coords_x, coords_y], axis=-1)
    warped_imgs = contrib_resampler.resampler(source_images_reshape,
                                              sampling_coords)
    warped_imgs_reshape = tf.transpose(
        tf.reshape(warped_imgs, [batchsize, num_depths, height, width, 3]),
        [0, 2, 3, 1, 4])
    return warped_imgs_reshape
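
A usage sketch with assumed shapes. Per the code above, channel 0 of the flow is added to the y (row) coordinate and channel 1 to the x (column) coordinate.

source_images = tf.zeros([1, 64, 64, 8, 3])  # [B, H, W, D, 3]
flows = tf.zeros([1, 64, 64, 8, 2])          # [B, H, W, D, 2]
warped = flow_gather(source_images, flows)   # -> [1, 64, 64, 8, 3]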
Example No. 6
File: morn.py Project: aptlin/moran
    def call(self, inputs, training=False):
        batch_size, height, width, channels = tf.shape(inputs)
        if training and np.random.random() > 0.5:
            # Randomly skip rectification during training and return the
            # inputs plainly upsampled to the target size.
            return UpSampling2D(
                (self.target_height / height, self.target_width / width),
                interpolation="bilinear",
            )(inputs)
        assert batch_size <= self.max_batch_size
        grid = self.grid[:batch_size]
        grid_x = self.grid_x[:batch_size]
        grid_y = self.grid_y[:batch_size]
        input_sample = UpSampling2D(
            (self.target_height / height, self.target_width / width),
            interpolation="bilinear",
        )(inputs)

        offsets = self.filters(input_sample)
        offsets_pos = RELU()(offsets)
        offsets_neg = RELU()(-offsets)
        offsets_pool = self.pool(offsets_pos) - self.pool(offsets_neg)

        offsets_grid = tf.transpose(
            resampler(offsets_pool, grid), [0, 2, 3, 1]
        )
        input_offsets = tf.concat(
            values=[grid_x, grid_y + offsets_grid], axis=3
        )
        rectified_input = resampler(inputs, input_offsets)

        if training:
            return rectified_input

        offsets = self.filters(rectified_input)

        offsets_pos = RELU()(offsets)
        offsets_neg = RELU()(-offsets)
        offsets_pool = self.pool(offsets_pos) - self.pool(offsets_neg)

        offsets_grid += tf.transpose(
            resampler(offsets_pool, grid), [0, 2, 3, 1]
        )
        input_offsets = tf.concat(
            values=[grid_x, grid_y + offsets_grid], axis=3
        )
        rectified_input = resampler(inputs, input_offsets)

        return rectified_input
Example No. 7
  def _assertForwardOpMatchesExpected(self, image_np, warp_np, expected):
    with self.test_session() as sess, self.test_scope():
      input_image = array_ops.placeholder(image_np.dtype)
      warp = array_ops.placeholder(warp_np.dtype)
      resampled = resampler.resampler(input_image, warp, name='resampler')
      out = sess.run(resampled, {input_image: image_np, warp: warp_np})

      self.assertAllCloseAccordingToType(
          expected, out, rtol=5e-3, half_rtol=1e-2, bfloat16_rtol=3e-2)
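
A hypothetical call site inside the same test class, for illustration: a 2x2 single-channel image sampled at its centre (x = 0.5, y = 0.5) bilinearly averages all four pixels, so the expected output is 1.5.

    image_np = np.array([[[[0.], [1.]],
                          [[2.], [3.]]]], dtype=np.float32)  # [1, 2, 2, 1]
    warp_np = np.array([[0.5, 0.5]], dtype=np.float32)       # [1, 2]
    expected = np.array([[1.5]], dtype=np.float32)           # [1, 1]
    self._assertForwardOpMatchesExpected(image_np, warp_np, expected)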
Example No. 9
def apply_line_prediction(inputs,
                          features,
                          blur_steps,
                          learn_alpha=True,
                          name=None):
  """Applies "Line Prediction" layer to input images."""
  inputs.shape.assert_is_compatible_with([None, None, None, 6])

  with tf.name_scope(name, 'blur_prediction', values=[inputs, features]):

    with tf.name_scope(None, 'input_frames', values=[inputs]):
      frames = [inputs[:, :, :, :3], inputs[:, :, :, 3:]]

    with tf.name_scope(None, 'frame_size', values=[inputs, features]):
      shape = tf.shape(inputs)
      height = shape[1]
      width = shape[2]

    with tf.name_scope(None, 'identity_warp', values=[]):
      x_idx, y_idx = tf.meshgrid(tf.range(width), tf.range(height))
      identity_warp = tf.to_float(tf.stack([x_idx, y_idx], axis=-1))
      identity_warp = identity_warp[tf.newaxis, :, :, tf.newaxis, :]

      warp_steps = tf.to_float(tf.range(blur_steps - 1) + 1) / (blur_steps - 1)
      warp_steps = warp_steps[tf.newaxis, tf.newaxis, tf.newaxis, :, tf.newaxis]

      max_warps = tf.to_float(tf.stack([width - 1, height - 1]))
      max_warps = max_warps[tf.newaxis, tf.newaxis, tf.newaxis, tf.newaxis, :]

    output_frames = []
    for frame in frames:
      with tf.name_scope(None, 'predict_blurs', values=[features]):
        flow = tf.layers.conv2d(features, 2, 1, padding='same')

        if learn_alpha:
          alpha = tf.layers.conv2d(
              features, blur_steps, 1, padding='same', activation=tf.nn.softmax)

      with tf.name_scope(None, 'apply_blurs', values=[]):
        with tf.name_scope(None, 'warp', values=[frame, flow]):
          warps = identity_warp + flow[:, :, :, tf.newaxis, :] * warp_steps
          warps = tf.clip_by_value(warps, 0.0, max_warps)
          warped = contrib_resampler.resampler(frame, warps)
          warped = tf.concat([frame[:, :, :, tf.newaxis, :], warped], axis=3)

        with tf.name_scope(None, 'apply_alpha', values=[frame, flow]):
          if learn_alpha:
            mask = alpha[:, :, :, :, tf.newaxis]
          else:
            mask = 1.0 / blur_steps
          output_frames.append(tf.reduce_sum(warped * mask, axis=3))

    with tf.name_scope(None, 'outputs', values=[output_frames]):
      output = tf.add_n(output_frames) / len(frames)
      return output
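
A usage sketch with assumed shapes: inputs holds two RGB frames concatenated along channels, and features is any per-pixel feature map from which the flow (and, when learn_alpha=True, per-step alpha weights) is predicted.

inputs = tf.zeros([4, 128, 128, 6])     # two RGB frames stacked on channels
features = tf.zeros([4, 128, 128, 32])  # per-pixel features
blurred = apply_line_prediction(inputs, features, blur_steps=8)  # [4, 128, 128, 3]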
Example No. 10
  def _test_op_backward_pass(self, on_gpu, dtype, tol):
    np.random.seed(13)
    data_width = 5
    data_height = 4
    data_channels = 3
    warp_width = 2
    warp_height = 6
    batch_size = 10

    warp = _make_warp(batch_size, warp_height, warp_width, dtype.as_numpy_dtype)
    data_shape = (batch_size, data_height, data_width, data_channels)
    data = np.random.rand(*data_shape).astype(dtype.as_numpy_dtype)

    with self.test_session(use_gpu=on_gpu, force_gpu=False):
      data_tensor = constant_op.constant(data)
      warp_tensor = constant_op.constant(warp)
      output_tensor = resampler.resampler(data=data_tensor, warp=warp_tensor)

      grads = test.compute_gradient([data_tensor, warp_tensor], [
          data_tensor.get_shape().as_list(),
          warp_tensor.get_shape().as_list()
      ], output_tensor, output_tensor.get_shape().as_list(), [data, warp])

      if not on_gpu:
        # On CPU we perform numerical differentiation at the best available
        # precision, and compare against that. This is necessary for the test
        # to pass for float16.
        data_tensor_64 = constant_op.constant(data, dtype=dtypes.float64)
        warp_tensor_64 = constant_op.constant(warp, dtype=dtypes.float64)
        output_tensor_64 = resampler.resampler(data=data_tensor_64,
                                               warp=warp_tensor_64)
        grads_64 = test.compute_gradient([data_tensor_64, warp_tensor_64], [
            data_tensor.get_shape().as_list(),
            warp_tensor.get_shape().as_list()
        ], output_tensor_64, output_tensor.get_shape().as_list(), [data, warp])

        for g, g_64 in zip(grads, grads_64):
          self.assertLess(np.fabs(g[0] - g_64[1]).max(), tol)

      else:
        for g in grads:
          self.assertLess(np.fabs(g[0] - g[1]).max(), tol)
Example No. 11
def rotate_pano_horizontally(input_feature_map, yaw_angle):
    """Rotates input_feature_map by yaw_angle by horizontally translating pixels.

  The layer is differentiable with respect to yaw_angle and input_feature_map.
  yaw_angle is positive for CCW rotation about the z-axis where the coordinates
  are constructed with z-axis facing up.

  Args:
    input_feature_map: panoramic image or neural feature maps of shape [B, H, W,
      C].
    yaw_angle: A tensor of shape `[B]` which represents the desired rotation of
      input_feature_map. yaw_angle is in units of radians. A positive yaw_angle
      rolls pixels left.

  Returns:
    A rotated feature map with dimensions `[B, H, W, C]`

  Reference:
  [1]: 'Spatial Transformer Networks', Jaderberg et al.,
       (https://arxiv.org/abs/1506.02025)
  """

    # Number of input dimensions.
    tfshape = tf.shape(input_feature_map)
    batch_size = tfshape[0]
    height = tfshape[1]
    width = tfshape[2]

    float32_width = tf.cast(width, dtype=tf.float32)
    float32_height = tf.cast(height, dtype=tf.float32)

    x_offset = (yaw_angle / 2 / np.pi) * float32_width

    x_grid = tf.linspace(0., float32_width - 1, width)  # (W)
    # 0.5 * original_image_width to match the convention described in comment
    x_pixel_coord = x_grid[tf.newaxis] + x_offset[:, tf.newaxis]  # (B, W)

    x_pixel_coord = tf.tile(x_pixel_coord[:, tf.newaxis, :],
                            [1, height, 1])  # (B, H, W)
    y_pixel_coord = tf.linspace(0., float32_height - 1,
                                height)[tf.newaxis, :, tf.newaxis]  # (1, H, 1)
    y_pixel_coord = tf.tile(y_pixel_coord, [batch_size, 1, width])
    wrapped_x_pixel_coord = tf.floormod(x_pixel_coord, float32_width)

    # Because these are panoramas, we can concatenate the first column to the
    # right side. This allows us to interpolate values for coordinates that
    # correspond to pixels that connects the left and right edges of the
    # panorama.
    input_feature_map = tf.concat(
        [input_feature_map, input_feature_map[:, :, :1]], axis=2)

    return resampler.resampler(
        input_feature_map,
        tf.stack([wrapped_x_pixel_coord, y_pixel_coord], axis=-1))
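
A usage sketch: a yaw of pi shifts the panorama by half its width (pixels roll left), while a yaw of 2*pi maps it back onto itself.

pano = tf.zeros([2, 64, 256, 3])                           # [B, H, W, C]
yaw = tf.constant([np.pi, 2.0 * np.pi], dtype=tf.float32)  # [B]
rotated = rotate_pano_horizontally(pano, yaw)              # -> [2, 64, 256, 3]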
Example No. 12
    def call(self, inputs):
        inputs_shape = tf.shape(inputs)
        assert len(inputs_shape) == 4
        assert inputs_shape[2] == 1

        height = inputs_shape[2]
        width = inputs_shape[3]
        widths = np.arange(width) * 2.0 / (width - 1) - 1
        idx = int(npr.rand() * len(widths))
        if idx > 0 and idx < width - 1:
            beta = npr.rand() / 4.0
            previous = beta * widths[idx] + (1 - beta) * widths[idx - 1]
            current = beta * widths[idx - 1] + (1 - beta) * widths[idx]
            widths[idx - 1] = previous
            widths[idx] = current
        grid = np.meshgrid(widths, height, indexing="ij")
        grid = np.stack(grid, axis=-1)
        grid = np.transpose(grid, (1, 0, 2))
        grid = np.expand_dims(grid, 0)
        grid = np.tile(grid, [inputs_shape[0], 1, 1, 1])
        self.grid = tf.Variable(grid)
        inputs_offset = resampler(inputs, self.grid)

        return inputs_offset
Example No. 13
def projective_inverse_warp(img, depth, pose, intrinsics, ret_flows=False):
    """Inverse warp a source image to the target image plane based on projection.

  Args:
    img: the source image [batch, height_s, width_s, 3]
    depth: depth map of the target image [batch, height_t, width_t]
    pose: target to source camera transformation matrix [batch, 4, 4]
    intrinsics: camera intrinsics [batch, 3, 3]
    ret_flows: whether to return the displacements/flows as well
  Returns:
    Source image inverse warped to the target image plane [batch, height_t,
    width_t, 3]
  """
    num_depths = tf.shape(depth)[0]
    batch = tf.to_int32(tf.shape(img)[0] / num_depths)
    height = tf.shape(img)[1]
    width = tf.shape(img)[2]
    # Construct pixel grid coordinates
    pixel_coords = meshgrid_abs(batch * num_depths, height, width)
    # Convert pixel coordinates to the camera frame
    cam_coords = pixel2cam(depth, pixel_coords, intrinsics)
    # Construct a 4x4 intrinsic matrix (TODO: can it be 3x4?)
    filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4])
    filler = tf.tile(filler, [batch * num_depths, 1, 1])
    intrinsics = tf.concat(
        [intrinsics, tf.zeros([batch * num_depths, 3, 1])], axis=2)
    intrinsics = tf.concat([intrinsics, filler], axis=1)
    # Get a 4x4 transformation matrix from 'target' camera frame to 'source'
    # pixel frame.
    proj_tgt_cam_to_src_pixel = tf.matmul(intrinsics, pose)
    src_pixel_coords = cam2pixel(cam_coords, proj_tgt_cam_to_src_pixel)
    output_img = contrib_resampler.resampler(img, src_pixel_coords)
    if ret_flows:
        return output_img, src_pixel_coords - cam_coords
    else:
        return output_img
def rgbd_consistency_loss(frame1transformed_depth, frame1rgb, frame2depth,
                          frame2rgb):
  """Computes a loss that penalizes RGB and depth inconsistencies betwen frames.

  This function computes 3 losses that penalize inconsistencies between two
  frames: depth, RGB, and structural similarity. It IS NOT SYMMETRIC with
  respect to both frames. In particular, to address occlusions, it only
  penalizes depth and RGB inconsistencies at pixels where frame1 is closer to
  the camera than frame2. (Why? see https://arxiv.org/abs/1904.04998). Therefore
  the intended usage pattern is running it twice - second time with the two
  frames swapped.

  Args:
    frame1transformed_depth: A transform_depth_map.TransformedDepthMap object
      representing the depth map of frame 1 after it was motion-transformed to
      frame 2, a motion transform that accounts for all camera and object motion
      that occurred between frame1 and frame2. The tensors inside
      frame1transformed_depth are of shape [B, H, W].
    frame1rgb: A tf.Tensor of shape [B, H, W, C] containing the RGB image at
      frame1.
    frame2depth: A tf.Tensor of shape [B, H, W] containing the depth map at
      frame2.
    frame2rgb: A tf.Tensor of shape [B, H, W, C] containing the RGB image at
      frame2.

  Returns:
    A dictionary from string to tf.Tensor, with the following entries:
      depth_error: A tf scalar, the depth mismatch error between the two frames.
      rgb_error: A tf scalar, the rgb mismatch error between the two frames.
      ssim_error: A tf scalar, the structural similarity mismatch error between
        the two frames.
      depth_proximity_weight: A tf.Tensor of shape [B, H, W], representing a
        function that peaks (at 1.0) for pixels where there is depth consistency
        between the two frames, and is small otherwise.
      frame1_closer_to_camera: A tf.Tensor of shape [B, H, W, 1], a mask that is
        1.0 when the depth map of frame 1 has smaller depth than frame 2.
  """
  pixel_xy = frame1transformed_depth.pixel_xy
  frame2depth_resampled = _resample_depth(frame2depth, pixel_xy)
  frame2rgb_resampled = contrib_resampler.resampler(
      frame2rgb, pixel_xy)

  # f1td.depth is the predicted depth at [pixel_y, pixel_x] for frame2. Now we
  # generate (by interpolation) the actual depth values for frame2's depth, at
  # the same locations, so that we can compare the two depths.

  # We penalize inconsistencies between the two frames' depth maps only if the
  # transformed depth map (of frame 1) falls closer to the camera than the
  # actual depth map (of frame 2). This is intended for avoiding penalizing
  # points that become occluded because of the transform.
  # So what about depth inconsistencies where frame1's depth map is FARTHER from
  # the camera than frame2's? These will be handled when we swap the roles of
  # frame 1 and 2 (more in https://arxiv.org/abs/1904.04998).
  frame1_closer_to_camera = tf.to_float(
      tf.logical_and(
          frame1transformed_depth.mask,
          tf.less(frame1transformed_depth.depth, frame2depth_resampled)))
  depth_error = tf.reduce_mean(
      tf.abs(frame2depth_resampled - frame1transformed_depth.depth) *
      frame1_closer_to_camera)

  rgb_error = (
      tf.abs(frame2rgb_resampled - frame1rgb) * tf.expand_dims(
          frame1_closer_to_camera, -1))
  rgb_error = tf.reduce_mean(rgb_error)

  # We generate a weight function that peaks (at 1.0) for pixels where when the
  # depth difference is less than its standard deviation across the frame, and
  # fall off to zero otherwise. This function is used later for weighing the
  # structural similarity loss term. We only want to demand structural
  # similarity for surfaces that are close to one another in the two frames.
  depth_error_second_moment = _weighted_average(
      tf.square(frame2depth_resampled - frame1transformed_depth.depth),
      frame1_closer_to_camera) + 1e-4
  depth_proximity_weight = (
      depth_error_second_moment /
      (tf.square(frame2depth_resampled - frame1transformed_depth.depth) +
       depth_error_second_moment) * tf.to_float(frame1transformed_depth.mask))

  # If we don't stop the gradient training won't start. The reason is presumably
  # that then the network can push the depths apart instead of seeking RGB
  # consistency.
  depth_proximity_weight = tf.stop_gradient(depth_proximity_weight)

  ssim_error, avg_weight = weighted_ssim(
      frame2rgb_resampled,
      frame1rgb,
      depth_proximity_weight,
      c1=float('inf'),  # These values of c1 and c2 work better than defaults.
      c2=9e-6)
  ssim_error = tf.reduce_mean(ssim_error * avg_weight)

  endpoints = {
      'depth_error': depth_error,
      'rgb_error': rgb_error,
      'ssim_error': ssim_error,
      'depth_proximity_weight': depth_proximity_weight,
      'frame1_closer_to_camera': frame1_closer_to_camera
  }
  return endpoints
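
The loss above also relies on a _weighted_average helper that is not shown in this listing. A minimal sketch of what it could look like (an assumption, not the original) is a spatially weighted mean with an epsilon keeping the denominator non-zero:

def _weighted_average(x, w, epsilon=1.0):
  # Weighted mean of x over the spatial dimensions; w acts as a per-pixel mask.
  weighted_sum = tf.reduce_sum(x * w, axis=(1, 2), keepdims=True)
  sum_of_weights = tf.reduce_sum(w, axis=(1, 2), keepdims=True)
  return weighted_sum / (sum_of_weights + epsilon)
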
    def test_op_errors(self):
        data_width = 7
        data_height = 9
        data_depth = 3
        data_channels = 5
        warp_width = 4
        warp_height = 8
        batch_size = 10

        # Input data shape is not defined over a 2D grid, i.e. its shape is not like
        # (batch_size, data_height, data_width, data_channels).
        with self.test_session() as sess:
            data_shape = (batch_size, data_height, data_width, data_depth,
                          data_channels)
            data = np.zeros(data_shape)
            warp_shape = (batch_size, warp_height, warp_width, 2)
            warp = np.zeros(warp_shape)
            outputs = resampler.resampler(constant_op.constant(data),
                                          constant_op.constant(warp))

            with self.assertRaisesRegexp(
                    errors_impl.UnimplementedError,
                    "Only bilinear interpolation is currently "
                    "supported."):
                sess.run(outputs)

        # Warp tensor must be at least a matrix, with shape [batch_size, 2].
        with self.test_session() as sess:
            data_shape = (batch_size, data_height, data_width, data_channels)
            data = np.zeros(data_shape)
            warp_shape = (batch_size, )
            warp = np.zeros(warp_shape)
            outputs = resampler.resampler(constant_op.constant(data),
                                          constant_op.constant(warp))

            with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                         "warp should be at least a matrix"):
                sess.run(outputs)

        # The batch size of the data and warp tensors must be the same.
        with self.test_session() as sess:
            data_shape = (batch_size, data_height, data_width, data_channels)
            data = np.zeros(data_shape)
            warp_shape = (batch_size + 1, warp_height, warp_width, 2)
            warp = np.zeros(warp_shape)
            outputs = resampler.resampler(constant_op.constant(data),
                                          constant_op.constant(warp))

            with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                         "Batch size of data and warp tensor"):
                sess.run(outputs)

        # The warp tensor must contain 2D coordinates, i.e. the last dimension
        # of its shape must be 2.
        with self.test_session() as sess:
            data_shape = (batch_size, data_height, data_width, data_channels)
            data = np.zeros(data_shape)
            warp_shape = (batch_size, warp_height, warp_width, 3)
            warp = np.zeros(warp_shape)
            outputs = resampler.resampler(constant_op.constant(data),
                                          constant_op.constant(warp))

            with self.assertRaisesRegexp(
                    errors_impl.UnimplementedError,
                    "Only bilinear interpolation is supported, "
                    "warping"):
                sess.run(outputs)
#y = bilinear_sampler(x, v, resize=True, crop=(0,4,0,4))
z = bilinear_sampler(x, v2)

shape = tf.shape(x)
N = shape[0]
H_ = H = shape[1]
W_ = W = shape[2]
h = w = 0
n, h, w = _get_grid_array(N, H, W, h, w)  # [N, H, W, 3]
stacked = tf.stack([h, w], 3)
#stacked =  tf.expand_dims(stacked, axis=0) # [N, H, W, 1]
#stacked = stacked+v

stacked = tf.squeeze(stacked, [4])  # [1, 2, 3, 1]
stacked = stacked + v
z2 = resampler(x, stacked)
z2 = tf.transpose(z2, [0, 2, 1, 3])

with tf.Session() as sess:

    #add coords
    #z3 = resampler(x_,)

    x_, z_, n, h, w, stacked, z2 = sess.run([x, z, n, h, w, stacked, z2])

    #h+=2
    #w+=2

    print(n)
    print('............')
    print(h)
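
The fragment above depends on a _get_grid_array helper that is not shown. A plausible sketch (an assumption based on how its outputs are used above) builds batch/row/column index grids, each of shape [N, H, W, 1]:

def _get_grid_array(N, H, W, h, w):
    n_i = tf.range(N)
    h_i = tf.range(h, H + h)
    w_i = tf.range(w, W + w)
    n, h, w = tf.meshgrid(n_i, h_i, w_i, indexing='ij')
    # Trailing singleton axis so each grid is [N, H, W, 1].
    n = tf.cast(n[..., tf.newaxis], tf.float32)
    h = tf.cast(h[..., tf.newaxis], tf.float32)
    w = tf.cast(w[..., tf.newaxis], tf.float32)
    return n, h, w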
Example No. 17
def bspline_warp(cps, image, degree, regularization=0, pano_pad=False):
    """Differentiable 2D alignment of a stack of nearby panoramas.

  Entry point for regularized b-spline surface warp with appropriate handling
  for boundary padding of panoramas. Includes the image resampling operation.

  Args:
    cps: Control points [bsz, H_CP, W_CP, d] defining the deformations.
    image: An image tensor [bsz, H, W, 3] from which we sample deformed
      coordinates.
    degree: Defines the degree of the b-spline interpolation.
    regularization: A float ranging from [0, 1] that smooths the extremes of the
      control points. The effect is that the network has some leeway in fitting
      the original control points exactly.
    pano_pad: When true pads the image and uses a cyclical horizontal warp.
      Useful for warping panorama images.

  Returns:
    A warped image based on deformations specified by control points at various
    positions. Has shape [bsz, H, W, d]

  Raises:
    ValueError: If degree is greater than 4 or num_knots - 1, or less than 0.
    InvalidArgumentError: If positions are not in the right range.
  """

    if regularization < 0 or regularization > 1:
        raise ValueError("b-spline regularization must be between [0, 1]")

    if regularization > 0.:
        # Regularizing constraint on the local structure of control points.
        #   New control points is:
        #     regularization * ave_neighbor + (1-regularization) * cp
        cps_down = tf.concat([cps[:, 1:], cps[:, -1:]], axis=1)
        cps_up = tf.concat([cps[:, :1], cps[:, :-1]], axis=1)
        if pano_pad:
            cps_left = tf.roll(cps, shift=1, axis=2)
            cps_right = tf.roll(cps, shift=-1, axis=2)
        else:
            cps_left = tf.concat([cps[:, :, :1], cps[:, :, :-1]], axis=2)
            cps_right = tf.concat([cps[:, :, 1:], cps[:, :, -1:]], axis=2)
        cps_reg = (cps_left + cps_right + cps_up + cps_down) / 4.
        cps = cps * (1 - regularization) + cps_reg * (regularization)
    tf.summary.image("cps_h", cps[Ellipsis, :1])
    tf.summary.image("cps_w", cps[Ellipsis, 1:])

    batch_size, small_h, small_w, unused_d = cps.shape.as_list()
    unused_batch_size, big_h, big_w, unused_d = image.shape.as_list()

    # Control points are "normalized" in the sense that they're agnostic to the
    # resolution of the image being warped.
    cps = cps * np.array([big_h, big_w])

    y_coord = tf.linspace(0., small_h - 3 - 1e-4, big_h - 4)
    y_coord = tf.concat(
        [tf.zeros([2]), y_coord,
         tf.ones([2]) * (small_h - 3 - 1e-4)], axis=0)
    y_coord = y_coord[:, tf.newaxis]
    if pano_pad:
        x_coord = tf.linspace(0., small_w + 1 - 1e-4, big_w)[tf.newaxis, :]
    else:
        x_coord = tf.linspace(0., small_w - 3 - 1e-4, big_w - 4)
        x_coord = tf.concat(
            [tf.zeros([2]), x_coord,
             tf.ones([2]) * (small_w - 3 - 1e-4)], axis=0)
        x_coord = x_coord[tf.newaxis, :]
    y_coord += tf.zeros_like(x_coord)
    x_coord += tf.zeros_like(y_coord)

    stacked_coords = tf.stack([y_coord, x_coord], axis=-1)[tf.newaxis]
    stacked_coords = tf.tile(stacked_coords, [batch_size, 1, 1, 1])
    estimated_offsets = interpolate_2d(cps, stacked_coords, degree,
                                       [False, pano_pad])
    tf.summary.image("y_flowfield", estimated_offsets[Ellipsis, :1])
    tf.summary.image("x_flowfield", estimated_offsets[Ellipsis, 1:])

    y_coord_sample = tf.range(0., big_h, 1)[:, tf.newaxis]
    x_coord_sample = tf.range(0., big_w, 1)[tf.newaxis, :]

    y_coord_sample += tf.zeros_like(x_coord_sample)
    x_coord_sample += tf.zeros_like(y_coord_sample)

    y_coord_sample += estimated_offsets[Ellipsis, 0]
    x_coord_sample += estimated_offsets[Ellipsis, 1]
    y_clipped = tf.clip_by_value(y_coord_sample, 0, big_h - 1)
    if pano_pad:
        x_clipped = tf.floormod(x_coord_sample, big_w)
        image = tf.concat([image, image[:, :, :1]], axis=2)
    else:
        x_clipped = tf.clip_by_value(x_coord_sample, 0, big_w - 1)

    stacked_resampler_coords = tf.stack([x_clipped, y_clipped], axis=-1)
    return contrib_resampler.resampler(image, stacked_resampler_coords)
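
A usage sketch with assumed shapes (interpolate_2d is defined elsewhere in the original project): a coarse grid of normalized control-point offsets warping a panorama with cyclic horizontal padding.

cps = tf.zeros([1, 8, 16, 2])       # [bsz, H_CP, W_CP, 2] (y, x offsets)
image = tf.zeros([1, 256, 512, 3])  # [bsz, H, W, 3]
warped = bspline_warp(cps, image, degree=3, pano_pad=True)  # -> [1, 256, 512, 3]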
Example No. 18
Y1 = tf.reshape(Y, [1500 * 2100])
X1 = tf.reshape(X, [1500 * 2100])
pts = tf.stack([X1, Y1], axis=1)
warp_pts = tf.matmul(pts, rotation)
warp_Y = warp_pts[:, 1]
warp_X = warp_pts[:, 0]
warp_Y1 = tf.reshape(warp_Y, [1500, 2100])
warp_X1 = tf.reshape(warp_X, [1500, 2100])
warp_Y2 = tf.clip_by_value(warp_Y1, 0, 1500)
warp_X2 = tf.clip_by_value(warp_X1, 0, 2100)
warp_combined = tf.stack([warp_X2, warp_Y2], axis=2)

warp = tf.cast(warp_combined, tf.float32)
warp_inp = tf.expand_dims(warp, 0)
resampled = tf.cast(resampler(data_inp, warp_inp), tf.uint8)

# Start a new session to show example output.
with tf.Session() as sess:
    # Required to get the filename matching to run.
    tf.global_variables_initializer().run()

    # Coordinate the loading of image files.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    # Get an image tensor and print its value.
    output = sess.run(resampled)
    print(output[0].shape)

    plt.figure()
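
For reference, one plausible setup for the undefined names used above (X, Y, rotation, data_inp); this is only an assumption for illustration and replaces the original file-reading pipeline with a zero-filled image.

angle = np.pi / 8.0
rotation = tf.constant([[np.cos(angle), -np.sin(angle)],
                        [np.sin(angle),  np.cos(angle)]], dtype=tf.float32)
X, Y = tf.meshgrid(tf.range(2100, dtype=tf.float32),
                   tf.range(1500, dtype=tf.float32))
data_inp = tf.zeros([1, 1500, 2100, 3], dtype=tf.float32)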
Example No. 19
  def test_op_errors(self):
    data_width = 7
    data_height = 9
    data_depth = 3
    data_channels = 5
    warp_width = 4
    warp_height = 8
    batch_size = 10

    # Input data shape is not defined over a 2D grid, i.e. its shape is not like
    # (batch_size, data_height, data_width, data_channels).
    with self.test_session() as sess:
      data_shape = (batch_size, data_height, data_width, data_depth,
                    data_channels)
      data = np.zeros(data_shape)
      warp_shape = (batch_size, warp_height, warp_width, 2)
      warp = np.zeros(warp_shape)
      outputs = resampler.resampler(constant_op.constant(data),
                                    constant_op.constant(warp))

      with self.assertRaisesRegexp(errors_impl.UnimplementedError,
                                   "Only bilinear interpolation is currently "
                                   "supported."):
        sess.run(outputs)

    # Warp tensor must be at least a matrix, with shape [batch_size, 2].
    with self.test_session() as sess:
      data_shape = (batch_size, data_height, data_width, data_channels)
      data = np.zeros(data_shape)
      warp_shape = (batch_size,)
      warp = np.zeros(warp_shape)
      outputs = resampler.resampler(constant_op.constant(data),
                                    constant_op.constant(warp))

      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                   "warp should be at least a matrix"):
        sess.run(outputs)

    # The batch size of the data and warp tensors must be the same.
    with self.test_session() as sess:
      data_shape = (batch_size, data_height, data_width, data_channels)
      data = np.zeros(data_shape)
      warp_shape = (batch_size+1, warp_height, warp_width, 2)
      warp = np.zeros(warp_shape)
      outputs = resampler.resampler(constant_op.constant(data),
                                    constant_op.constant(warp))

      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                   "Batch size of data and warp tensor"):
        sess.run(outputs)

    # The warp tensor must contain 2D coordinates, i.e. the last dimension of
    # its shape must be 2.
    with self.test_session() as sess:
      data_shape = (batch_size, data_height, data_width, data_channels)
      data = np.zeros(data_shape)
      warp_shape = (batch_size, warp_height, warp_width, 3)
      warp = np.zeros(warp_shape)
      outputs = resampler.resampler(constant_op.constant(data),
                                    constant_op.constant(warp))

      with self.assertRaisesRegexp(errors_impl.UnimplementedError,
                                   "Only bilinear interpolation is supported, "
                                   "warping"):
        sess.run(outputs)
def motion_field_consistency_loss(frame1transformed_pixelxy, mask,
                                  rotation1, translation1,
                                  rotation2, translation2):
  """Computes a cycle consistency loss between two motion maps.

  Given two rotation and translation maps (of two frames), and a mapping from
  one frame to the other, this function assists in imposing that the fields at
  frame 1 represent the opposite motion of the ones in frame 2.

  In other words: At any given pixel on frame 1, if we apply the translation and
  rotation designated at that pixel, we land on some pixel in frame 2, and if we
  apply the translation and rotation designated there, we land back at the
  original pixel at frame 1.

  Args:
    frame1transformed_pixelxy: A tf.Tensor of shape [B, H, W, 2] representing
      the motion-transformed location of each pixel in frame 1. It is assumed
      (but not verified) that frame1transformed_pixelxy was obtained by properly
      applying rotation1 and translation1 on the depth map of frame 1.
    mask: A tf.Tensor of shape [B, H, W, 2] expressing the weight of each pixel
      in the calculation of the consistency loss.
    rotation1: A tf.Tensor of shape [B, 3] representing rotation angles.
    translation1: A tf.Tensor of shape [B, H, W, 3] representing translation
      vectors.
    rotation2: A tf.Tensor of shape [B, 3] representing rotation angles.
    translation2: A tf.Tensor of shape [B, H, W, 3] representing translation
      vectors.

  Returns:
    A dictionary from string to tf.Tensor, with the following entries:
      rotation_error: A tf scalar, the rotation consistency error.
      translation_error: A tf scalar, the translation consistency error.
  """

  translation2resampled = contrib_resampler.resampler(
      translation2, tf.stop_gradient(frame1transformed_pixelxy))
  rotation1field = tf.broadcast_to(
      _expand_dims_twice(rotation1, -2), tf.shape(translation1))
  rotation2field = tf.broadcast_to(
      _expand_dims_twice(rotation2, -2), tf.shape(translation2))
  rotation1matrix = transform_utils.matrix_from_angles(rotation1field)
  rotation2matrix = transform_utils.matrix_from_angles(rotation2field)

  rot_unit, trans_zero = transform_utils.combine(
      rotation2matrix, translation2resampled,
      rotation1matrix, translation1)
  eye = tf.eye(3, batch_shape=tf.shape(rot_unit)[:-2])

  transform_utils.matrix_from_angles(rotation1field)  # Delete this later
  transform_utils.matrix_from_angles(rotation2field)  # Delete this later

  # We normalize the product of rotations by the product of their norms, to make
  # the loss agnostic of their magnitudes, only wanting them to be opposite in
  # directions. Otherwise the loss has a tendency to drive the rotations to
  # zero.
  rot_error = tf.reduce_mean(tf.square(rot_unit - eye), axis=(3, 4))
  rot1_scale = tf.reduce_mean(tf.square(rotation1matrix - eye), axis=(3, 4))
  rot2_scale = tf.reduce_mean(tf.square(rotation2matrix - eye), axis=(3, 4))
  rot_error /= (1e-24 + rot1_scale + rot2_scale)
  rotation_error = tf.reduce_mean(rot_error)

  def norm(x):
    return tf.reduce_sum(tf.square(x), axis=-1)

  # Here again, we normalize by the magnitudes, for the same reason.
  translation_error = tf.reduce_mean(
      mask * norm(trans_zero) /
      (1e-24 + norm(translation1) + norm(translation2)))

  return {
      'rotation_error': rotation_error,
      'translation_error': translation_error
  }
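
The function above references an _expand_dims_twice helper that is not shown. A minimal sketch (an assumption): it inserts two singleton dimensions at the given axis so a [B, 3] rotation can broadcast against a [B, H, W, 3] translation field.

def _expand_dims_twice(x, dim):
  # For dim=-2: [B, 3] -> [B, 1, 3] -> [B, 1, 1, 3].
  return tf.expand_dims(tf.expand_dims(x, dim), dim)
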
def _resample_depth(depth, coordinates):
  depth = tf.expand_dims(depth, -1)
  result = contrib_resampler.resampler(depth, coordinates)
  return tf.squeeze(result, axis=3)