Code example #1
File: model.py  Project: AdamOlsson/depth_estimation
    def _build_egomotion_test_graph(self):
        """Builds graph for inference of egomotion given two images."""
        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            self._image1 = tf.placeholder(
                tf.float32,
                [self.batch_size, self.img_height, self.img_width, 3],
                name='image1')
            self._image2 = tf.placeholder(
                tf.float32,
                [self.batch_size, self.img_height, self.img_width, 3],
                name='image2')
            # The "compute_loss" scope is needed for the checkpoint to load properly.
            with tf.name_scope('compute_loss'):
                rot, trans, _, _ = motion_prediction_net.motion_field_net(
                    images=tf.concat([self._image1, self._image2], axis=-1))
                inv_rot, inv_trans, _, _ = (
                    motion_prediction_net.motion_field_net(images=tf.concat(
                        [self._image2, self._image1], axis=-1)))

            rot = transform_utils.matrix_from_angles(rot)
            inv_rot = transform_utils.matrix_from_angles(inv_rot)
            trans = tf.squeeze(trans, axis=(1, 2))
            inv_trans = tf.squeeze(inv_trans, axis=(1, 2))

            # rot and inv_rot should be the inverses of one another, but in reality
            # they slightly differ. Averaging rot and inv(inv_rot) gives a better
            # estimator for the rotation. Similarly, trans and rot*inv_trans should
            # be the negatives of one another, so we average rot*inv_trans and trans
            # to get a better estimator. TODO(gariel): Check if there's an estimator
            # with less variance.
            self.rot = 0.5 * (tf.linalg.inv(inv_rot) + rot)
            self.trans = 0.5 * (-tf.squeeze(
                tf.matmul(self.rot, tf.expand_dims(inv_trans, -1)), axis=-1) +
                                trans)
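
For intuition, the symmetrization at the end of this graph can be reproduced outside TensorFlow. Below is a minimal NumPy sketch (not part of the project; the function name is made up) of the same averaging: the forward estimate is combined with the inverted backward rotation, and with the negated, rotated backward translation.

import numpy as np

def symmetrize_egomotion(rot, trans, inv_rot, inv_trans):
    """Average a forward egomotion estimate with the inverted backward one.

    rot, inv_rot: [3, 3] rotation estimates for frame1->frame2 and frame2->frame1.
    trans, inv_trans: [3] translation estimates for the same two directions.
    """
    # Ideally inv_rot == inv(rot), so inv(inv_rot) is a second estimate of rot.
    rot_avg = 0.5 * (np.linalg.inv(inv_rot) + rot)
    # Ideally rot @ inv_trans == -trans, so -rot_avg @ inv_trans is a second
    # estimate of trans.
    trans_avg = 0.5 * (-rot_avg @ inv_trans + trans)
    return rot_avg, trans_avg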
Code example #2
def _using_motion_vector(depth, translation, rotation_angles, intrinsic_mat):
    """A helper for using_motion_vector. See docstring therein."""

    if translation.shape.ndims not in (2, 4):
        raise ValueError('\'translation\' should have rank 2 or 4, not %d' %
                         translation.shape.ndims)
    if translation.shape[-1] != 3:
        raise ValueError('translation\'s last dimension should be 3, not %d' %
                         translation.shape[-1])
    if translation.shape.ndims == 2:
        translation = tf.expand_dims(tf.expand_dims(translation, 1), 1)

    _, height, width = tf.unstack(tf.shape(depth))
    grid = tf.squeeze(
        tf.stack(tf.meshgrid(tf.range(width), tf.range(height), (1,))),
        axis=3)  # 3 x height x width
    grid = tf.to_float(grid)
    intrinsic_mat_inv = tf.linalg.inv(intrinsic_mat)

    rot_mat = transform_utils.matrix_from_angles(rotation_angles)
    # We have to treat separately the case of a per-image rotation vector and a
    # per-image rotation field, because the broadcasting capabilities of einsum
    # are limited.
    if rotation_angles.shape.ndims == 2:
        # The calculation here is identical to the one in inverse_warp above.
        # However, we use einsum for better clarity. Under the hood, einsum performs
        # the reshaping and invocation of BatchMatMul, instead of doing it manually,
        # as in inverse_warp.
        projected_rotation = tf.einsum('bij,bjk,bkl->bil', intrinsic_mat,
                                       rot_mat, intrinsic_mat_inv)
        pcoords = tf.einsum('bij,jhw,bhw->bihw', projected_rotation, grid,
                            depth)
    elif rotation_angles.shape.ndims == 4:
        # We push the H and W dimensions to the end, and transpose the rotation
        # matrix elements (as noted above).
        rot_mat = tf.transpose(rot_mat, [0, 3, 4, 1, 2])
        projected_rotation = tf.einsum('bij,bjkhw,bkl->bilhw', intrinsic_mat,
                                       rot_mat, intrinsic_mat_inv)
        pcoords = tf.einsum('bijhw,jhw,bhw->bihw', projected_rotation, grid,
                            depth)

    projected_translation = tf.einsum('bij,bhwj->bihw', intrinsic_mat,
                                      translation)
    pcoords += projected_translation
    x, y, z = tf.unstack(pcoords, axis=1)
    return x / z, y / z, z
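
The two einsum contractions above implement the standard reprojection p' = K R K^-1 p d + K t per pixel (shown here for the per-image rotation case). A minimal NumPy sketch for a single pixel, with purely illustrative values, reproduces the same arithmetic.

import numpy as np

# Illustrative intrinsics, rotation, per-pixel translation, and one pixel (u, v)
# with depth d.
K = np.array([[500., 0., 160.], [0., 500., 120.], [0., 0., 1.]])
R = np.eye(3)
t = np.array([0.1, 0.0, 0.0])
u, v, d = 200.0, 100.0, 4.0

p = np.array([u, v, 1.0])                      # homogeneous pixel coordinate
pcoords = K @ R @ np.linalg.inv(K) @ p * d + K @ t
x, y, z = pcoords
print(x / z, y / z, z)                         # warped pixel location and new depth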
Code example #3
def rgbd_and_motion_consistency_loss(frame1transformed_depth, frame1rgb,
                                     frame2depth, frame2rgb, rotation1,
                                     translation1, rotation2, translation2,
                                     intrinsic_mat, F_gt1, F_gt2, i):
    """A helper that bundles rgbd and motion consistency losses together."""
    endpoints = rgbd_consistency_loss(frame1transformed_depth, frame1rgb,
                                      frame2depth, frame2rgb)
    # We calculate the loss only where frame1transformed_depth is closer to the
    # camera than frame2depth (occlusion awareness). See the explanation in
    # rgbd_consistency_loss above.
    endpoints.update(
        motion_field_consistency_loss(frame1transformed_depth.pixel_xy,
                                      endpoints['frame1_closer_to_camera'],
                                      rotation1, translation1, rotation2,
                                      translation2))
    rot1_matrix = transform_utils.matrix_from_angles(rotation1)
    if i == 0:
        endpoints.update(
            Fmat_consistency_loss(F_gt1, intrinsic_mat[0, :, :],
                                  rot1_matrix[0, :, :], translation1))
    else:
        endpoints.update(
            Fmat_consistency_loss(F_gt2, intrinsic_mat[0, :, :],
                                  rot1_matrix[0, :, :], translation1))
    return endpoints
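
The Fmat_consistency_loss call receives a ground-truth fundamental matrix (F_gt1 or F_gt2, selected by the index i) together with the intrinsics and the predicted rotation and translation. Its exact formulation is not shown here, but the quantity such a loss can penalize is the epipolar constraint x2^T F x1 = 0. The NumPy sketch below, with made-up values, builds F from K, R and t and checks that constraint for one corresponding pixel pair.

import numpy as np

def skew(t):
    """Cross-product (skew-symmetric) matrix of a 3-vector."""
    return np.array([[0., -t[2], t[1]],
                     [t[2], 0., -t[0]],
                     [-t[1], t[0], 0.]])

K = np.array([[500., 0., 160.], [0., 500., 120.], [0., 0., 1.]])
R = np.eye(3)
t = np.array([0.2, 0.0, 0.0])

# Fundamental matrix from calibration and relative motion.
F = np.linalg.inv(K).T @ skew(t) @ R @ np.linalg.inv(K)

# One 3D point observed in both frames.
X = np.array([0.5, -0.2, 4.0])
x1 = K @ X
x1 /= x1[2]                                    # pixel in frame 1
x2 = K @ (R @ X + t)
x2 /= x2[2]                                    # pixel in frame 2
print(x2 @ F @ x1)                             # ~0, the epipolar constraint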
Code example #4
def _using_motion_vector_with_distortion(depth,
                                         translation,
                                         rotation_angles,
                                         intrinsic_mat,
                                         distortion_coeff=0.0):
  """A helper for using_motion_vector. See docstring therein."""

  if translation.shape.ndims not in (2, 4):
    raise ValueError('\'translation\' should have rank 2 or 4, not %d' %
                     translation.shape.ndims)
  if translation.shape[-1] != 3:
    raise ValueError('translation\'s last dimension should be 3, not %d' %
                     translation.shape[-1])
  if translation.shape.ndims == 2:
    translation = tf.expand_dims(tf.expand_dims(translation, 1), 1)

  _, height, width = tf.unstack(tf.shape(depth))
  grid = tf.squeeze(
      tf.stack(tf.meshgrid(tf.range(width), tf.range(height), (1,))),
      axis=3)  # 3 x height x width
  grid = tf.to_float(grid)
  intrinsic_mat_inv = tf.linalg.inv(intrinsic_mat)

  normalized_grid = tf.einsum('bij,jhw->bihw', intrinsic_mat_inv, grid)
  radii_squared = tf.reduce_sum(tf.square(normalized_grid[:, :2, :, :]), axis=1)

  undistortion_factor = quadratic_inverse_distortion_scale(
      distortion_coeff, radii_squared)
  undistortion_factor = tf.stack([
      undistortion_factor, undistortion_factor,
      tf.ones_like(undistortion_factor)
  ],
                                 axis=1)
  normalized_grid *= undistortion_factor

  rot_mat = transform_utils.matrix_from_angles(rotation_angles)
  # We have to treat separately the case of a per-image rotation vector and a
  # per-image rotation field, because the broadcasting capabilities of einsum
  # are limited.
  if rotation_angles.shape.ndims == 2:
    # The calculation here is identical to the one in inverse_warp above.
    # However, we use einsum for better clarity. Under the hood, einsum performs
    # the reshaping and invocation of BatchMatMul, instead of doing it manually,
    # as in inverse_warp.
    pcoords = tf.einsum('bij,bjhw,bhw->bihw', rot_mat, normalized_grid, depth)
  elif rotation_angles.shape.ndims == 4:
    # We push the H and W dimensions to the end, and transpose the rotation
    # matrix elements (as noted above).
    rot_mat = tf.transpose(rot_mat, [0, 3, 4, 1, 2])
    pcoords = tf.einsum('bijhw,bjhw,bhw->bihw', rot_mat, normalized_grid, depth)

  pcoords += tf.transpose(translation, [0, 3, 1, 2])

  x, y, z = tf.unstack(pcoords, axis=1)
  x /= z
  y /= z
  scale = quadraric_distortion_scale(distortion_coeff,
                                     tf.square(x) + tf.square(y))
  x *= scale
  y *= scale

  pcoords = tf.einsum('bij,bjhw->bihw', intrinsic_mat,
                      tf.stack([x, y, tf.ones_like(x)], axis=1))
  x, y, _ = tf.unstack(pcoords, axis=1)

  return x, y, z
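
The helpers quadratic_inverse_distortion_scale and quadraric_distortion_scale are defined elsewhere in the project. Assuming the usual one-parameter quadratic radial model, scale = 1 + k * r^2, the NumPy sketch below (function names are made up) shows one way such an inverse scale can be approximated by fixed-point iteration, and checks that undistorting and then re-distorting is close to a no-op.

import numpy as np

def distortion_scale(k, r_squared):
    """One-parameter quadratic radial model: distorted radius = radius * (1 + k * r^2)."""
    return 1.0 + k * r_squared

def inverse_distortion_scale(k, r_squared, iterations=5):
    """Fixed-point approximation of the scale that undoes the distortion."""
    s = np.ones_like(r_squared)
    for _ in range(iterations):
        s = 1.0 / distortion_scale(k, r_squared * s * s)
    return s

k = -0.05
r2 = np.array([0.0, 0.25, 1.0])                # squared radii of distorted points
s_undistort = inverse_distortion_scale(k, r2)
# Round trip: re-distorting the undistorted radius should give back scale ~1.
print(s_undistort * distortion_scale(k, r2 * s_undistort ** 2))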
Code example #5
def motion_field_consistency_loss(frame1transformed_pixelxy, mask, rotation1,
                                  translation1, rotation2, translation2):
    """Computes a cycle consistency loss between two motion maps.

  Given two rotation and translation maps (of two frames), and a mapping from
  one frame to the other, this function assists in imposing that the fields at
  frame 1 represent the opposite motion of the ones in frame 2.

  In other words: At any given pixel on frame 1, if we apply the translation and
  rotation designated at that pixel, we land on some pixel in frame 2, and if we
  apply the translation and rotation designated there, we land back at the
  original pixel at frame 1.

  Args:
    frame1transformed_pixelxy: A tf.Tensor of shape [B, H, W, 2] representing
      the motion-transformed location of each pixel in frame 1. It is assumed
      (but not verified) that frame1transformed_pixelxy was obtained by properly
      applying rotation1 and translation1 on the depth map of frame 1.
    mask: A tf.Tensor of shape [B, H, W, 2] expressing the weight of each pixel
      in the calculation of the consistency loss.
    rotation1: A tf.Tensor of shape [B, 3] representing rotation angles.
    translation1: A tf.Tensor of shape [B, H, W, 3] representing translation
      vectors.
    rotation2: A tf.Tensor of shape [B, 3] representing rotation angles.
    translation2: A tf.Tensor of shape [B, H, W, 3] representing translation
      vectors.

  Returns:
    A dictionary from string to tf.Tensor, with the following entries:
      rotation_error: A tf scalar, the rotation consistency error.
      translation_error: A tf scalar, the translation consistency error.
  """

    translation2resampled = tf.contrib.resampler.resampler(
        translation2, tf.stop_gradient(frame1transformed_pixelxy))
    rotation1field = tf.broadcast_to(_expand_dims_twice(rotation1, -2),
                                     tf.shape(translation1))
    rotation2field = tf.broadcast_to(_expand_dims_twice(rotation2, -2),
                                     tf.shape(translation2))
    rotation1matrix = transform_utils.matrix_from_angles(rotation1field)
    rotation2matrix = transform_utils.matrix_from_angles(rotation2field)

    rot_unit, trans_zero = transform_utils.combine(rotation2matrix,
                                                   translation2resampled,
                                                   rotation1matrix,
                                                   translation1)
    eye = tf.eye(3, batch_shape=tf.shape(rot_unit)[:-2])

    # We normalize the product of rotations by the product of their norms, to make
    # the loss agnostic of their magnitudes, only wanting them to be opposite in
    # directions. Otherwise the loss has a tendency to drive the rotations to
    # zero.
    rot_error = tf.reduce_mean(tf.square(rot_unit - eye), axis=(3, 4))
    rot1_scale = tf.reduce_mean(tf.square(rotation1matrix - eye), axis=(3, 4))
    rot2_scale = tf.reduce_mean(tf.square(rotation2matrix - eye), axis=(3, 4))
    rot_error /= (1e-24 + rot1_scale + rot2_scale)
    rotation_error = tf.reduce_mean(rot_error)

    def norm(x):
        return tf.reduce_sum(tf.square(x), axis=-1)

    # Here again, we normalize by the magnitudes, for the same reason.
    translation_error = tf.reduce_mean(
        mask * norm(trans_zero) /
        (1e-24 + norm(translation1) + norm(translation2)))

    return {
        'rotation_error': rotation_error,
        'translation_error': translation_error
    }
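
The cycle-consistency idea above can be checked with a toy example. Assuming transform_utils.combine composes two rigid motions as (rot2 @ rot1, rot2 @ trans1 + trans2), which is what the docstring's round-trip argument implies, composing a transform with its exact inverse yields the identity rotation and zero translation, i.e. exactly the targets that rot_unit and trans_zero are compared against. A minimal NumPy sketch:

import numpy as np

def combine(rot2, trans2, rot1, trans1):
    """Assumed semantics of transform_utils.combine for a single [3, 3]/[3] pair:
    apply (rot1, trans1) first, then (rot2, trans2)."""
    return rot2 @ rot1, rot2 @ trans1 + trans2

angle = 0.1
rot1 = np.array([[np.cos(angle), -np.sin(angle), 0.],
                 [np.sin(angle),  np.cos(angle), 0.],
                 [0., 0., 1.]])
trans1 = np.array([0.3, 0.0, 0.1])
rot2 = rot1.T                                  # the exact inverse rotation
trans2 = -rot2 @ trans1                        # the exact inverse translation

rot_unit, trans_zero = combine(rot2, trans2, rot1, trans1)
# Both True: the round trip is the identity, which is the target the
# rotation_error and translation_error terms penalize deviations from.
print(np.allclose(rot_unit, np.eye(3)), np.allclose(trans_zero, 0.0))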