Example #1
    def _build_egomotion_test_graph(self):
        """Builds graph for inference of egomotion given two images."""
        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            self._image1 = tf.placeholder(
                tf.float32,
                [self.batch_size, self.img_height, self.img_width, 3],
                name='image1')
            self._image2 = tf.placeholder(
                tf.float32,
                [self.batch_size, self.img_height, self.img_width, 3],
                name='image2')
            # The "compute_loss" scope is needed for the checkpoint to load properly.
            with tf.name_scope('compute_loss'):
                rot, trans, _, _ = motion_prediction_net.motion_field_net(
                    images=tf.concat([self._image1, self._image2], axis=-1))
                inv_rot, inv_trans, _, _ = (
                    motion_prediction_net.motion_field_net(images=tf.concat(
                        [self._image2, self._image1], axis=-1)))

            rot = transform_utils.matrix_from_angles(rot)
            inv_rot = transform_utils.matrix_from_angles(inv_rot)
            trans = tf.squeeze(trans, axis=(1, 2))
            inv_trans = tf.squeeze(inv_trans, axis=(1, 2))

            # rot and inv_rot should be the inverse of one another, but in practice
            # they differ slightly. Averaging rot and inv(inv_rot) gives a better
            # estimator for the rotation. Similarly, trans and rot*inv_trans should
            # be the negative of one another, so we average -rot*inv_trans and trans
            # to get a better estimator. TODO(gariel): Check if there's an estimator
            # with less variance.
            self.rot = 0.5 * (tf.linalg.inv(inv_rot) + rot)
            self.trans = 0.5 * (-tf.squeeze(
                tf.matmul(self.rot, tf.expand_dims(inv_trans, -1)), axis=-1) +
                                trans)
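The averaging above exploits forward/backward consistency of rigid motion: the backward rotation should be the inverse of the forward one, and the backward translation, rotated into the forward frame, should be its negative. A minimal NumPy sketch of the same estimator (illustrative only; the helper name and shapes are hypothetical, not part of the original code):

import numpy as np

def average_forward_backward(rot12, trans12, rot21, trans21):
    """rot12, rot21: [3, 3] rotation matrices; trans12, trans21: [3] translations."""
    # Average the forward rotation with the inverse of the backward rotation.
    rot = 0.5 * (np.linalg.inv(rot21) + rot12)
    # The backward translation, rotated and negated, should match the forward one.
    trans = 0.5 * (trans12 - rot @ trans21)
    return rot, trans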
Example #2
def infer_ego_motion(rot, trans):
    """
        Infer ego motion (pose) using rot and trans matrix.
        Args:
            rot : rotation matrix.
            trans : translational matrix.
        Returns :
            avg_rot : rotation matrix for trajectory in world co-ordinates system.
            avg_trans : translation matrix for trajectory in world co-ordinates system.
    """
    rot12, rot21 = rot
    rot12 = matrix_from_angles(rot12)
    rot21 = matrix_from_angles(rot21)
    trans12, trans21 = trans

    avg_rot = 0.5 * (torch.linalg.inv(rot21) + rot12)
    avg_trans = 0.5 * (-torch.squeeze(
        torch.matmul(rot12, torch.unsqueeze(trans21, -1)), dim=-1) + trans12)
    return avg_rot, avg_trans
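A hypothetical usage sketch for the helper above (the [1, 3] angle and translation shapes are assumptions, mirroring the batched shapes used elsewhere in these examples):

import torch

# Forward (1 -> 2) and backward (2 -> 1) motion estimates for a batch of one pair.
rot12, rot21 = torch.zeros(1, 3), torch.zeros(1, 3)      # rotation angles
trans12, trans21 = torch.zeros(1, 3), torch.zeros(1, 3)  # translation vectors
avg_rot, avg_trans = infer_ego_motion((rot12, rot21), (trans12, trans21))
# avg_rot: [1, 3, 3] rotation matrices, avg_trans: [1, 3] translations.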
Example #3
def _using_motion_vector_with_distortion(depth, translation, rotation_angles, intrinsic_mat, distortion_coeff=0.0):
    """A helper for using_motion_vector. See docstring therein."""

    if len(translation.shape) not in (2, 4):
        raise ValueError('\'translation\' should have rank 2 or 4, not %d' % len(translation.shape))
    if translation.shape[-1] != 3:
        raise ValueError('translation\'s last dimension should be 3, not %d' % translation.shape[-1])
    if len(translation.shape) == 2:
        translation = torch.unsqueeze(torch.unsqueeze(translation, 1), 1)

    _, height, width = depth.shape
    # Build a [3, H, W] grid of homogeneous pixel coordinates (x, y, 1).
    yy, xx = torch.meshgrid(torch.arange(height), torch.arange(width))
    grid = torch.stack([xx, yy, torch.ones_like(xx)]).float().to(depth.device)
    intrinsic_mat_inv = torch.inverse(intrinsic_mat)

    normalized_grid = torch.einsum('bij,jhw->bihw', intrinsic_mat_inv, grid)

    radii_squared = torch.sum(normalized_grid[:, :2, :, :] ** 2, dim=1)

    undistortion_factor = quadratic_inverse_distortion_scale(distortion_coeff, radii_squared)
    undistortion_factor = torch.stack(
        [undistortion_factor, undistortion_factor, torch.ones_like(undistortion_factor)], dim=1)
    normalized_grid *= undistortion_factor

    rot_mat = transform_utils.matrix_from_angles(rotation_angles)
    # We have to treat separately the case of a per-image rotation vector and a
    # per-image rotation field, because the broadcasting capabilities of einsum
    # are limited.
    if len(rotation_angles.shape) == 2:
        # The calculation here is identical to the one in inverse_warp above.
        # However, we use einsum for better clarity. Under the hood, einsum performs
        # the reshaping and invocation of BatchMatMul, instead of doing it manually,
        # as in inverse_warp.
        pcoords = torch.einsum('bij,bjhw,bhw->bihw', rot_mat, normalized_grid, depth)
    elif len(rotation_angles.shape) == 4:
        # We push the H and W dimensions to the end, and transpose the rotation
        # matrix elements (as noted above).
        rot_mat = rot_mat.permute(0, 3, 4, 1, 2)
        pcoords = torch.einsum('bijhw,bjhw,bhw->bihw', rot_mat, normalized_grid, depth)

    pcoords += translation.permute(0, 3, 1, 2)

    x, y, z = torch.unbind(pcoords, dim=1)
    # Avoid in-place ops on views of pcoords; create new tensors instead.
    x = x / z
    y = y / z
    scale = quadraric_distortion_scale(distortion_coeff, x.mul(x) + y.mul(y))
    x = x * scale
    y = y * scale

    pcoords = torch.einsum('bij,bjhw->bihw', intrinsic_mat, torch.stack([x, y, torch.ones_like(x)], dim=1))
    x, y, _ = torch.unbind(pcoords, dim=1)

    return x, y, z
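The two distortion helpers referenced above (quadraric_distortion_scale and quadratic_inverse_distortion_scale) are defined elsewhere in the source project. Assuming the common quadratic radial model, where the distorted radius is r * (1 + k * r^2), a minimal stand-in sketch could look like this (illustrative only, not the project's actual implementation):

import torch

def quadratic_distortion_scale(distortion_coeff, r_squared):
    # Forward radial distortion scale under the assumed quadratic model.
    return 1.0 + distortion_coeff * r_squared

def quadratic_inverse_distortion_scale(distortion_coeff, r_squared, iterations=4):
    # Approximate inverse of the scale above via fixed-point iteration:
    # we look for s satisfying s = 1 / (1 + k * r^2 * s^2).
    s = torch.ones_like(r_squared)
    for _ in range(iterations):
        s = 1.0 / (1.0 + distortion_coeff * r_squared * s * s)
    return s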
Example #4
def _using_motion_vector(depth, translation, rotation_angles, intrinsic_mat):
    """A helper for using_motion_vector. See docstring therein."""

    if translation.shape.ndims not in (2, 4):
        raise ValueError('\'translation\' should have rank 2 or 4, not %d' %
                         translation.shape.ndims)
    if translation.shape[-1] != 3:
        raise ValueError('translation\'s last dimension should be 3, not %d' %
                         translation.shape[-1])
    if translation.shape.ndims == 2:
        translation = tf.expand_dims(tf.expand_dims(translation, 1), 1)

    _, height, width = tf.unstack(tf.shape(depth))
    grid = tf.squeeze(tf.stack(
        tf.meshgrid(tf.range(width), tf.range(height), (1, ))),
                      axis=3)
    grid = tf.to_float(grid)
    intrinsic_mat_inv = tf.linalg.inv(intrinsic_mat)

    rot_mat = transform_utils.matrix_from_angles(rotation_angles)
    # We have to treat separately the case of a per-image rotation vector and a
    # per-image rotation field, because the broadcasting capabilities of einsum
    # are limited.
    if rotation_angles.shape.ndims == 2:
        # The calculation here is identical to the one in inverse_warp above.
        # However, we use einsum for better clarity. Under the hood, einsum performs
        # the reshaping and invocation of BatchMatMul, instead of doing it manually,
        # as in inverse_warp.
        projected_rotation = tf.einsum('bij,bjk,bkl->bil', intrinsic_mat,
                                       rot_mat, intrinsic_mat_inv)
        pcoords = tf.einsum('bij,jhw,bhw->bihw', projected_rotation, grid,
                            depth)
    elif rotation_angles.shape.ndims == 4:
        # We push the H and W dimensions to the end, and transpose the rotation
        # matrix elements (as noted above).
        rot_mat = tf.transpose(rot_mat, [0, 3, 4, 1, 2])
        projected_rotation = tf.einsum('bij,bjkhw,bkl->bilhw', intrinsic_mat,
                                       rot_mat, intrinsic_mat_inv)
        pcoords = tf.einsum('bijhw,jhw,bhw->bihw', projected_rotation, grid,
                            depth)

    projected_translation = tf.einsum('bij,bhwj->bihw', intrinsic_mat,
                                      translation)
    pcoords += projected_translation
    x, y, z = tf.unstack(pcoords, axis=1)
    return x / z, y / z, z
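For a single pixel, the einsum pipeline above collapses to an ordinary matrix chain: back-project with the inverse intrinsics, rotate, translate, and re-project. A minimal NumPy sketch of the per-image case (illustrative only; all names here are hypothetical):

import numpy as np

def warp_pixel(u, v, depth, rot_mat, translation, intrinsic_mat):
    # K (R K^-1 [u, v, 1]^T * depth + t), followed by perspective division.
    p = np.array([u, v, 1.0])
    cam_point = depth * (rot_mat @ np.linalg.inv(intrinsic_mat) @ p) + translation
    x, y, z = intrinsic_mat @ cam_point
    return x / z, y / z, z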
Example #5
def _using_motion_vector(depth, translation, rotation_angles, intrinsic_mat):
    """A helper for using_motion_vector. See docstring therein."""

    if len(translation.shape) not in (2, 4):
        raise ValueError('\'translation\' should have rank 2 or 4, not %d' % len(translation.shape))
    if translation.shape[-1] != 3:
        raise ValueError('translation\'s last dimension should be 3, not %d' % translation.shape[-1])
    if len(translation.shape) == 2:
        translation = torch.unsqueeze(torch.unsqueeze(translation, 1), 1)

    _, _, height, width = depth.shape
    lista, listb = np.meshgrid(np.arange(width), np.arange(height))
    listc = np.ones_like(listb)
    # Homogeneous pixel grid [3, H, W], placed on the same device as `depth`.
    grid = torch.from_numpy(np.stack([lista, listb, listc])).float().to(depth.device)
    intrinsic_mat_inv = torch.inverse(intrinsic_mat)

    rot_mat = transform_utils.matrix_from_angles(rotation_angles)
    # We have to treat separately the case of a per-image rotation vector and a
    # per-image rotation field, because the broadcasting capabilities of einsum
    # are limited.
    if len(rotation_angles.shape) == 2:
        # The calculation here is identical to the one in inverse_warp above.
        # However, we use einsum for better clarity. Under the hood, einsum performs
        # the reshaping and invocation of BatchMatMul, instead of doing it manually,
        # as in inverse_warp.
        projected_rotation = torch.einsum('bij,bjk,bkl->bil', intrinsic_mat, rot_mat, intrinsic_mat_inv)  # K*R*inv_K
        pcoords = torch.einsum('bij,jhw,bhw->bihw', projected_rotation, grid, depth.squeeze(1))
    elif len(rotation_angles.shape) == 4:
        # We push the H and W dimensions to the end, and transpose the rotation matrix elements (as noted above).
        rot_mat = rot_mat.permute(0, 3, 4, 1, 2)
        projected_rotation = torch.einsum('bij,bjkhw,bkl->bilhw', intrinsic_mat, rot_mat, intrinsic_mat_inv)
        pcoords = torch.einsum('bijhw,jhw,bhw->bihw', projected_rotation, grid, depth.squeeze(1))

    projected_translation = torch.einsum('bij,bhwj->bihw', intrinsic_mat, translation)  # Kt [4,3,3] [4,128,416,3]
    pcoords += projected_translation
    x, y, z = torch.unbind(pcoords, dim=1)
    return x / z, y / z, z
Example #6
def motion_field_consistency_loss(frame1transformed_pixelxy, mask, rotation1,
                                  translation1, rotation2, translation2):
    """Computes a cycle consistency loss between two motion maps.

    Given two rotation and translation maps (of two frames), and a mapping from
    one frame to the other, this function assists in imposing that the fields at
    frame 1 represent the opposite motion of the ones in frame 2.

    In other words: At any given pixel on frame 1, if we apply the translation and
    rotation designated at that pixel, we land on some pixel in frame 2, and if we
    apply the translation and rotation designated there, we land back at the
    original pixel at frame 1.

    Args:
      frame1transformed_pixelxy: A tf.Tensor of shape [B, H, W, 2] representing
        the motion-transformed location of each pixel in frame 1. It is assumed
        (but not verified) that frame1transformed_pixelxy was obtained by properly
        applying rotation1 and translation1 on the depth map of frame 1.
      mask: A tf.Tensor of shape [B, H, W, 2] expressing the weight of each pixel
        in the calculation of the consistency loss.
      rotation1: A tf.Tensor of shape [B, 3] representing rotation angles.
      translation1: A tf.Tensor of shape [B, H, W, 3] representing translation
        vectors.
      rotation2: A tf.Tensor of shape [B, 3] representing rotation angles.
      translation2: A tf.Tensor of shape [B, H, W, 3] representing translation
        vectors.

    Returns:
      A dictionary from string to tf.Tensor, with the following entries:
        rotation_error: A tf scalar, the rotation consistency error.
        translation_error: A tf scalar, the translation consistency error.
    """

    translation2resampled = tf.contrib.resampler.resampler(
        translation2, tf.stop_gradient(frame1transformed_pixelxy))
    rotation1field = tf.broadcast_to(_expand_dims_twice(rotation1, -2),
                                     tf.shape(translation1))
    rotation2field = tf.broadcast_to(_expand_dims_twice(rotation2, -2),
                                     tf.shape(translation2))
    rotation1matrix = transform_utils.matrix_from_angles(rotation1field)
    rotation2matrix = transform_utils.matrix_from_angles(rotation2field)

    rot_unit, trans_zero = transform_utils.combine(rotation2matrix,
                                                   translation2resampled,
                                                   rotation1matrix,
                                                   translation1)
    eye = tf.eye(3, batch_shape=tf.shape(rot_unit)[:-2])

    # We normalize the product of rotations by the product of their norms, to make
    # the loss agnostic of their magnitudes, only wanting them to be opposite in
    # directions. Otherwise the loss has a tendency to drive the rotations to
    # zero.
    rot_error = tf.reduce_mean(tf.square(rot_unit - eye), axis=(3, 4))
    rot1_scale = tf.reduce_mean(tf.square(rotation1matrix - eye), axis=(3, 4))
    rot2_scale = tf.reduce_mean(tf.square(rotation2matrix - eye), axis=(3, 4))
    rot_error /= (1e-24 + rot1_scale + rot2_scale)
    rotation_error = tf.reduce_mean(rot_error)

    def norm(x):
        return tf.reduce_sum(tf.square(x), axis=-1)

    # Here again, we normalize by the magnitudes, for the same reason.
    translation_error = tf.reduce_mean(
        mask * norm(trans_zero) /
        (1e-24 + norm(translation1) + norm(translation2)))

    return {
        'rotation_error': rotation_error,
        'translation_error': translation_error
    }
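Here transform_utils.combine presumably composes the two rigid motions, so rot_unit is approximately rotation2matrix @ rotation1matrix and trans_zero is approximately rotation2matrix @ translation1 + translation2resampled; a perfect cycle would give the identity rotation and a zero translation. A per-pixel NumPy sketch of the normalized errors (illustrative only; the composition order is an assumption about combine, not confirmed by this snippet):

import numpy as np

def cycle_consistency_errors(rot1, trans1, rot2, trans2, eps=1e-24):
    """rot1, rot2: [3, 3] rotations; trans1, trans2: [3] translations."""
    eye = np.eye(3)
    rot_unit = rot2 @ rot1               # should be close to the identity
    trans_zero = rot2 @ trans1 + trans2  # should be close to zero
    rot_error = np.mean((rot_unit - eye) ** 2)
    rot_scale = np.mean((rot1 - eye) ** 2) + np.mean((rot2 - eye) ** 2)
    trans_error = np.sum(trans_zero ** 2) / (
        eps + np.sum(trans1 ** 2) + np.sum(trans2 ** 2))
    return rot_error / (eps + rot_scale), trans_error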
Example #7
    def __Cyclic_Consistency_Loss(self, frame1transformed_depth_pixelxy, mask, rotation1, translation1, rotation2,
                                  translation2):
        """
               计算两个运动图之间的循环一致性损失。
           在第1帧的任何给定像素处,如果我们应用在该像素处指定的平移和旋转,
           则变换到第2帧的某个像素处,然后在该像素上应用在该像素处指定的平移和旋转,我们将回到第1帧的原始像素那个位置.

         Args:
           frame1transformed_depth_pixelxy: A tf.Tensor of shape [B, H, W, 2]
           representing the motion-transformed location of each pixel in frame 1.
           It is assumed (but not verified) that
           frame1transformed_pixelxy was obtained by properlyapplying rotation1 and translation1 on the depth map of frame 1.
           mask: 张量 [b,H,W,2] 表示一致性损失的计算中的每个像素的权重。__Depth_Consistency_Loss里面的frame1_closer_to_camera
           rotation1:  [B, 3] 旋转角 1->2
           translation1: [B, H, W, 3] 平移向量场(由每个像素的平移向量构成),1->2
           rotation2: [B, 3] 旋转角 2->1
           translation2: [B, H, W, 3] 平移向量场(由每个像素的平移向量构成),2->1

         Returns:
           A dicionary from string to tf.Tensor, with the following entries:
             rotation_error: A tf scalar, the rotation consistency error.
             translation_error: A tf scalar, the translation consistency error.
         """
        translation2resampled = resample(translation2.permute(0, 3, 1, 2),
                                         frame1transformed_depth_pixelxy.detach()).permute(0, 2, 3, 1)  # detach mirrors tf.stop_gradient

        def _expand_dims_twice(x, dim):
            return torch.unsqueeze(torch.unsqueeze(x, dim), dim)

        rotation1field, _ = torch.broadcast_tensors(_expand_dims_twice(rotation1, -2),
                                                    translation1)  # translation1 [4,128,416,3]
        rotation2field, _ = torch.broadcast_tensors(_expand_dims_twice(rotation2, -2), translation2)
        rotation1matrix = transform_utils.matrix_from_angles(rotation1field)
        rotation2matrix = transform_utils.matrix_from_angles(rotation2field)

        rot_unit, trans_zero = transform_utils.combine(rotation2matrix, translation2resampled,
                                                       rotation1matrix, translation1)

        eye_shape = rot_unit.shape[:-2] + (1, 1)
        eye = torch.eye(3, device=rot_unit.device).unsqueeze(0).repeat(eye_shape)

        # We normalize the product of rotations by the product of their norms, to make
        # the loss agnostic of their magnitudes, only wanting them to be opposite in
        # directions. Otherwise the loss has a tendency to drive the rotations to
        # zero.
        rot_error = torch.mean((rot_unit - eye).pow(2), dim=(3, 4))
        rot1_scale = torch.mean((rotation1matrix - eye).pow(2), dim=(3, 4))
        rot2_scale = torch.mean((rotation2matrix - eye).pow(2), dim=(3, 4))
        rot_error /= (1e-24 + rot1_scale + rot2_scale)
        rotation_error = rot_error.mean()

        def norm(x):
            return x.pow(2).sum(-1)

        # Here again, we normalize by the magnitudes, for the same reason.
        translation_error = (mask * norm(trans_zero) / (1e-24 + norm(translation1) + norm(translation2))).mean()

        return rotation_error, translation_error
Example #8
def motion_field_consistency_loss(frame1transformed_pixelx,
                                  frame1transformed_pixely, mask, rotation1,
                                  translation1, rotation2, translation2):
    """Computes a cycle consistency loss between two motion maps.
    Given two rotation and translation maps (of two frames), and a mapping from
    one frame to the other, this function assists in imposing that the fields at
    frame 1 represent the opposite motion of the ones in frame 2.
    In other words: At any given pixel on frame 1, if we apply the translation and
    rotation designated at that pixel, we land on some pixel in frame 2, and if we
    apply the translation and rotation designated there, we land back at the
    original pixel at frame 1.
    Args:
    frame1transformed_pixelx: A torch.Tensor of shape [B, H, W] representing the
        motion-transformed x-location of each pixel in frame 1.
    frame1transformed_pixely: A torch.Tensor of shape [B, H, W] representing the
        motion-transformed y-location of each pixel in frame 1.
    mask: A torch.Tensor of shape [B, H, W, 2] expressing the weight of each pixel
        in the calculation of the consistency loss.
    rotation1: A torch.Tensor of shape [B, 3] representing rotation angles.
    translation1: A torch.Tensor of shape [B, H, W, 3] representing translation
        vectors.
    rotation2: A torch.Tensor of shape [B, 3] representing rotation angles.
    translation2: A torch.Tensor of shape [B, H, W, 3] representing translation
        vectors.
    Returns:
    A dictionary from string to torch.Tensor, with the following entries:
        rotation_error: A scalar torch.Tensor, the rotation consistency error.
        translation_error: A scalar torch.Tensor, the translation consistency error.
    """

    translation2resampled = resampler.resampler_with_unstacked_warp(
        translation2,
        frame1transformed_pixelx.detach(),
        frame1transformed_pixely.detach(),
        safe=False)
    translation2resampled = translation2resampled.view(translation2.shape)
    rotation1field = _expand_dims_twice(rotation1,
                                        -2).expand(translation1.shape)
    rotation2field = _expand_dims_twice(rotation2,
                                        -2).expand(translation2.shape)
    rotation1matrix = transform_utils.matrix_from_angles(rotation1field)
    rotation2matrix = transform_utils.matrix_from_angles(rotation2field)

    rot_unit, trans_zero = transform_utils.combine(rotation2matrix,
                                                   translation2resampled,
                                                   rotation1matrix,
                                                   translation1)
    eye = torch.eye(3).to(
        device=rot_unit.device)  #batch_shape=rot_unit.shape[:-2]
    for i in range(len(rot_unit.shape[:-2])):
        eye = eye.unsqueeze(0)
    eye = eye.repeat(*rot_unit.shape[:-2], 1, 1)

    # We normalize the product of rotations by the product of their norms, to make
    # the loss agnostic of their magnitudes, only wanting them to be opposite in
    # directions. Otherwise the loss has a tendency to drive the rotations to
    # zero.
    rot_error = torch.mean(torch.square(rot_unit - eye), dim=(3, 4))
    rot1_scale = torch.mean(torch.square(rotation1matrix - eye), dim=(3, 4))
    rot2_scale = torch.mean(torch.square(rotation2matrix - eye), dim=(3, 4))
    rot_error = rot_error / (1e-24 + rot1_scale + rot2_scale)
    rotation_error = torch.mean(rot_error)

    def norm(x):
        return torch.sum(torch.square(x), dim=-1)

    # Here again, we normalize by the magnitudes, for the same reason.
    translation_error = torch.mean(
        torch.mul(
            mask,
            norm(trans_zero) /
            (1e-24 + norm(translation1) + norm(translation2resampled))))

    return {
        'rotation_error': rotation_error,
        'translation_error': translation_error
    }
Example #9
def _using_motion_vector_with_distortion(depth,
                                         translation,
                                         rotation_angles,
                                         intrinsic_mat,
                                         distortion_coeff=0.0):
    """A helper for using_motion_vector. See docstring therein."""

    if translation.shape.ndims not in (2, 4):
        raise ValueError('\'translation\' should have rank 2 or 4, not %d' %
                         translation.shape.ndims)
    if translation.shape[-1] != 3:
        raise ValueError('translation\'s last dimension should be 3, not %d' %
                         translation.shape[-1])
    if translation.shape.ndims == 2:
        translation = tf.expand_dims(tf.expand_dims(translation, 1), 1)

    _, height, width = tf.unstack(tf.shape(depth))
    grid = tf.squeeze(tf.stack(
        tf.meshgrid(tf.range(width), tf.range(height), (1, ))),
                      axis=3)  # 3 x height x width
    grid = tf.to_float(grid)
    intrinsic_mat_inv = tf.linalg.inv(intrinsic_mat)

    normalized_grid = tf.einsum('bij,jhw->bihw', intrinsic_mat_inv, grid)
    radii_squared = tf.reduce_sum(tf.square(normalized_grid[:, :2, :, :]),
                                  axis=1)

    undistortion_factor = quadratic_inverse_distortion_scale(
        distortion_coeff, radii_squared)
    undistortion_factor = tf.stack([
        undistortion_factor, undistortion_factor,
        tf.ones_like(undistortion_factor)
    ],
                                   axis=1)
    normalized_grid *= undistortion_factor

    rot_mat = transform_utils.matrix_from_angles(rotation_angles)
    # We have to treat separately the case of a per-image rotation vector and a
    # per-image rotation field, because the broadcasting capabilities of einsum
    # are limited.
    if rotation_angles.shape.ndims == 2:
        # The calculation here is identical to the one in inverse_warp above.
        # However, we use einsum for better clarity. Under the hood, einsum performs
        # the reshaping and invocation of BatchMatMul, instead of doing it manually,
        # as in inverse_warp.
        pcoords = tf.einsum('bij,bjhw,bhw->bihw', rot_mat, normalized_grid,
                            depth)
    elif rotation_angles.shape.ndims == 4:
        # We push the H and W dimensions to the end, and transpose the rotation
        # matrix elements (as noted above).
        rot_mat = tf.transpose(rot_mat, [0, 3, 4, 1, 2])
        pcoords = tf.einsum('bijhw,bjhw,bhw->bihw', rot_mat, normalized_grid,
                            depth)

    pcoords += tf.transpose(translation, [0, 3, 1, 2])

    x, y, z = tf.unstack(pcoords, axis=1)
    x /= z
    y /= z
    scale = quadraric_distortion_scale(distortion_coeff,
                                       tf.square(x) + tf.square(y))
    x *= scale
    y *= scale

    pcoords = tf.einsum('bij,bjhw->bihw', intrinsic_mat,
                        tf.stack([x, y, tf.ones_like(x)], axis=1))
    x, y, _ = tf.unstack(pcoords, axis=1)

    return x, y, z