Code example #1
def __init__(self, validate_args=False, name="cholesky_to_inv_cholesky"):
    with tf.name_scope(name) as name:
        self._cholesky = CholeskyOuterProduct()
        super(CholeskyToInvCholesky,
              self).__init__(forward_min_event_ndims=2,
                             validate_args=validate_args,
                             name=name)
Code example #2
def __init__(self, validate_args=False, name='cholesky_to_inv_cholesky'):
    parameters = dict(locals())
    with tf.name_scope(name) as name:
        self._cholesky = CholeskyOuterProduct()
        super(CholeskyToInvCholesky,
              self).__init__(forward_min_event_ndims=2,
                             validate_args=validate_args,
                             parameters=parameters,
                             name=name)
Code example #3
def __init__(self, validate_args=False, name=None):
    super(CholeskyToInvCholesky,
          self).__init__(forward_min_event_ndims=2,
                         validate_args=validate_args,
                         name=name or "cholesky_to_inv_cholesky")
    self._cholesky = CholeskyOuterProduct()
    # No upside in additional `tf.function` decorating since
    # `self._cholesky` is "private."
    self._cholesky._use_tf_function = False  # pylint: disable=protected-access
Code example #4
def __init__(self, validate_args=False, name=None):
    super(CholeskyToInvCholesky,
          self).__init__(forward_min_event_ndims=2,
                         validate_args=validate_args,
                         name=name or "cholesky_to_inv_cholesky")
    self._cholesky = CholeskyOuterProduct()
Code example #5
class CholeskyToInvCholesky(bijector.Bijector):
    """Maps the Cholesky factor of `M` to the Cholesky factor of `M^{-1}`.

  The `forward` and `inverse` calculations are conceptually identical to:

  ```python
  def forward(x):
    return tf.cholesky(tf.linalg.inv(tf.matmul(x, x, adjoint_b=True)))

  inverse = forward
  ```

  or, similarly,

  ```python
  tfb = tfp.bijectors
  CholeskyToInvCholesky = tfb.Chain([
      tfb.Invert(tfb.CholeskyOuterProduct()),
      tfb.MatrixInverse(),
      tfb.CholeskyOuterProduct(),
  ])
  ```

  However, the actual calculations exploit the triangular structure of the
  matrices.
  """
    def __init__(self, validate_args=False, name=None):
        super(CholeskyToInvCholesky,
              self).__init__(forward_min_event_ndims=2,
                             validate_args=validate_args,
                             name=name or "cholesky_to_inv_cholesky")
        self._cholesky = CholeskyOuterProduct()

    def _forward(self, x):
        with tf.control_dependencies(self._assertions(x)):
            x_shape = tf.shape(input=x)
            identity_matrix = tf.eye(x_shape[-1],
                                     batch_shape=x_shape[:-2],
                                     dtype=dtype_util.base_dtype(x.dtype))
            # Note `tf.linalg.triangular_solve` implicitly zeros the upper-triangular part of `x`.
            y = tf.linalg.triangular_solve(x, identity_matrix)
            y = tf.matmul(y, y, adjoint_a=True)
            return tf.linalg.cholesky(y)

    _inverse = _forward

    def _forward_log_det_jacobian(self, x):
        # CholeskyToInvCholesky.forward(X) is equivalent to
        # 1) M = CholeskyOuterProduct.forward(X)
        # 2) N = invert(M)
        # 3) Y = CholeskyOuterProduct.inverse(N)
        #
        # For step 1,
        #   |Jac(outerprod(X))| = 2^p prod_{j=0}^{p-1} X[j,j]^{p-j}.
        # For step 2,
        #   |Jac(inverse(M))| = |M|^{-(p+1)} (because M is symmetric)
        #                     = |X|^{-2(p+1)} = (prod_{j=0}^{p-1} X[j,j])^{-2(p+1)}
        #   (see http://web.mit.edu/18.325/www/handouts/handout2.pdf sect 3.0.2)
        # For step 3,
        #   |Jac(Cholesky(N))| = 1 / |Jac(outerprod(Y))|
        #                      = 1 / (2^p prod_{j=0}^{p-1} Y[j,j]^{p-j})
        n = tf.cast(tf.shape(input=x)[-1], x.dtype)
        y = self._forward(x)
        return (
            (self._cholesky.forward_log_det_jacobian(x, event_ndims=2) -
             (n + 1.) * tf.reduce_sum(
                 input_tensor=tf.math.log(tf.linalg.diag_part(x)), axis=-1)) -
            (self._cholesky.forward_log_det_jacobian(y, event_ndims=2) -
             (n + 1.) * tf.reduce_sum(
                 input_tensor=tf.math.log(tf.linalg.diag_part(y)), axis=-1)))

    _inverse_log_det_jacobian = _forward_log_det_jacobian

    def _assertions(self, x):
        if not self.validate_args:
            return []
        x_shape = tf.shape(input=x)
        is_matrix = assert_util.assert_rank_at_least(
            x, 2, message="Input must have rank at least 2.")
        is_square = assert_util.assert_equal(
            x_shape[-2], x_shape[-1], message="Input must be a square matrix.")
        diag_part_x = tf.linalg.diag_part(x)
        is_lower_triangular = assert_util.assert_equal(
            tf.linalg.band_part(x, 0, -1),  # Preserves triu, zeros rest.
            tf.linalg.diag(diag_part_x),
            message="Input must be lower triangular.")
        is_positive_diag = assert_util.assert_positive(
            diag_part_x,
            message="Input must have all positive diagonal entries.")
        return [is_matrix, is_square, is_lower_triangular, is_positive_diag]
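The class above is easiest to sanity-check with a tiny usage sketch. The snippet below is not part of the original source: it assumes the bijector is exposed as `tfp.bijectors.CholeskyToInvCholesky` (as in released TensorFlow Probability) and that TF2 eager execution is enabled. It checks the conceptual `cholesky(inv(x @ x^T))` formula from the docstring and the fact that the map is its own inverse.

```python
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

tfb = tfp.bijectors

# A valid Cholesky factor: lower triangular with a positive diagonal.
x = np.array([[2.0, 0.0, 0.0],
              [1.0, 3.0, 0.0],
              [0.5, 1.0, 4.0]], dtype=np.float32)

bij = tfb.CholeskyToInvCholesky(validate_args=True)
y = bij.forward(x)

# Matches the conceptual formula from the docstring:
# cholesky(inv(x @ x^T)).
expected = tf.linalg.cholesky(tf.linalg.inv(tf.matmul(x, x, adjoint_b=True)))
np.testing.assert_allclose(y.numpy(), expected.numpy(), rtol=1e-5, atol=1e-6)

# The bijector is an involution (`_inverse = _forward` above),
# so applying it twice recovers `x`.
np.testing.assert_allclose(bij.forward(y).numpy(), x, rtol=1e-5, atol=1e-6)
```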
Code example #6
def __init__(self, validate_args=False, name=None):
  super(CholeskyToInvCholesky, self).__init__(
      forward_min_event_ndims=2,
      validate_args=validate_args,
      name=name or "cholesky_to_inv_cholesky")
  self._cholesky = CholeskyOuterProduct()
Code example #7
class CholeskyToInvCholesky(bijector.Bijector):
  """Maps the Cholesky factor of `M` to the Cholesky factor of `M^{-1}`.

  The `forward` and `inverse` calculations are conceptually identical to:

  ```python
  def forward(x):
    return tf.cholesky(tf.linalg.inv(tf.matmul(x, x, adjoint_b=True)))

  inverse = forward
  ```

  or, similarly,

  ```python
  tfb = tfp.bijectors
  CholeskyToInvCholesky = tfb.Chain([
      tfb.Invert(tfb.CholeskyOuterProduct()),
      tfb.MatrixInverse(),
      tfb.CholeskyOuterProduct(),
  ])
  ```

  However, the actual calculations exploit the triangular structure of the
  matrices.
  """

  def __init__(self, validate_args=False, name=None):
    super(CholeskyToInvCholesky, self).__init__(
        forward_min_event_ndims=2,
        validate_args=validate_args,
        name=name or "cholesky_to_inv_cholesky")
    self._cholesky = CholeskyOuterProduct()

  def _forward(self, x):
    with tf.control_dependencies(self._assertions(x)):
      x_shape = tf.shape(x)
      identity_matrix = tf.eye(
          x_shape[-1], batch_shape=x_shape[:-2], dtype=x.dtype.base_dtype)
      # Note `matrix_triangular_solve` implicitly zeros upper triangular of `x`.
      y = tf.matrix_triangular_solve(x, identity_matrix)
      y = tf.matmul(y, y, adjoint_a=True)
      return tf.cholesky(y)

  _inverse = _forward

  def _forward_log_det_jacobian(self, x):
    # CholeskyToInvCholesky.forward(X) is equivalent to
    # 1) M = CholeskyOuterProduct.forward(X)
    # 2) N = invert(M)
    # 3) Y = CholeskyOuterProduct.inverse(N)
    #
    # For step 1,
    #   |Jac(outerprod(X))| = 2^p prod_{j=0}^{p-1} X[j,j]^{p-j}.
    # For step 2,
    #   |Jac(inverse(M))| = |M|^{-(p+1)} (because M is symmetric)
    #                     = |X|^{-2(p+1)} = (prod_{j=0}^{p-1} X[j,j])^{-2(p+1)}
    #   (see http://web.mit.edu/18.325/www/handouts/handout2.pdf sect 3.0.2)
    # For step 3,
    #   |Jac(Cholesky(N))| = 1 / |Jac(outerprod(Y))|
    #                      = 1 / (2^p prod_{j=0}^{p-1} Y[j,j]^{p-j})
    n = tf.cast(tf.shape(x)[-1], x.dtype)
    y = self._forward(x)
    return (
        (self._cholesky.forward_log_det_jacobian(x, event_ndims=2) -
         (n + 1.) * tf.reduce_sum(tf.log(tf.matrix_diag_part(x)), axis=-1)) -
        (self._cholesky.forward_log_det_jacobian(y, event_ndims=2) -
         (n + 1.) * tf.reduce_sum(tf.log(tf.matrix_diag_part(y)), axis=-1)))

  _inverse_log_det_jacobian = _forward_log_det_jacobian

  def _assertions(self, x):
    if not self.validate_args:
      return []
    x_shape = tf.shape(x)
    is_matrix = tf.assert_rank_at_least(
        x, 2,
        message="Input must have rank at least 2.")
    is_square = tf.assert_equal(
        x_shape[-2], x_shape[-1],
        message="Input must be a square matrix.")
    diag_part_x = tf.matrix_diag_part(x)
    is_lower_triangular = tf.assert_equal(
        tf.matrix_band_part(x, 0, -1),  # Preserves triu, zeros rest.
        tf.matrix_diag(diag_part_x),
        message="Input must be lower triangular.")
    is_positive_diag = tf.assert_positive(
        diag_part_x,
        message="Input must have all positive diagonal entries.")
    return [is_matrix, is_square, is_lower_triangular, is_positive_diag]
Code example #8
    def __init__(self,
                 base_kernel,
                 fixed_inputs,
                 diag_shift=None,
                 validate_args=False,
                 name='SchurComplement'):
        """Construct a SchurComplement kernel instance.

    Args:
      base_kernel: A `PositiveSemidefiniteKernel` instance, the kernel used to
        build the block matrices of which this kernel computes the Schur
        complement.
      fixed_inputs: A Tensor, representing a collection of inputs. The Schur
        complement that this kernel computes comes from a block matrix, whose
        bottom-right corner is derived from `base_kernel.matrix(fixed_inputs,
        fixed_inputs)`, and whose top-right and bottom-left pieces are
        constructed by computing the base_kernel at pairs of input locations
        together with these `fixed_inputs`. `fixed_inputs` is allowed to be an
        empty collection (either `None` or having a zero shape entry), in which
        case the kernel falls back to the trivial application of `base_kernel`
        to inputs. See class-level docstring for more details on the exact
        computation this does; `fixed_inputs` corresponds to the `Z` structure
        discussed there. `fixed_inputs` is assumed to have shape `[b1, ..., bB,
        N, f1, ..., fF]` where the `b`'s are batch shape entries, the `f`'s are
        feature_shape entries, and `N` is the number of fixed inputs. Use of
        this kernel entails a 1-time O(N^3) cost of computing the Cholesky
        decomposition of the k(Z, Z) matrix. The batch shape elements of
        `fixed_inputs` must be broadcast compatible with
        `base_kernel.batch_shape`.
      diag_shift: A floating point scalar to be added to the diagonal of the
        divisor_matrix before computing its Cholesky.
      validate_args: If `True`, parameters are checked for validity despite
        possibly degrading runtime performance.
        Default value: `False`
      name: Python `str` name prefixed to Ops created by this class.
        Default value: `"SchurComplement"`
    """
        with tf.compat.v1.name_scope(name, values=[base_kernel,
                                                   fixed_inputs]) as name:
            # If the base_kernel doesn't have a specified dtype, we can't pass it off
            # to common_dtype, which always expects `tf.as_dtype(dtype)` to work (and
            # it doesn't if the given `dtype` is None).
            # TODO(b/130421035): Consider changing common_dtype to allow Nones, and
            # clean this up after.
            #
            # Thus, we spell out the logic
            # here: use the dtype of `fixed_inputs` if possible. If base_kernel.dtype
            # is not None, use the usual logic.
            if base_kernel.dtype is None:
                dtype = None if fixed_inputs is None else fixed_inputs.dtype
            else:
                dtype = dtype_util.common_dtype([base_kernel, fixed_inputs],
                                                tf.float32)
            self._base_kernel = base_kernel
            self._fixed_inputs = (None if fixed_inputs is None else
                                  tf.convert_to_tensor(value=fixed_inputs,
                                                       dtype=dtype))
            if not self._is_fixed_inputs_empty():
                # We create and store this matrix here, so that we get the caching
                # benefit when we later access its cholesky. If we computed the matrix
                # every time we needed the cholesky, the bijector cache wouldn't be hit.
                self._divisor_matrix = base_kernel.matrix(
                    fixed_inputs, fixed_inputs)
                if diag_shift is not None:
                    broadcast_shape = get_broadcast_shape(
                        self._divisor_matrix, diag_shift[..., tf.newaxis])
                    self._divisor_matrix = tf.broadcast_to(
                        self._divisor_matrix, broadcast_shape)
                    self._divisor_matrix = _add_diagonal_shift(
                        self._divisor_matrix, diag_shift)

            self._cholesky_bijector = Invert(CholeskyOuterProduct())
        super(SchurComplement, self).__init__(base_kernel.feature_ndims,
                                              dtype=dtype,
                                              name=name)
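For context on how the constructor above is used, here is a small usage sketch (not part of the original source). It assumes the kernel is available as `tfp.math.psd_kernels.SchurComplement` together with `ExponentiatedQuadratic` as the base kernel; the module path has moved across TFP releases (older versions exposed it under `tfp.positive_semidefinite_kernels`), so adjust the import to your version.

```python
import numpy as np
import tensorflow_probability as tfp

psd_kernels = tfp.math.psd_kernels

base_kernel = psd_kernels.ExponentiatedQuadratic(amplitude=1.0, length_scale=0.5)

# `fixed_inputs` plays the role of `Z` in the docstring: shape [N, F] with
# N = 5 fixed points and F = 1 feature dimension. Building the kernel pays a
# one-time O(N^3) Cholesky of k(Z, Z), with `diag_shift` added to its diagonal.
fixed_inputs = np.random.uniform(-1.0, 1.0, size=[5, 1]).astype(np.float32)

schur = psd_kernels.SchurComplement(
    base_kernel=base_kernel,
    fixed_inputs=fixed_inputs,
    diag_shift=np.array(1e-6, dtype=np.float32))

x1 = np.random.uniform(-1.0, 1.0, size=[3, 1]).astype(np.float32)
x2 = np.random.uniform(-1.0, 1.0, size=[4, 1]).astype(np.float32)

# k_schur(x1, x2) = k(x1, x2) - k(x1, Z) k(Z, Z)^{-1} k(Z, x2); shape [3, 4].
print(schur.matrix(x1, x2).shape)
```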