Example #1
    def variance(self, name="variance"):
        """Variance of the Wishart distribution.

    This function should not be confused with the covariance of the Wishart.
    The covariance matrix would have shape `q x q` where
    `q = dimension * (dimension+1) / 2`, with elements corresponding to some
    mapping from a lower-triangular matrix to a vector space.

    This function returns the diagonal of the covariance matrix but shaped
    as a `dimension x dimension` matrix.

    Args:
      name: The name of this op.

    Returns:
      variance: `Tensor` of dtype `self.dtype`.
    """
        with ops.name_scope(self.name):
            with ops.name_scope(name, values=list(self.inputs.values())):
                x = math_ops.sqrt(self.df) * self.scale_operator_pd.to_dense()
                d = array_ops.expand_dims(array_ops.batch_matrix_diag_part(x), -1)
                v = math_ops.square(x) + math_ops.batch_matmul(d, d, adj_y=True)
                if self.cholesky_input_output_matrices:
                    return linalg_ops.batch_cholesky(v)
                else:
                    return v
Example #2
  def variance(self, name='variance'):
    """Variance of the Wishart distribution.

    This function should not be confused with the covariance of the Wishart.
    The covariance matrix would have shape `q x q` where
    `q = dimension * (dimension+1) / 2`, with elements corresponding to some
    mapping from a lower-triangular matrix to a vector space.

    This function returns the diagonal of the covariance matrix but shaped
    as a `dimension x dimension` matrix.

    Args:
      name: The name of this op.

    Returns:
      variance: `Tensor` of dtype `self.dtype`.
    """
    with ops.name_scope(self.name):
      with ops.name_scope(name, values=list(self.inputs.values())):
        x = math_ops.sqrt(self.df) * self.scale_operator_pd.to_dense()
        d = array_ops.expand_dims(array_ops.batch_matrix_diag_part(x), -1)
        v = math_ops.square(x) + math_ops.batch_matmul(d, d, adj_y=True)
        if self.cholesky_input_output_matrices:
          return linalg_ops.batch_cholesky(v)
        else:
          return v
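The two examples above compute the elementwise variance of a Wishart sample as a `dimension x dimension` matrix. A minimal usage sketch, assuming the contemporaneous `tf.contrib.distributions.WishartFull` class and graph-session execution from the same TensorFlow generation (both are our assumptions; the listing only shows the method body):

import numpy as np
import tensorflow as tf

# Hypothetical usage of the variance() method shown above. WishartFull and
# its constructor arguments are assumed from the old tf.contrib API.
scale = np.array([[2.0, 0.5], [0.5, 1.0]], dtype=np.float32)  # k x k PD scale
dist = tf.contrib.distributions.WishartFull(df=5.0, scale=scale)
v = dist.variance()  # k x k Tensor of per-entry variances

with tf.Session() as sess:
    print(sess.run(v))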
Example #3
 def _variance(self):
   x = math_ops.sqrt(self.df) * self.scale_operator_pd.to_dense()
   d = array_ops.expand_dims(array_ops.batch_matrix_diag_part(x), -1)
   v = math_ops.square(x) + math_ops.batch_matmul(d, d, adj_y=True)
   if self.cholesky_input_output_matrices:
     return linalg_ops.batch_cholesky(v)
   return v
Example #4
 def _variance(self):
     x = math_ops.sqrt(self.df) * self.scale_operator_pd.to_dense()
     d = array_ops.expand_dims(array_ops.batch_matrix_diag_part(x), -1)
     v = math_ops.square(x) + math_ops.batch_matmul(d, d, adj_y=True)
     if self.cholesky_input_output_matrices:
         return linalg_ops.batch_cholesky(v)
     return v
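Examples #3 and #4 are the same computation stripped of the name-scope plumbing. As a sanity check, here is a NumPy restatement (ours, not from the listing) showing that the code implements the closed form Var[X_ij] = df * (sigma_ij^2 + sigma_ii * sigma_jj) for a Wishart with degrees of freedom df and scale sigma:

import numpy as np

df = 5.0
sigma = np.array([[2.0, 0.5], [0.5, 1.0]])  # k x k positive-definite scale

# Mirrors the TF code above: x = sqrt(df) * sigma; v = x**2 + d @ d.T
x = np.sqrt(df) * sigma
d = np.diag(x)[:, np.newaxis]  # column vector holding diag_part(x)
v = np.square(x) + d @ d.T

# Closed form for Wishart variances, entry by entry.
v_closed = df * (sigma ** 2 + np.outer(np.diag(sigma), np.diag(sigma)))
assert np.allclose(v, v_closed)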
Example #5
 def std(self, name='std'):
   """Standard deviation of the Wishart distribution."""
   with ops.name_scope(self.name):
     with ops.name_scope(name, values=list(self.inputs.values())):
       if self.cholesky_input_output_matrices:
         raise ValueError(
              'Computing std. dev. when cholesky_input_output_matrices=True '
             'does not make sense.')
       return linalg_ops.batch_cholesky(self.variance())
Example #6
 def std(self, name="std"):
     """Standard deviation of the Wishart distribution."""
     with ops.name_scope(self.name):
         with ops.name_scope(name, values=list(self.inputs.values())):
             if self.cholesky_input_output_matrices:
                 raise ValueError(
                     "Computing std. dev. when is cholesky_input_output_matrices=True " "does not make sense."
                 )
             return linalg_ops.batch_cholesky(self.variance())
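Examples #5 and #6 define the standard deviation as a Cholesky factor of `variance()` and refuse to run in Cholesky input/output mode. A hedged usage sketch (class name and flag assumed from the same API generation; the guard is the point):

import numpy as np
import tensorflow as tf

# Hypothetical sketch: std() raises at call time when the distribution was
# built with cholesky_input_output_matrices=True, so guard the call.
scale = np.array([[2.0, 0.5], [0.5, 1.0]], dtype=np.float32)
dist = tf.contrib.distributions.WishartFull(
    df=5.0, scale=scale, cholesky_input_output_matrices=True)
try:
    dist.std()
except ValueError as err:
    print(err)  # std. dev. is undefined in Cholesky input/output mode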
Example #7
  def __init__(self, matrix, verify_pd=True, name='OperatorPDFull'):
    """Initialize an OperatorPDFull.

    Args:
      matrix:  Shape `[N1,...,Nb, k, k]` tensor with `b >= 0`, `k >= 1`.  The
        last two dimensions should be `k x k` symmetric positive definite
        matrices.
      verify_pd: Whether to check that `matrix` is symmetric positive definite.
        If `verify_pd` is `False`, correct behavior is not guaranteed.
      name:  A name to prepend to all ops created by this class.
    """
    with ops.name_scope(name):
      with ops.name_scope('init', values=[matrix]):
        matrix = ops.convert_to_tensor(matrix)
        # Check symmetric here.  Positivity will be verified by checking the
        # diagonal of the Cholesky factor inside the parent class.  The Cholesky
        # factorization .batch_cholesky() does not always fail for non-PSD
        # matrices, so don't rely on that.
        if verify_pd:
          matrix = _check_symmetric(matrix)
        chol = linalg_ops.batch_cholesky(matrix)
        super(OperatorPDFull, self).__init__(chol, verify_pd=verify_pd)
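The `_check_symmetric` helper used above is not part of this listing. A plausible reconstruction (ours; the real helper may differ) attaches a runtime symmetry assertion as a control dependency, matching how the initializer threads `matrix` through:

import tensorflow as tf

def _check_symmetric_sketch(matrix):
  """Hypothetical stand-in for the unshown _check_symmetric helper."""
  # Compare the matrix with its batch transpose. Exact equality is strict;
  # a production check might allow a small numerical tolerance instead.
  matrix_t = tf.batch_matrix_transpose(matrix)
  assert_sym = tf.Assert(
      tf.reduce_all(tf.equal(matrix, matrix_t)),
      ["Input matrix should be symmetric."])
  with tf.control_dependencies([assert_sym]):
    return tf.identity(matrix)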
Example #8
  def _chol_capacitance(self, batch_mode):
    """Cholesky factorization of the capacitance term."""
    # Cholesky factor for (D^{-1} + V^T M^{-1} V), which is sometimes
    # known as the "capacitance" matrix.

    # self._operator will automatically use batch mode if needed; we cannot
    # force that here.
    # M^{-1} V
    minv_v = self._operator.solve(self._v)
    # V^T M^{-1} V
    if batch_mode:
      vt_minv_v = math_ops.batch_matmul(self._v, minv_v, adj_x=True)
    else:
      vt_minv_v = math_ops.matmul(self._v, minv_v, transpose_a=True)

    # D^{-1} + V^T M^{-1} V
    capacitance = self._diag_inv_operator.add_to_tensor(vt_minv_v)
    # Cholesky[D^{-1} + V^T M^{-1} V]
    if batch_mode:
      return linalg_ops.batch_cholesky(capacitance)
    else:
      return linalg_ops.cholesky(capacitance)
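The capacitance matrix C = D^{-1} + V^T M^{-1} V factored here is the term that makes the Woodbury identity cheap: (M + V D V^T)^{-1} = M^{-1} - M^{-1} V C^{-1} V^T M^{-1}. A NumPy sketch of the identity (our illustration, independent of the operator classes):

import numpy as np

rng = np.random.RandomState(0)
k, r = 5, 2
m = 2.0 * np.eye(k)              # M: k x k positive definite
v = rng.randn(k, r)              # V: k x r low-rank factor
d = np.diag(rng.rand(r) + 0.5)   # D: r x r positive diagonal

# Capacitance C = D^{-1} + V^T M^{-1} V; its Cholesky factor is what
# _chol_capacitance returns above.
minv_v = np.linalg.solve(m, v)
capacitance = np.linalg.inv(d) + v.T @ minv_v
chol_cap = np.linalg.cholesky(capacitance)

# Woodbury: (M + V D V^T)^{-1} == M^{-1} - M^{-1} V C^{-1} V^T M^{-1}
lhs = np.linalg.inv(m + v @ d @ v.T)
rhs = np.linalg.inv(m) - minv_v @ np.linalg.solve(capacitance, minv_v.T)
assert np.allclose(lhs, rhs)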
Example #9
    def __init__(self, mu, sigma=None, sigma_chol=None, name=None):
        """Multivariate Normal distributions on `R^k`.

    User must provide means `mu`, which are tensors of rank `N+1` (`N >= 0`)
    with the last dimension having length `k`.

    User must provide exactly one of `sigma` (the covariance matrices) or
    `sigma_chol` (the cholesky decompositions of the covariance matrices).
    `sigma` or `sigma_chol` must be of rank `N+2`.  The last two dimensions
    must both have length `k`.  The first `N` dimensions correspond to batch
    indices.

    If `sigma_chol` is not provided, the batch cholesky factorization of `sigma`
    is calculated for you.

    The shapes of `mu` and `sigma` must match for the first `N` dimensions.

    Regardless of which parameter is provided, the covariance matrices must all
    be **positive definite** (an error is raised if one of them is not).

    Args:
      mu: (N+1)-D.  `float` or `double` tensor, the means of the distributions.
      sigma: (N+2)-D.  (optional) `float` or `double` tensor, the covariances
        of the distribution(s).  The first `N+1` dimensions must match
        those of `mu`.  Must be batch-positive-definite.
      sigma_chol: (N+2)-D.  (optional) `float` or `double` tensor, a
        lower-triangular factorization of `sigma`
        (`sigma = sigma_chol . sigma_chol^*`).  The first `N+1` dimensions
        must match those of `mu`.  The tensor itself need not be batch
        lower triangular: we ignore the upper triangular part.  However,
        the batch diagonals must be positive (i.e., sigma_chol must be
        batch-positive-definite).
      name: The name to give Ops created by the initializer.

    Raises:
      ValueError: if not exactly one of sigma and sigma_chol is provided.
      TypeError: if mu and sigma (resp. sigma_chol) are different dtypes.
    """
        if (sigma is None) == (sigma_chol is None):
            raise ValueError(
                "Exactly one of sigma and sigma_chol must be provided")

        with ops.op_scope([mu, sigma, sigma_chol], name, "MultivariateNormal"):
            sigma_or_half = sigma_chol if sigma is None else sigma

            mu = ops.convert_to_tensor(mu)
            sigma_or_half = ops.convert_to_tensor(sigma_or_half)

            contrib_tensor_util.assert_same_float_dtype((mu, sigma_or_half))

            with ops.control_dependencies(
                [_assert_compatible_shapes(mu, sigma_or_half)]):
                mu = array_ops.identity(mu, name="mu")

                # Store the dimensionality of the MVNs
                self._k = array_ops.gather(array_ops.shape(mu),
                                           array_ops.rank(mu) - 1)

                if sigma_chol is not None:
                    # Ensure we only keep the lower triangular part.
                    sigma_chol = array_ops.batch_matrix_band_part(sigma_chol,
                                                                  num_lower=-1,
                                                                  num_upper=0)
                    sigma_det = _determinant_from_sigma_chol(sigma_chol)
                    with ops.control_dependencies(
                        [_assert_batch_positive_definite(sigma_chol)]):
                        self._sigma = math_ops.batch_matmul(sigma_chol,
                                                            sigma_chol,
                                                            adj_y=True,
                                                            name="sigma")
                        self._sigma_chol = array_ops.identity(
                            sigma_chol, "sigma_chol")
                        self._sigma_det = array_ops.identity(
                            sigma_det, "sigma_det")
                        self._mu = array_ops.identity(mu, "mu")
                else:  # sigma is not None
                    sigma_chol = linalg_ops.batch_cholesky(sigma)
                    sigma_det = _determinant_from_sigma_chol(sigma_chol)
                    # batch_cholesky checks for PSD, so we can just use it here.
                    with ops.control_dependencies([sigma_chol]):
                        self._sigma = array_ops.identity(sigma, "sigma")
                        self._sigma_chol = array_ops.identity(
                            sigma_chol, "sigma_chol")
                        self._sigma_det = array_ops.identity(
                            sigma_det, "sigma_det")
                        self._mu = array_ops.identity(mu, "mu")
Example #10
  def __init__(self, mu, sigma=None, sigma_chol=None, name=None):
    """Multivariate Normal distributions on `R^k`.

    User must provide means `mu`, which are tensors of rank `N+1` (`N >= 0`)
    with the last dimension having length `k`.

    User must provide exactly one of `sigma` (the covariance matrices) or
    `sigma_chol` (the cholesky decompositions of the covariance matrices).
    `sigma` or `sigma_chol` must be of rank `N+2`.  The last two dimensions
    must both have length `k`.  The first `N` dimensions correspond to batch
    indices.

    If `sigma_chol` is not provided, the batch cholesky factorization of `sigma`
    is calculated for you.

    The shapes of `mu` and `sigma` must match for the first `N` dimensions.

    Regardless of which parameter is provided, the covariance matrices must all
    be **positive definite** (an error is raised if one of them is not).

    Args:
      mu: (N+1)-D.  `float` or `double` tensor, the means of the distributions.
      sigma: (N+2)-D.  (optional) `float` or `double` tensor, the covariances
        of the distribution(s).  The first `N+1` dimensions must match
        those of `mu`.  Must be batch-positive-definite.
      sigma_chol: (N+2)-D.  (optional) `float` or `double` tensor, a
        lower-triangular factorization of `sigma`
        (`sigma = sigma_chol . sigma_chol^*`).  The first `N+1` dimensions
        must match those of `mu`.  The tensor itself need not be batch
        lower triangular: we ignore the upper triangular part.  However,
        the batch diagonals must be positive (i.e., sigma_chol must be
        batch-positive-definite).
      name: The name to give Ops created by the initializer.

    Raises:
      ValueError: if not exactly one of sigma and sigma_chol is provided.
      TypeError: if mu and sigma (resp. sigma_chol) are different dtypes.
    """
    if (sigma is None) == (sigma_chol is None):
      raise ValueError("Exactly one of sigma and sigma_chol must be provided")

    with ops.op_scope([mu, sigma, sigma_chol], name, "MultivariateNormal"):
      sigma_or_half = sigma_chol if sigma is None else sigma

      mu = ops.convert_to_tensor(mu)
      sigma_or_half = ops.convert_to_tensor(sigma_or_half)

      contrib_tensor_util.assert_same_float_dtype((mu, sigma_or_half))

      with ops.control_dependencies([
          _assert_compatible_shapes(mu, sigma_or_half)]):
        mu = array_ops.identity(mu, name="mu")

        # Store the dimensionality of the MVNs
        self._k = array_ops.gather(array_ops.shape(mu), array_ops.rank(mu) - 1)

        if sigma_chol is not None:
          # Ensure we only keep the lower triangular part.
          sigma_chol = array_ops.batch_matrix_band_part(
              sigma_chol, num_lower=-1, num_upper=0)
          sigma_det = _determinant_from_sigma_chol(sigma_chol)
          with ops.control_dependencies([
              _assert_batch_positive_definite(sigma_chol)]):
            self._sigma = math_ops.batch_matmul(
                sigma_chol, sigma_chol, adj_y=True, name="sigma")
            self._sigma_chol = array_ops.identity(sigma_chol, "sigma_chol")
            self._sigma_det = array_ops.identity(sigma_det, "sigma_det")
            self._mu = array_ops.identity(mu, "mu")
        else:  # sigma is not None
          sigma_chol = linalg_ops.batch_cholesky(sigma)
          sigma_det = _determinant_from_sigma_chol(sigma_chol)
          # batch_cholesky checks for PSD, so we can just use it here.
          with ops.control_dependencies([sigma_chol]):
            self._sigma = array_ops.identity(sigma, "sigma")
            self._sigma_chol = array_ops.identity(sigma_chol, "sigma_chol")
            self._sigma_det = array_ops.identity(sigma_det, "sigma_det")
            self._mu = array_ops.identity(mu, "mu")
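Examples #9 and #10 are the same initializer. A minimal construction sketch (assuming the early `tf.contrib.distributions.MultivariateNormal` class name; per the docstring, exactly one of `sigma` and `sigma_chol` may be passed):

import numpy as np
import tensorflow as tf

mu = np.zeros(3, dtype=np.float32)                    # k = 3 means
sigma = np.diag([1.0, 2.0, 3.0]).astype(np.float32)   # k x k covariance

# Either pass the covariance and let the initializer take its Cholesky...
mvn = tf.contrib.distributions.MultivariateNormal(mu=mu, sigma=sigma)

# ...or pass a lower-triangular factor directly and skip that O(k^3) step.
chol = np.linalg.cholesky(sigma).astype(np.float32)
mvn2 = tf.contrib.distributions.MultivariateNormal(mu=mu, sigma_chol=chol)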
Example #11
  def log_prob(self, x, name='log_prob'):
    """Log of the probability density/mass function.

    Args:
      x: `float` or `double` `Tensor`.
      name: The name to give this op.

    Returns:
      log_prob: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
        values of type `self.dtype`.
    """
    with ops.name_scope(self.name):
      with ops.name_scope(name, values=[x] + list(self.inputs.values())):
        x = ops.convert_to_tensor(x, name='x')
        contrib_tensor_util.assert_same_float_dtype(
            (self.scale_operator_pd, x))
        if self.cholesky_input_output_matrices:
          x_sqrt = x
        else:
          # Complexity: O(nbk^3)
          x_sqrt = linalg_ops.batch_cholesky(x)

        batch_shape = self.batch_shape()
        event_shape = self.event_shape()
        ndims = array_ops.rank(x_sqrt)
        # sample_ndims = ndims - batch_ndims - event_ndims
        sample_ndims = ndims - array_ops.shape(batch_shape)[0] - 2
        sample_shape = array_ops.slice(
            array_ops.shape(x_sqrt), [0], [sample_ndims])

        # We need to be able to pre-multiply each matrix by its corresponding
        # batch scale matrix.  Since a Distribution Tensor supports multiple
        # samples per batch, this means we need to reshape the input matrix `x`
        # so that the first b dimensions are batch dimensions and the last two
        # are of shape [dimension, dimension*number_of_samples]. Doing these
        # gymnastics allows us to do a batch_solve.
        #
        # After we're done with sqrt_solve (the batch operation) we need to undo
        # this reshaping so what we're left with is a Tensor partitionable by
        # sample, batch, event dimensions.

        # Complexity: O(nbk^2) since transpose must access every element.
        scale_sqrt_inv_x_sqrt = x_sqrt
        perm = array_ops.concat(0, (math_ops.range(sample_ndims, ndims),
                                    math_ops.range(0, sample_ndims)))
        scale_sqrt_inv_x_sqrt = array_ops.transpose(scale_sqrt_inv_x_sqrt, perm)
        shape = array_ops.concat(
            0, (batch_shape,
                (math_ops.cast(self.dimension, dtype=dtypes.int32), -1)))
        scale_sqrt_inv_x_sqrt = array_ops.reshape(scale_sqrt_inv_x_sqrt, shape)

        # Complexity: O(nbM*k) where M is the complexity of the operator solving
        # a vector system.  E.g., for OperatorPDDiag, each solve is O(k), so
        # this complexity is O(nbk^2). For OperatorPDCholesky, each solve is
        # O(k^2) so this step has complexity O(nbk^3).
        scale_sqrt_inv_x_sqrt = self.scale_operator_pd.sqrt_solve(
            scale_sqrt_inv_x_sqrt)

        # Undo the reshape that made the input batch-op ready.
        # Complexity: O(nbk^2)
        shape = array_ops.concat(0, (batch_shape, event_shape, sample_shape))
        scale_sqrt_inv_x_sqrt = array_ops.reshape(scale_sqrt_inv_x_sqrt, shape)
        perm = array_ops.concat(0, (math_ops.range(ndims - sample_ndims, ndims),
                                    math_ops.range(0, ndims - sample_ndims)))
        scale_sqrt_inv_x_sqrt = array_ops.transpose(scale_sqrt_inv_x_sqrt, perm)

        # Write V = SS', X = LL'. Then:
        # tr[inv(V) X] = tr[inv(S)' inv(S) L L']
        #              = tr[inv(S) L L' inv(S)']
        #              = tr[(inv(S) L) (inv(S) L)']
        #              = sum_{ik} (inv(S) L)_{ik}^2
        # The second equality follows from the cyclic permutation property.
        # Complexity: O(nbk^2)
        trace_scale_inv_x = math_ops.reduce_sum(
            math_ops.square(scale_sqrt_inv_x_sqrt),
            reduction_indices=[-2, -1])

        # Complexity: O(nbk)
        half_log_det_x = math_ops.reduce_sum(
            math_ops.log(array_ops.batch_matrix_diag_part(x_sqrt)),
            reduction_indices=[-1])

        # Complexity: O(nbk^2)
        log_prob = ((self.df - self.dimension - 1.) * half_log_det_x -
                    0.5 * trace_scale_inv_x -
                    self.log_normalizing_constant())

        # Set shape hints.
        # Try to merge what we know from the input then what we know from the
        # parameters of this distribution.
        if x.get_shape().ndims is not None:
          log_prob.set_shape(x.get_shape()[:-2])
        if (log_prob.get_shape().ndims is not None and
            self.get_batch_shape().ndims is not None and
            self.get_batch_shape().ndims > 0):
          log_prob.get_shape()[-self.get_batch_shape().ndims:].merge_with(
              self.get_batch_shape())

        return log_prob
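The trace manipulation in the comment block above (tr[inv(V) X] equals the sum of squares of the entries of inv(S) L) is easy to verify numerically. A NumPy check (ours) with V = S S' and X = L L':

import numpy as np

rng = np.random.RandomState(0)
k = 4
a, b = rng.randn(k, k), rng.randn(k, k)
v = a @ a.T + k * np.eye(k)   # V = S S', positive definite
x = b @ b.T + k * np.eye(k)   # X = L L', positive definite
s = np.linalg.cholesky(v)
l = np.linalg.cholesky(x)

# tr[inv(V) X] equals the sum of squared entries of inv(S) L.
sinv_l = np.linalg.solve(s, l)
lhs = np.trace(np.linalg.solve(v, x))
rhs = np.sum(sinv_l ** 2)
assert np.isclose(lhs, rhs)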
Example #12
 def _std(self):
   if self.cholesky_input_output_matrices:
     raise ValueError(
         "Computing std. dev. when is cholesky_input_output_matrices=True "
         "does not make sense.")
   return linalg_ops.batch_cholesky(self.variance())
Example #13
  def _log_prob(self, x):
    if self.cholesky_input_output_matrices:
      x_sqrt = x
    else:
      # Complexity: O(nbk^3)
      x_sqrt = linalg_ops.batch_cholesky(x)

    batch_shape = self.batch_shape()
    event_shape = self.event_shape()
    ndims = array_ops.rank(x_sqrt)
    # sample_ndims = ndims - batch_ndims - event_ndims
    sample_ndims = ndims - array_ops.shape(batch_shape)[0] - 2
    sample_shape = array_ops.slice(
        array_ops.shape(x_sqrt), [0], [sample_ndims])

    # We need to be able to pre-multiply each matrix by its corresponding
    # batch scale matrix.  Since a Distribution Tensor supports multiple
    # samples per batch, this means we need to reshape the input matrix `x`
    # so that the first b dimensions are batch dimensions and the last two
    # are of shape [dimension, dimension*number_of_samples]. Doing these
    # gymnastics allows us to do a batch_solve.
    #
    # After we're done with sqrt_solve (the batch operation) we need to undo
    # this reshaping so what we're left with is a Tensor partitionable by
    # sample, batch, event dimensions.

    # Complexity: O(nbk^2) since transpose must access every element.
    scale_sqrt_inv_x_sqrt = x_sqrt
    perm = array_ops.concat(0, (math_ops.range(sample_ndims, ndims),
                                math_ops.range(0, sample_ndims)))
    scale_sqrt_inv_x_sqrt = array_ops.transpose(scale_sqrt_inv_x_sqrt, perm)
    shape = array_ops.concat(
        0, (batch_shape,
            (math_ops.cast(self.dimension, dtype=dtypes.int32), -1)))
    scale_sqrt_inv_x_sqrt = array_ops.reshape(scale_sqrt_inv_x_sqrt, shape)

    # Complexity: O(nbM*k) where M is the complexity of the operator solving
    # a vector system.  E.g., for OperatorPDDiag, each solve is O(k), so
    # this complexity is O(nbk^2). For OperatorPDCholesky, each solve is
    # O(k^2) so this step has complexity O(nbk^3).
    scale_sqrt_inv_x_sqrt = self.scale_operator_pd.sqrt_solve(
        scale_sqrt_inv_x_sqrt)

    # Undo the reshape that made the input batch-op ready.
    # Complexity: O(nbk^2)
    shape = array_ops.concat(0, (batch_shape, event_shape, sample_shape))
    scale_sqrt_inv_x_sqrt = array_ops.reshape(scale_sqrt_inv_x_sqrt, shape)
    perm = array_ops.concat(0, (math_ops.range(ndims - sample_ndims, ndims),
                                math_ops.range(0, ndims - sample_ndims)))
    scale_sqrt_inv_x_sqrt = array_ops.transpose(scale_sqrt_inv_x_sqrt, perm)

    # Write V = SS', X = LL'. Then:
    # tr[inv(V) X] = tr[inv(S)' inv(S) L L']
    #              = tr[inv(S) L L' inv(S)']
    #              = tr[(inv(S) L) (inv(S) L)']
    #              = sum_{ik} (inv(S) L)_{ik}^2
    # The second equality follows from the cyclic permutation property.
    # Complexity: O(nbk^2)
    trace_scale_inv_x = math_ops.reduce_sum(
        math_ops.square(scale_sqrt_inv_x_sqrt),
        reduction_indices=[-2, -1])

    # Complexity: O(nbk)
    half_log_det_x = math_ops.reduce_sum(
        math_ops.log(array_ops.batch_matrix_diag_part(x_sqrt)),
        reduction_indices=[-1])

    # Complexity: O(nbk^2)
    log_prob = ((self.df - self.dimension - 1.) * half_log_det_x -
                0.5 * trace_scale_inv_x -
                self.log_normalizing_constant())

    # Set shape hints.
    # Try to merge what we know from the input then what we know from the
    # parameters of this distribution.
    if x.get_shape().ndims is not None:
      log_prob.set_shape(x.get_shape()[:-2])
    if (log_prob.get_shape().ndims is not None and
        self.get_batch_shape().ndims is not None and
        self.get_batch_shape().ndims > 0):
      log_prob.get_shape()[-self.get_batch_shape().ndims:].merge_with(
          self.get_batch_shape())

    return log_prob
Example #14
 def _std(self):
     if self.cholesky_input_output_matrices:
         raise ValueError(
              'Computing std. dev. when cholesky_input_output_matrices=True '
             'does not make sense.')
     return linalg_ops.batch_cholesky(self.variance())