def testRectangular(self):
   with self.session(use_gpu=True):
     mat = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
     mat_diag = array_ops.matrix_diag_part(mat)
     self.assertAllEqual(mat_diag.eval(), np.array([1.0, 5.0]))
     mat = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
     mat_diag = array_ops.matrix_diag_part(mat)
     self.assertAllEqual(mat_diag.eval(), np.array([1.0, 4.0]))
 def _variance(self):
   if distribution_util.is_diagonal_scale(self.scale):
     return math_ops.square(self.scale.diag_part())
   elif (isinstance(self.scale, linalg.LinearOperatorLowRankUpdate) and
         self.scale.is_self_adjoint):
     return array_ops.matrix_diag_part(
         self.scale.matmul(self.scale.to_dense()))
   else:
     return array_ops.matrix_diag_part(
         self.scale.matmul(self.scale.to_dense(), adjoint_arg=True))
 def _stddev(self):
   if distribution_util.is_diagonal_scale(self.scale):
     return np.sqrt(2) * math_ops.abs(self.scale.diag_part())
   elif (isinstance(self.scale, linalg.LinearOperatorUDVHUpdate)
         and self.scale.is_self_adjoint):
     return np.sqrt(2) * math_ops.sqrt(array_ops.matrix_diag_part(
         self.scale.matmul(self.scale.to_dense())))
   else:
     return np.sqrt(2) * math_ops.sqrt(array_ops.matrix_diag_part(
         self.scale.matmul(self.scale.to_dense(), adjoint_arg=True)))
Example #4
 def _stddev(self):
   if (isinstance(self.scale, linalg.LinearOperatorIdentity) or
       isinstance(self.scale, linalg.LinearOperatorScaledIdentity) or
       isinstance(self.scale, linalg.LinearOperatorDiag)):
     return math_ops.abs(self.scale.diag_part())
   elif (isinstance(self.scale, linalg.LinearOperatorUDVHUpdate)
         and self.scale.is_self_adjoint):
     return math_ops.sqrt(array_ops.matrix_diag_part(
         self.scale.apply(self.scale.to_dense())))
   else:
     # TODO(b/35040238): Remove transpose once LinOp supports `transpose`.
     return math_ops.sqrt(array_ops.matrix_diag_part(
         self.scale.apply(array_ops.matrix_transpose(self.scale.to_dense()))))
  def testSample(self):
    with self.test_session():
      scale = make_pd(1., 2)
      df = 4

      chol_w = distributions.WishartCholesky(
          df, chol(scale), cholesky_input_output_matrices=False)

      x = chol_w.sample(1, seed=42).eval()
      chol_x = [chol(x[0])]

      full_w = distributions.WishartFull(
          df, scale, cholesky_input_output_matrices=False)
      self.assertAllClose(x, full_w.sample(1, seed=42).eval())

      chol_w_chol = distributions.WishartCholesky(
          df, chol(scale), cholesky_input_output_matrices=True)
      self.assertAllClose(chol_x, chol_w_chol.sample(1, seed=42).eval())
      eigen_values = array_ops.matrix_diag_part(
          chol_w_chol.sample(
              1000, seed=42))
      np.testing.assert_array_less(0., eigen_values.eval())

      full_w_chol = distributions.WishartFull(
          df, scale, cholesky_input_output_matrices=True)
      self.assertAllClose(chol_x, full_w_chol.sample(1, seed=42).eval())
      eigen_values = array_ops.matrix_diag_part(
          full_w_chol.sample(
              1000, seed=42))
      np.testing.assert_array_less(0., eigen_values.eval())

      # Check first and second moments.
      df = 4.
      chol_w = distributions.WishartCholesky(
          df=df,
          scale=chol(make_pd(1., 3)),
          cholesky_input_output_matrices=False)
      x = chol_w.sample(10000, seed=42)
      self.assertAllEqual((10000, 3, 3), x.get_shape())

      moment1_estimate = math_ops.reduce_mean(x, reduction_indices=[0]).eval()
      self.assertAllClose(chol_w.mean().eval(), moment1_estimate, rtol=0.05)

      # The variance estimate uses squares rather than outer products because
      # Wishart.variance() is the diagonal of the Wishart covariance matrix.
      variance_estimate = (math_ops.reduce_mean(
          math_ops.square(x), reduction_indices=[0]) -
                           math_ops.square(moment1_estimate)).eval()
      self.assertAllClose(
          chol_w.variance().eval(), variance_estimate, rtol=0.05)
 def _maybe_attach_assertion(x):
   if not validate_args:
     return x
   if assert_positive:
     return control_flow_ops.with_dependencies([
         check_ops.assert_positive(
             array_ops.matrix_diag_part(x),
             message="diagonal part must be positive"),
     ], x)
   return control_flow_ops.with_dependencies([
       check_ops.assert_none_equal(
           array_ops.matrix_diag_part(x),
           array_ops.zeros([], x.dtype),
           message="diagonal part must be non-zero"),
   ], x)
 def testSquare(self):
   with self.session(use_gpu=True):
     v = np.array([1.0, 2.0, 3.0])
     mat = np.diag(v)
     mat_diag = array_ops.matrix_diag_part(mat)
     self.assertEqual((3,), mat_diag.get_shape())
     self.assertAllEqual(mat_diag.eval(), v)
Example #8
def entropy_matched_cauchy_scale(covariance):
  """Approximates a similar Cauchy distribution given a covariance matrix.

  Since Cauchy distributions do not have moments, entropy matching provides one
  way to set a Cauchy's scale parameter in a way that provides a similar
  distribution. The effect is dividing the standard deviation of an independent
  Gaussian by a constant very near 3.

  To set the scale of the Cauchy distribution, we first select the diagonals of
  `covariance`. Since this ignores cross terms, it overestimates the entropy of
  the Gaussian. For each of these variances, we solve for the Cauchy scale
  parameter which gives the same entropy as the Gaussian with that
  variance. This means setting the (univariate) Gaussian entropy
      0.5 * ln(2 * variance * pi * e)
  equal to the Cauchy entropy
      ln(4 * pi * scale)
  Solving, we get scale = sqrt(variance * (e / (8 pi))).

  Args:
    covariance: A [batch size x N x N] batch of covariance matrices to produce
        Cauchy scales for.
  Returns:
    A [batch size x N] set of Cauchy scale parameters for each part of the batch
    and each dimension of the input Gaussians.
  """
  return math_ops.sqrt(math.e / (8. * math.pi) *
                       array_ops.matrix_diag_part(covariance))
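A quick way to sanity-check the closed form in the docstring is to plug `scale = sqrt(variance * e / (8 pi))` back into both entropy expressions. This is only an illustrative sketch using the standard-library `math` module with a made-up variance value, not part of the original code:

```python
import math

variance = 2.7  # hypothetical example value
scale = math.sqrt(variance * math.e / (8. * math.pi))

gaussian_entropy = 0.5 * math.log(2. * math.pi * math.e * variance)
cauchy_entropy = math.log(4. * math.pi * scale)

# The two entropies agree, confirming scale = sqrt(variance * (e / (8 pi))).
print(abs(gaussian_entropy - cauchy_entropy) < 1e-12)  # True
# Implied divisor of the Gaussian standard deviation: sqrt(8 pi / e) ~= 3.04.
print(math.sqrt(8. * math.pi / math.e))
```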
 def _to_dense(self):
   normalized_axis = self.reflection_axis / linalg.norm(
       self.reflection_axis, axis=-1, keepdims=True)
   mat = normalized_axis[..., array_ops.newaxis]
   matrix = -2 * math_ops.matmul(mat, mat, adjoint_b=True)
   return array_ops.matrix_set_diag(
       matrix, 1. + array_ops.matrix_diag_part(matrix))
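The `_to_dense` above materializes a Householder reflection `I - 2 v v^T` for a unit reflection axis `v`: it builds `-2 v v^T` and then adds 1 to each diagonal entry. A minimal NumPy sketch, assuming a random axis, checks the expected reflection properties:

```python
import numpy as np

v = np.random.randn(4)
v /= np.linalg.norm(v)                   # normalized reflection axis
h = np.eye(4) - 2. * np.outer(v, v)      # same matrix _to_dense returns

print(np.allclose(h @ v, -v))            # the axis itself is reflected
print(np.allclose(h, h.T))               # self-adjoint
print(np.allclose(h @ h, np.eye(4)))     # an involution: H is its own inverse
```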
Example #10
 def testCovarianceFromSampling(self):
   alpha = np.array([[1., 2, 3],
                     [2.5, 4, 0.01]], dtype=np.float32)
   with self.test_session() as sess:
     dist = dirichlet_lib.Dirichlet(alpha)  # batch_shape=[2], event_shape=[3]
     x = dist.sample(int(250e3), seed=1)
     sample_mean = math_ops.reduce_mean(x, 0)
     x_centered = x - sample_mean[None, ...]
     sample_cov = math_ops.reduce_mean(math_ops.matmul(
         x_centered[..., None], x_centered[..., None, :]), 0)
     sample_var = array_ops.matrix_diag_part(sample_cov)
     sample_stddev = math_ops.sqrt(sample_var)
     [
         sample_mean_,
         sample_cov_,
         sample_var_,
         sample_stddev_,
         analytic_mean,
         analytic_cov,
         analytic_var,
         analytic_stddev,
     ] = sess.run([
         sample_mean,
         sample_cov,
         sample_var,
         sample_stddev,
         dist.mean(),
         dist.covariance(),
         dist.variance(),
         dist.stddev(),
     ])
     self.assertAllClose(sample_mean_, analytic_mean, atol=0., rtol=0.04)
     self.assertAllClose(sample_cov_, analytic_cov, atol=0., rtol=0.06)
     self.assertAllClose(sample_var_, analytic_var, atol=0., rtol=0.03)
     self.assertAllClose(sample_stddev_, analytic_stddev, atol=0., rtol=0.02)
Example #11
def logdet(matrix, name=None):
  """Computes log of the determinant of a hermitian positive definite matrix.

  ```python
  # Compute the determinant of a matrix while reducing the chance of over- or
  # underflow:
  A = ... # shape 10 x 10
  det = tf.exp(tf.logdet(A))  # scalar
  ```

  Args:
    matrix:  A `Tensor`. Must be `float16`, `float32`, `float64`, `complex64`,
      or `complex128` with shape `[..., M, M]`.
    name:  A name to give this `Op`.  Defaults to `logdet`.

  Returns:
    The natural log of the determinant of `matrix`.

  @compatibility(numpy)
  Equivalent to numpy.linalg.slogdet, although no sign is returned since only
  hermitian positive definite matrices are supported.
  @end_compatibility
  """
  # This uses the property that the log det(A) = 2*sum(log(real(diag(C))))
  # where C is the cholesky decomposition of A.
  with ops.name_scope(name, 'logdet', [matrix]):
    chol = gen_linalg_ops.cholesky(matrix)
    return 2.0 * math_ops.reduce_sum(
        math_ops.log(math_ops.real(array_ops.matrix_diag_part(chol))),
        reduction_indices=[-1])
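The identity used here, `log det(A) = 2 * sum(log(diag(chol(A))))`, can be checked against `numpy.linalg.slogdet` on a small synthetic SPD matrix. A NumPy sketch, not part of the TensorFlow implementation:

```python
import numpy as np

rng = np.random.default_rng(0)
a = rng.standard_normal((5, 5))
spd = a @ a.T + 5. * np.eye(5)                 # Hermitian positive definite

chol = np.linalg.cholesky(spd)
logdet_via_chol = 2. * np.sum(np.log(np.diag(chol)))

sign, logdet_ref = np.linalg.slogdet(spd)
print(sign)                                    # 1.0: determinant is positive
print(np.allclose(logdet_via_chol, logdet_ref))
```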
Example #12
 def _forward_log_det_jacobian(self, x):
   # We formulate the Jacobian with respect to the flattened matrices
   # `vec(x)` and `vec(y)`. Suppose for notational convenience that
   # the first `n` entries of `vec(x)` are the diagonal of `x`, and
   # the remaining `n**2-n` entries are the off-diagonals in
   # arbitrary order. Then the Jacobian is a block-diagonal matrix,
   # with the Jacobian of the diagonal bijector in the first block,
   # and the identity Jacobian for the remaining entries (since this
   # bijector acts as the identity on non-diagonal entries):
   #
   # J_vec(x) (vec(y)) =
   # -------------------------------
   # | J_diag(x) (diag(y))      0  | n entries
   # |                             |
   # | 0                        I  | n**2-n entries
   # -------------------------------
   #   n                     n**2-n
   #
   # Since the log-det of the second (identity) block is zero, the
   # overall log-det-jacobian is just the log-det of first block,
   # from the diagonal bijector.
   #
   # Note that for elementwise operations (exp, softplus, etc) the
   # first block of the Jacobian will itself be a diagonal matrix,
   # but our implementation does not require this to be true.
   return self._diag_bijector.forward_log_det_jacobian(
       array_ops.matrix_diag_part(x), event_ndims=1)
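The block-diagonal argument in the comment can be verified numerically for a small matrix: build the full `n**2 x n**2` Jacobian of the map that applies `exp` to the diagonal and leaves off-diagonals untouched, and compare its log-determinant to the diagonal bijector's contribution (for `exp`, that is just `sum(diag(x))`). A finite-difference sketch with NumPy, not part of the original bijector:

```python
import numpy as np

def transform_diag(vec_x, n):
    # Apply exp to the diagonal of the matrix encoded in vec_x; identity elsewhere.
    x = vec_x.reshape(n, n).copy()
    x[np.diag_indices(n)] = np.exp(np.diag(x))
    return x.reshape(-1)

n = 3
rng = np.random.default_rng(0)
x = rng.standard_normal((n, n))
vec_x = x.reshape(-1)

eps = 1e-6
jac = np.zeros((n * n, n * n))
for k in range(n * n):
    dv = np.zeros(n * n)
    dv[k] = eps
    jac[:, k] = (transform_diag(vec_x + dv, n) - transform_diag(vec_x - dv, n)) / (2. * eps)

numeric_fldj = np.linalg.slogdet(jac)[1]
analytic_fldj = np.sum(np.diag(x))   # exp bijector: sum of log|exp'(x_ii)| = sum x_ii
print(np.allclose(numeric_fldj, analytic_fldj, atol=1e-5))
```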
Example #13
 def _variance(self):
   x = math_ops.sqrt(self.df) * self.scale_operator_pd.to_dense()
   d = array_ops.expand_dims(array_ops.matrix_diag_part(x), -1)
   v = math_ops.square(x) + math_ops.matmul(d, d, adjoint_b=True)
   if self.cholesky_input_output_matrices:
     return linalg_ops.cholesky(v)
   return v
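The expression above encodes `Var(W[i, j]) = df * (S[i, j]**2 + S[i, i] * S[j, j])` for a Wishart with scale `S`: since `x = sqrt(df) * S`, the sum `x**2 + d d^T` is exactly that. A Monte Carlo sketch, assuming SciPy is available and using hypothetical `df` and `scale` values:

```python
import numpy as np
from scipy.stats import wishart

df = 5.
scale = np.array([[2.0, 0.5],
                  [0.5, 1.0]])

samples = wishart.rvs(df=df, scale=scale, size=200000)
empirical_var = samples.var(axis=0)

# Var(W[i, j]) = df * (S[i, j]**2 + S[i, i] * S[j, j])
analytic_var = df * (scale**2 + np.outer(np.diag(scale), np.diag(scale)))
print(np.allclose(empirical_var, analytic_var, rtol=0.05))
```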
def matrix_diag_transform(matrix, transform=None, name=None):
  """Transform diagonal of [batch-]matrix, leave rest of matrix unchanged.

  Create a trainable covariance defined by a Cholesky factor:

  ```python
  # Transform network layer into 2 x 2 array.
  matrix_values = tf.contrib.layers.fully_connected(activations, 4)
  matrix = tf.reshape(matrix_values, (batch_size, 2, 2))

  # Make the diagonal positive.  If the upper triangle was zero, this would be a
  # valid Cholesky factor.
  chol = matrix_diag_transform(matrix, transform=tf.nn.softplus)

  # OperatorPDCholesky ignores the upper triangle.
  operator = OperatorPDCholesky(chol)
  ```

  Example of heteroskedastic 2-D linear regression.

  ```python
  # Get a trainable Cholesky factor.
  matrix_values = tf.contrib.layers.fully_connected(activations, 4)
  matrix = tf.reshape(matrix_values, (batch_size, 2, 2))
  chol = matrix_diag_transform(matrix, transform=tf.nn.softplus)

  # Get a trainable mean.
  mu = tf.contrib.layers.fully_connected(activations, 2)

  # This is a fully trainable multivariate normal!
  dist = tf.contrib.distributions.MVNCholesky(mu, chol)

  # Standard log loss.  Minimizing this will "train" mu and chol, and then dist
  # will be a distribution predicting labels as multivariate Gaussians.
  loss = -1 * tf.reduce_mean(dist.log_prob(labels))
  ```

  Args:
    matrix:  Rank `R` `Tensor`, `R >= 2`, where the last two dimensions are
      equal.
    transform:  Element-wise function mapping `Tensors` to `Tensors`.  To
      be applied to the diagonal of `matrix`.  If `None`, `matrix` is returned
      unchanged.  Defaults to `None`.
    name:  A name to give created ops.
      Defaults to "matrix_diag_transform".

  Returns:
    A `Tensor` with same shape and `dtype` as `matrix`.
  """
  with ops.name_scope(name, "matrix_diag_transform", [matrix]):
    matrix = ops.convert_to_tensor(matrix, name="matrix")
    if transform is None:
      return matrix
    # Replace the diag with transformed diag.
    diag = array_ops.matrix_diag_part(matrix)
    transformed_diag = transform(diag)
    transformed_mat = array_ops.matrix_set_diag(matrix, transformed_diag)

  return transformed_mat
Example #15
def sign_magnitude_positive_definite(
    raw, off_diagonal_scale=0., overall_scale=0.):
  """Constructs a positive definite matrix from an unconstrained input matrix.

  We want to keep the whole matrix on a log scale, but also allow off-diagonal
  elements to be negative, so the sign of off-diagonal elements is modeled
  separately from their magnitude (using the lower and upper triangles
  respectively). Specifically:

  for i < j, we have:
    output_cholesky[i, j] = raw[j, i] / (abs(raw[j, i]) + 1) *
        exp((off_diagonal_scale + overall_scale + raw[i, j]) / 2)

  output_cholesky[i, i] = exp((raw[i, i] + overall_scale) / 2)

  output = output_cholesky^T * output_cholesky

  where raw, off_diagonal_scale, and overall_scale are
  un-constrained real-valued variables. The resulting values are stable
  around zero due to the exponential (and the softsign keeps the function
  smooth).

  Args:
    raw: A [..., M, M] Tensor.
    off_diagonal_scale: A scalar or [...] shaped Tensor controlling the relative
        scale of off-diagonal values in the output matrix.
    overall_scale: A scalar or [...] shaped Tensor controlling the overall scale
        of the output matrix.
  Returns:
    The `output` matrix described above, a [..., M, M] positive definite matrix.

  """
  raw = ops.convert_to_tensor(raw)
  diagonal = array_ops.matrix_diag_part(raw)
  def _right_pad_with_ones(tensor, target_rank):
    # Allow broadcasting even if overall_scale and off_diagonal_scale have batch
    # dimensions
    tensor = ops.convert_to_tensor(tensor, dtype=raw.dtype.base_dtype)
    return array_ops.reshape(tensor,
                             array_ops.concat(
                                 [
                                     array_ops.shape(tensor), array_ops.ones(
                                         [target_rank - array_ops.rank(tensor)],
                                         dtype=target_rank.dtype)
                                 ],
                                 axis=0))
  # We divide the log values by 2 to compensate for the squaring that happens
  # when transforming Cholesky factors into positive definite matrices.
  sign_magnitude = (gen_math_ops.exp(
      (raw + _right_pad_with_ones(off_diagonal_scale, array_ops.rank(raw)) +
       _right_pad_with_ones(overall_scale, array_ops.rank(raw))) / 2.) *
                    nn.softsign(array_ops.matrix_transpose(raw)))
  sign_magnitude.set_shape(raw.get_shape())
  cholesky_factor = array_ops.matrix_set_diag(
      input=array_ops.matrix_band_part(sign_magnitude, 0, -1),
      diagonal=gen_math_ops.exp((diagonal + _right_pad_with_ones(
          overall_scale, array_ops.rank(diagonal))) / 2.))
  return math_ops.matmul(cholesky_factor, cholesky_factor, transpose_a=True)
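A small NumPy re-implementation of the formula in the docstring, useful for convincing yourself that the construction yields a symmetric positive definite matrix (the Cholesky factor has a strictly positive diagonal, so `chol^T chol` is PD). The values of `off_diagonal_scale` and `overall_scale` below are arbitrary; this is a sketch, not the library code:

```python
import numpy as np

def softsign(t):
    return t / (np.abs(t) + 1.)

rng = np.random.default_rng(0)
raw = rng.standard_normal((4, 4))
off_diagonal_scale, overall_scale = 0.3, -0.5

# Strictly upper triangle: sign from the lower triangle, magnitude from the upper.
chol = np.triu(
    softsign(raw.T) * np.exp((raw + off_diagonal_scale + overall_scale) / 2.), k=1)
np.fill_diagonal(chol, np.exp((np.diag(raw) + overall_scale) / 2.))

output = chol.T @ chol
print(np.allclose(output, output.T))              # symmetric
print(np.all(np.linalg.eigvalsh(output) > 0.))    # positive definite
```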
 def _sqrt_log_det(self):
   # The matrix determinant lemma states:
   # det(M + VDV^T) = det(D^{-1} + V^T M^{-1} V) * det(D) * det(M)
   #                = det(C) * det(D) * det(M)
   #
   # Here we compute the Cholesky factor of "C", then pass the result on.
   diag_chol_c = array_ops.matrix_diag_part(
       self._chol_capacitance(batch_mode=False))
   return self._sqrt_log_det_core(diag_chol_c)
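The matrix determinant lemma quoted in the comment is easy to verify numerically for a random low-rank update. A NumPy sketch with hypothetical `M`, `D`, `V` (the `c` below is the capacitance matrix the code refers to as "C"):

```python
import numpy as np

rng = np.random.default_rng(0)
n, k = 6, 2
m = rng.standard_normal((n, n))
m = m @ m.T + n * np.eye(n)                      # invertible M
d = np.diag(rng.uniform(0.5, 2.0, size=k))       # invertible diagonal D
v = rng.standard_normal((n, k))

lhs = np.linalg.det(m + v @ d @ v.T)
c = np.linalg.inv(d) + v.T @ np.linalg.inv(m) @ v   # capacitance matrix C
rhs = np.linalg.det(c) * np.linalg.det(d) * np.linalg.det(m)
print(np.allclose(lhs, rhs))
```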
 def _batch_log_det(self):
   """Log determinant of every batch member."""
   # Note that array_ops.diag_part does not seem more efficient for non-batch,
   # and would give a bad result for a batch matrix, so always use
   # matrix_diag_part.
   diag = array_ops.matrix_diag_part(self._chol)
   det = 2.0 * math_ops.reduce_sum(math_ops.log(diag), reduction_indices=[-1])
   det.set_shape(self.get_shape()[:-2])
   return det
Example #18
 def _log_abs_determinant(self):
   logging.warn(
       "Using (possibly slow) default implementation of determinant."
       "  Requires conversion to a dense matrix and O(N^3) operations.")
   if self._can_use_cholesky():
     diag = array_ops.matrix_diag_part(linalg_ops.cholesky(self.to_dense()))
     return 2 * math_ops.reduce_sum(math_ops.log(diag), axis=[-1])
   _, log_abs_det = linalg.slogdet(self.to_dense())
   return log_abs_det
def _GradWithInverseL(l, l_inverse, grad):
  middle = math_ops.matmul(l, grad, adjoint_a=True)
  middle = array_ops.matrix_set_diag(middle,
                                     0.5 * array_ops.matrix_diag_part(middle))
  middle = array_ops.matrix_band_part(middle, -1, 0)
  grad_a = math_ops.matmul(
      math_ops.matmul(l_inverse, middle, adjoint_a=True), l_inverse)
  grad_a += math_ops.conj(array_ops.matrix_transpose(grad_a))
  return grad_a * 0.5
Example #20
 def _log_abs_determinant(self):
   logging.warn(
       "Using (possibly slow) default implementation of determinant."
       "  Requires conversion to a dense matrix and O(N^3) operations.")
   if self._can_use_cholesky():
     diag = array_ops.matrix_diag_part(self._get_cached_chol())
     return 2 * math_ops.reduce_sum(math_ops.log(diag), reduction_indices=[-1])
   abs_det = math_ops.abs(self.determinant())
   return math_ops.log(abs_det)
Example #21
 def testRectangularBatch(self):
   with self.session(use_gpu=True):
     v_batch = np.array([[1.0, 2.0], [4.0, 5.0]])
     mat_batch = np.array([[[1.0, 0.0, 0.0], [0.0, 2.0, 0.0]],
                           [[4.0, 0.0, 0.0], [0.0, 5.0, 0.0]]])
     self.assertEqual(mat_batch.shape, (2, 2, 3))
     mat_batch_diag = array_ops.matrix_diag_part(mat_batch)
     self.assertEqual((2, 2), mat_batch_diag.get_shape())
     self.assertAllEqual(mat_batch_diag.eval(), v_batch)
Example #22
  def __init__(self,
               df,
               scale,
               cholesky_input_output_matrices=False,
               validate_args=False,
               allow_nan_stats=True,
               name="WishartCholesky"):
    """Construct Wishart distributions.

    Args:
      df: `float` or `double` `Tensor`. Degrees of freedom, must be greater than
        or equal to dimension of the scale matrix.
      scale: `float` or `double` `Tensor`. The Cholesky factorization of
        the symmetric positive definite scale matrix of the distribution.
      cholesky_input_output_matrices: Python `bool`. Any function whose input
        or output is a matrix assumes the input is a Cholesky factor and
        returns a Cholesky-factored matrix. For example, `log_prob` takes a
        Cholesky input and `sample_n` returns a Cholesky factor when
        `cholesky_input_output_matrices=True`.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
        (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
        result is undefined. When `False`, an exception is raised if one or
        more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.
    """
    parameters = dict(locals())
    with ops.name_scope(name, values=[scale]) as name:
      with ops.name_scope("init", values=[scale]):
        scale = ops.convert_to_tensor(scale)
        if validate_args:
          scale = control_flow_ops.with_dependencies([
              check_ops.assert_positive(
                  array_ops.matrix_diag_part(scale),
                  message="scale must be positive definite"),
              check_ops.assert_equal(
                  array_ops.shape(scale)[-1],
                  array_ops.shape(scale)[-2],
                  message="scale must be square")
          ] if validate_args else [], scale)

      super(WishartCholesky, self).__init__(
          df=df,
          scale_operator=linalg.LinearOperatorLowerTriangular(
              tril=scale,
              is_non_singular=True,
              is_positive_definite=True,
              is_square=True),
          cholesky_input_output_matrices=cholesky_input_output_matrices,
          validate_args=validate_args,
          allow_nan_stats=allow_nan_stats,
          name=name)
    self._parameters = parameters
Example #23
def mvn_tril_log_prob(loc, scale_tril, x):
  """Computes the MVN log pdf under tril scale. Doesn't handle batches."""
  x0 = x - loc
  z = linalg_ops.matrix_triangular_solve(
      scale_tril, x0[..., array_ops.newaxis])[..., 0]
  log_det_cov = 2. * math_ops.reduce_sum(math_ops.log(
      array_ops.matrix_diag_part(scale_tril)), axis=-1)
  d = math_ops.cast(array_ops.shape(scale_tril)[-1], log_det_cov.dtype)
  return -0.5 * (math_ops.reduce_sum(math_ops.square(z), axis=-1)
                 + d * np.log(2. * np.pi) + log_det_cov)
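The helper above is the standard MVN log-density written in terms of a lower-triangular scale: solve `L z = x - loc`, then `log p(x) = -0.5 * (z'z + d * log(2 pi) + log|cov|)` with `log|cov| = 2 * sum(log(diag(L)))`. A NumPy/SciPy cross-check on random inputs (assuming SciPy is installed; not part of the original snippet):

```python
import numpy as np
from scipy.stats import multivariate_normal

rng = np.random.default_rng(0)
d = 3
loc = rng.standard_normal(d)
a = rng.standard_normal((d, d))
scale_tril = np.linalg.cholesky(a @ a.T + d * np.eye(d))
x = rng.standard_normal(d)

z = np.linalg.solve(scale_tril, x - loc)
log_det_cov = 2. * np.sum(np.log(np.diag(scale_tril)))
log_prob = -0.5 * (z @ z + d * np.log(2. * np.pi) + log_det_cov)

ref = multivariate_normal(mean=loc, cov=scale_tril @ scale_tril.T).logpdf(x)
print(np.allclose(log_prob, ref))
```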
  def _log_abs_determinant(self):
    if self._is_spd:
      diag = array_ops.matrix_diag_part(self._chol)
      return 2 * math_ops.reduce_sum(math_ops.log(diag), reduction_indices=[-1])

    if self.dtype.is_complex:
      abs_det = math_ops.complex_abs(self.determinant())
    else:
      abs_det = math_ops.abs(self.determinant())
    return math_ops.log(abs_det)
Example #25
 def _testSquareBatch(self, dtype):
   with self.cached_session(use_gpu=True):
     v_batch = np.array([[1.0, 0.0, 3.0], [4.0, 5.0, 6.0]]).astype(dtype)
     mat_batch = np.array([[[1.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 3.0]],
                           [[4.0, 0.0, 0.0], [0.0, 5.0, 0.0],
                            [0.0, 0.0, 6.0]]]).astype(dtype)
     self.assertEqual(mat_batch.shape, (2, 3, 3))
     mat_batch_diag = array_ops.matrix_diag_part(mat_batch)
     self.assertEqual((2, 3), mat_batch_diag.get_shape())
     self.assertAllEqual(mat_batch_diag.eval(), v_batch)
Example #26
 def testGrad(self):
   shapes = ((3, 3), (2, 3), (3, 2), (5, 3, 3))
   with self.session(use_gpu=True):
     for shape in shapes:
       x = constant_op.constant(np.random.rand(*shape), dtype=np.float32)
       y = array_ops.matrix_diag_part(x)
       error = gradient_checker.compute_gradient_error(x,
                                                       x.get_shape().as_list(),
                                                       y,
                                                       y.get_shape().as_list())
       self.assertLess(error, 1e-4)
  def __init__(self,
               tril,
               is_non_singular=None,
               is_self_adjoint=None,
               is_positive_definite=None,
               is_square=None,
               name="LinearOperatorLowerTriangular"):
    r"""Initialize a `LinearOperatorLowerTriangular`.

    Args:
      tril:  Shape `[B1,...,Bb, N, N]` with `b >= 0`, `N >= 0`.
        The lower triangular part of `tril` defines this operator.  The strictly
        upper triangle is ignored.  Allowed dtypes: `float16`, `float32`,
        `float64`.
      is_non_singular:  Expect that this operator is non-singular.
        This operator is non-singular if and only if its diagonal elements are
        all non-zero.
      is_self_adjoint:  Expect that this operator is equal to its hermitian
        transpose.  This operator is self-adjoint only if it is diagonal with
        real-valued diagonal entries.  In this case it is advised to use
        `LinearOperatorDiag`.
      is_positive_definite:  Expect that this operator is positive definite,
        meaning the quadratic form `x^H A x` has positive real part for all
        nonzero `x`.  Note that we do not require the operator to be
        self-adjoint to be positive-definite.  See:
        https://en.wikipedia.org/wiki/Positive-definite_matrix\
            #Extension_for_non_symmetric_matrices
      is_square:  Expect that this operator acts like square [batch] matrices.
      name: A name for this `LinearOperator`.

    Raises:
      TypeError:  If `diag.dtype` is not an allowed type.
      ValueError:  If `is_square` is `False`.
    """

    if is_square is False:
      raise ValueError(
          "Only square lower triangular operators supported at this time.")
    is_square = True

    with ops.name_scope(name, values=[tril]):
      self._tril = ops.convert_to_tensor(tril, name="tril")
      self._check_tril(self._tril)
      self._tril = array_ops.matrix_band_part(tril, -1, 0)
      self._diag = array_ops.matrix_diag_part(self._tril)

      super(LinearOperatorLowerTriangular, self).__init__(
          dtype=self._tril.dtype,
          graph_parents=[self._tril],
          is_non_singular=is_non_singular,
          is_self_adjoint=is_self_adjoint,
          is_positive_definite=is_positive_definite,
          is_square=is_square,
          name=name)
Example #28
 def _preprocess_tril(self, identity_multiplier, diag, tril, event_ndims):
   """Helper to preprocess a lower triangular matrix."""
   tril = array_ops.matrix_band_part(tril, -1, 0)  # Zero out TriU.
   if identity_multiplier is None and diag is None:
     return self._process_matrix(tril, min_rank=2, event_ndims=event_ndims)
   new_diag = array_ops.matrix_diag_part(tril)
   if identity_multiplier is not None:
     new_diag += identity_multiplier
   if diag is not None:
     new_diag += diag
   tril = array_ops.matrix_set_diag(tril, new_diag)
   return self._process_matrix(tril, min_rank=2, event_ndims=event_ndims)
  def __init__(self,
               tril,
               is_non_singular=None,
               is_self_adjoint=None,
               is_positive_definite=None,
               name="LinearOperatorTriL"):
    """Initialize a `LinearOperatorTriL`.

    Args:
      tril:  Shape `[B1,...,Bb, N, N]` with `b >= 0`, `N >= 0`.
        The lower triangular part of `tril` defines this operator.  The strictly
        upper triangle is ignored.  Allowed dtypes: `float32`, `float64`.
      is_non_singular:  Expect that this operator is non-singular.
        This operator is non-singular if and only if its diagonal elements are
        all non-zero.
      is_self_adjoint:  Expect that this operator is equal to its hermitian
        transpose.  This operator is self-adjoint only if it is diagonal with
        real-valued diagonal entries.  In this case it is advised to use
        `LinearOperatorDiag`.
      is_positive_definite:  Expect that this operator is positive definite,
        meaning the real part of all eigenvalues is positive.  We do not require
        the operator to be self-adjoint to be positive-definite.  See:
        https://en.wikipedia.org/wiki/Positive-definite_matrix
            #Extension_for_non_symmetric_matrices
      name: A name for this `LinearOperator`.

    Raises:
      TypeError:  If `diag.dtype` is not an allowed type.
    """

    # TODO(langmore) Add complex types once matrix_triangular_solve works for
    # them.
    allowed_dtypes = [dtypes.float32, dtypes.float64]

    with ops.name_scope(name, values=[tril]):
      self._tril = array_ops.matrix_band_part(tril, -1, 0)
      self._diag = array_ops.matrix_diag_part(self._tril)

      dtype = self._tril.dtype
      if dtype not in allowed_dtypes:
        raise TypeError(
            "Argument tril must have dtype in %s.  Found: %s"
            % (allowed_dtypes, dtype))

      super(LinearOperatorTriL, self).__init__(
          dtype=self._tril.dtype,
          graph_parents=[self._tril],
          is_non_singular=is_non_singular,
          is_self_adjoint=is_self_adjoint,
          is_positive_definite=is_positive_definite,
          name=name)
Example #30
def _MatrixSetDiagGrad(op, grad):
  diag_shape = op.inputs[1].get_shape()
  diag_shape = diag_shape.merge_with(op.inputs[0].get_shape()[:-1])
  diag_shape = diag_shape.merge_with(grad.get_shape()[:-1])
  if diag_shape.is_fully_defined():
    diag_shape = diag_shape.as_list()
  else:
    diag_shape = array_ops.shape(grad)
    diag_shape = array_ops.slice(diag_shape, [0], [array_ops.rank(grad) - 1])
  grad_input = array_ops.matrix_set_diag(
      grad, array_ops.zeros(
          diag_shape, dtype=grad.dtype))
  grad_diag = array_ops.matrix_diag_part(grad)
  return (grad_input, grad_diag)
Example #31
def tridiagonal_solve(diagonals,
                      rhs,
                      diagonals_format='compact',
                      transpose_rhs=False,
                      conjugate_rhs=False,
                      name=None,
                      partial_pivoting=True):
    r"""Solves tridiagonal systems of equations.

  The input can be supplied in various formats: `matrix`, `sequence` and
  `compact`, specified by the `diagonals_format` arg.

  In `matrix` format, `diagonals` must be a tensor of shape `[..., M, M]`, with
  two inner-most dimensions representing the square tridiagonal matrices.
  Elements outside of the three diagonals will be ignored.

  In `sequence` format, `diagonals` are supplied as a tuple or list of three
  tensors of shapes `[..., N]`, `[..., M]`, `[..., N]` representing
  superdiagonals, diagonals, and subdiagonals, respectively. `N` can be either
  `M-1` or `M`; in the latter case, the last element of superdiagonal and the
  first element of subdiagonal will be ignored.

  In `compact` format the three diagonals are brought together into one tensor
  of shape `[..., 3, M]`, with last two dimensions containing superdiagonals,
  diagonals, and subdiagonals, in order. Similarly to `sequence` format,
  elements `diagonals[..., 0, M-1]` and `diagonals[..., 2, 0]` are ignored.

  The `compact` format is recommended as the one with best performance. In case
  you need to cast a tensor into a compact format manually, use `tf.gather_nd`.
  An example for a tensor of shape [m, m]:

  ```python
  rhs = tf.constant([...])
  matrix = tf.constant([[...]])
  m = matrix.shape[0]
  dummy_idx = [0, 0]  # An arbitrary element to use as a dummy
  indices = [[[i, i + 1] for i in range(m - 1)] + [dummy_idx],  # Superdiagonal
           [[i, i] for i in range(m)],                          # Diagonal
           [dummy_idx] + [[i + 1, i] for i in range(m - 1)]]    # Subdiagonal
  diagonals=tf.gather_nd(matrix, indices)
  x = tf.linalg.tridiagonal_solve(diagonals, rhs)
  ```

  Regardless of the `diagonals_format`, `rhs` is a tensor of shape `[..., M]` or
  `[..., M, K]`. The latter allows K systems with the same left-hand sides and
  K different right-hand sides to be solved simultaneously. If `transpose_rhs`
  is set to `True` the expected shape is `[..., M]` or `[..., K, M]`.

  The batch dimensions, denoted as `...`, must be the same in `diagonals` and
  `rhs`.

  The output is a tensor of the same shape as `rhs`: either `[..., M]` or
  `[..., M, K]`.

  The op isn't guaranteed to raise an error if the input matrix is not
  invertible. `tf.debugging.check_numerics` can be applied to the output to
  detect invertibility problems.

  **Note**: with large batch sizes, the computation on the GPU may be slow, if
  either `partial_pivoting=True` or there are multiple right-hand sides
  (`K > 1`). If this issue arises, consider if it's possible to disable pivoting
  and have `K = 1`, or, alternatively, consider using CPU.

  On CPU, solution is computed via Gaussian elimination with or without partial
  pivoting, depending on `partial_pivoting` parameter. On GPU, Nvidia's cuSPARSE
  library is used: https://docs.nvidia.com/cuda/cusparse/index.html#gtsv

  Args:
    diagonals: A `Tensor` or tuple of `Tensor`s describing left-hand sides. The
      shape depends of `diagonals_format`, see description above. Must be
      `float32`, `float64`, `complex64`, or `complex128`.
    rhs: A `Tensor` of shape [..., M] or [..., M, K] and with the same dtype as
      `diagonals`. Note that if the shape of `rhs` and/or `diags` isn't known
      statically, `rhs` will be treated as a matrix rather than a vector.
    diagonals_format: one of `matrix`, `sequence`, or `compact`. Default is
      `compact`.
    transpose_rhs: If `True`, `rhs` is transposed before solving (has no effect
      if the shape of rhs is [..., M]).
    conjugate_rhs: If `True`, `rhs` is conjugated before solving.
    name:  A name to give this `Op` (optional).
    partial_pivoting: whether to perform partial pivoting. `True` by default.
      Partial pivoting makes the procedure more stable, but slower. Partial
      pivoting is unnecessary in some cases, including diagonally dominant and
      symmetric positive definite matrices (see e.g. theorem 9.12 in [1]).

  Returns:
    A `Tensor` of shape [..., M] or [..., M, K] containing the solutions.

  Raises:
    ValueError: An unsupported type is provided as input, or when the input
      tensors have incorrect shapes.
    UnimplementedError: Whenever `partial_pivoting` is true and the backend is
      XLA.

  [1] Nicholas J. Higham (2002). Accuracy and Stability of Numerical Algorithms:
  Second Edition. SIAM. p. 175. ISBN 978-0-89871-802-7.

  """
    if diagonals_format == 'compact':
        return _tridiagonal_solve_compact_format(diagonals, rhs, transpose_rhs,
                                                 conjugate_rhs,
                                                 partial_pivoting, name)

    if diagonals_format == 'sequence':
        if not isinstance(diagonals, (tuple, list)) or len(diagonals) != 3:
            raise ValueError(
                'Expected diagonals to be a sequence of length 3.')

        superdiag, maindiag, subdiag = diagonals
        if (not subdiag.shape[:-1].is_compatible_with(maindiag.shape[:-1])
                or not superdiag.shape[:-1].is_compatible_with(
                    maindiag.shape[:-1])):
            raise ValueError(
                'Tensors representing the three diagonals must have the same shape,'
                'except for the last dimension, got {}, {}, {}'.format(
                    subdiag.shape, maindiag.shape, superdiag.shape))

        m = tensor_shape.dimension_value(maindiag.shape[-1])

        def pad_if_necessary(t, name, last_dim_padding):
            n = tensor_shape.dimension_value(t.shape[-1])
            if not n or n == m:
                return t
            if n == m - 1:
                paddings = ([[0, 0] for _ in range(len(t.shape) - 1)] +
                            [last_dim_padding])
                return array_ops.pad(t, paddings)
            raise ValueError(
                'Expected {} to have length {} or {}, got {}.'.format(
                    name, m, m - 1, n))

        subdiag = pad_if_necessary(subdiag, 'subdiagonal', [1, 0])
        superdiag = pad_if_necessary(superdiag, 'superdiagonal', [0, 1])

        diagonals = array_ops.stack((superdiag, maindiag, subdiag), axis=-2)
        return _tridiagonal_solve_compact_format(diagonals, rhs, transpose_rhs,
                                                 conjugate_rhs,
                                                 partial_pivoting, name)

    if diagonals_format == 'matrix':
        m1 = tensor_shape.dimension_value(diagonals.shape[-1])
        m2 = tensor_shape.dimension_value(diagonals.shape[-2])
        if m1 and m2 and m1 != m2:
            raise ValueError(
                'Expected last two dimensions of diagonals to be same, got {} and {}'
                .format(m1, m2))
        m = m1 or m2
        diagonals = array_ops.matrix_diag_part(diagonals,
                                               k=(-1, 1),
                                               padding_value=0.,
                                               align='LEFT_RIGHT')
        return _tridiagonal_solve_compact_format(diagonals, rhs, transpose_rhs,
                                                 conjugate_rhs,
                                                 partial_pivoting, name)

    raise ValueError(
        'Unrecognized diagonals_format: {}'.format(diagonals_format))
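For reference, a minimal end-to-end call in the recommended `compact` format, checked against a dense solve. This assumes the public TF 2.x API (`tf.linalg.tridiagonal_solve`) in eager mode rather than the internal modules used elsewhere in these snippets:

```python
import numpy as np
import tensorflow as tf

# Compact format: rows are [superdiagonal, main diagonal, subdiagonal].
# diagonals[0, -1] and diagonals[2, 0] are ignored.
diagonals = tf.constant([[1., 1., 1., 0.],
                         [4., 4., 4., 4.],
                         [0., 1., 1., 1.]])
rhs = tf.constant([1., 2., 3., 4.])

x = tf.linalg.tridiagonal_solve(diagonals, rhs)

# Cross-check against a dense solve of the same system.
dense = np.diag([4.] * 4) + np.diag([1.] * 3, 1) + np.diag([1.] * 3, -1)
print(np.allclose(x.numpy(), np.linalg.solve(dense, [1., 2., 3., 4.])))
```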
Example #32
 def loop_fn(i):
     return array_ops.matrix_diag_part(array_ops.gather(x, i))
Example #33
 def testInvalidShapeAtEval(self):
   with self.session(use_gpu=True):
     v = array_ops.placeholder(dtype=dtypes_lib.float32)
     with self.assertRaisesOpError("input must be at least 2-dim"):
       array_ops.matrix_diag_part(v).eval(feed_dict={v: 0.0})
Example #34
def relu(x, alpha=0.0, max_value=None, threshold=0.0, mode="diag"):
    """Rectified Linear Unit.

    Assumed Density Filtering (ADF) version of the Keras `relu` activation.

    Parameters
    ----------
    x : list or tuple
        Input tensors (means and covariances).
    alpha: float, optional
        Slope of negative section. Default is ``0.0``.
        Currently no value other than the default is supported for  ADF.
    max_value: float, optional
        Saturation threshold. Default is `None`.
        Currently no value other than the default is supported for  ADF.
    threshold: float, optional
        Threshold value for thresholded activation. Default is ``0.0``.
        Currently no value other than the default is supported for  ADF.
    mode: {"diag", "diagonal", "lowrank", "half", "full"}
        Covariance computation mode. Default is "diag".

    Returns
    -------
    list
        List of transformed means and covariances, according to
        the ReLU activation: ``max(x, 0)``.

    """
    if not alpha == 0.0:
        raise NotImplementedError(
            "The relu activation function with alpha other than 0.0 has"
            "not been implemented for ADF layers yet."
        )
    if max_value is not None:
        raise NotImplementedError(
            "The relu activation function with max_value other than `None` "
            "has not been implemented for ADF layers yet."
        )
    if not threshold == 0.0:
        raise NotImplementedError(
            "The relu activation function with threshold other than 0.0 has"
            "not been implemented for ADF layers yet."
        )
    if not (isinstance(x, list) and len(x) == 2):
        raise ValueError(
            "The relu activation function expects a list of "
            "exactly two input tensors, but got: %s" % x
        )
    means, covariances = x
    means_shape = means.get_shape().as_list()
    means_rank = len(means_shape)
    cov_shape = covariances.get_shape().as_list()
    cov_rank = len(cov_shape)
    EPS = K.cast(K.epsilon(), covariances.dtype)
    # treat inputs according to rank and mode
    if means_rank == 1:
        # if rank(mean)=1, treat as single vector, no reshapes necessary
        pass
    elif means_rank == 2:
        # if rank(mean)=2, treat as batch of vectors, no reshapes necessary
        pass
    else:
        # if rank(mean)=2+n, treat as batch of rank=n tensors + channels
        means = K.reshape(means, [-1] + [K.prod(means_shape[1:])],)
        if mode == "diag":
            covariances = K.reshape(
                covariances, [-1] + [K.prod(cov_shape[1:])],
            )
        elif mode == "half":
            covariances = K.reshape(
                covariances, [-1] + [cov_shape[1]] + [K.prod(cov_shape[2:])],
            )
        elif mode == "full":
            covariances = K.reshape(
                covariances,
                [-1]
                + [K.prod(cov_shape[1 : (cov_rank - 1) // 2 + 1])]
                + [K.prod(cov_shape[(cov_rank - 1) // 2 + 1 :])],
            )
    if mode == "diag":
        covariances = covariances + EPS
        std = K.sqrt(covariances)
        div = means / std
        gd_div = _gauss_density(div)
        gc_div = _gauss_cumulative(div)
        new_means = K.maximum(
            means,
            K.maximum(K.zeros_like(means), means * gc_div + std * gd_div),
        )
        new_covariances = (
            K.square(means) * gc_div
            + covariances * gc_div
            + means * std * gd_div
            - K.square(new_means)
        )
        new_covariances = K.maximum(
            K.zeros_like(new_covariances), new_covariances
        )
    elif mode == "half":
        variances = K.sum(K.square(covariances), axis=1) + EPS
        std = K.sqrt(variances)
        div = means / std
        gd_div = _gauss_density(div)
        gc_div = _gauss_cumulative(div)
        new_means = K.maximum(
            means,
            K.maximum(K.zeros_like(means), means * gc_div + std * gd_div),
        )
        gc_div = K.expand_dims(gc_div, 1)
        new_covariances = covariances * gc_div
    elif mode == "full":
        variances = array_ops.matrix_diag_part(covariances) + EPS
        std = K.sqrt(variances)
        div = means / std
        gd_div = _gauss_density(div)
        gc_div = _gauss_cumulative(div)
        new_means = K.maximum(
            means,
            K.maximum(K.zeros_like(means), means * gc_div + std * gd_div),
        )
        gc_div = K.expand_dims(gc_div, 1)
        new_covariances = covariances * gc_div
        new_covariances = K.permute_dimensions(new_covariances, [0, 2, 1])
        new_covariances = new_covariances * gc_div
        new_covariances = K.permute_dimensions(new_covariances, [0, 2, 1])
    # undo reshapes if necessary
    new_means = K.reshape(new_means, [-1] + means_shape[1:])
    new_covariances = K.reshape(new_covariances, [-1] + cov_shape[1:])
    return [new_means, new_covariances]
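In "diag" mode the code above performs Gaussian moment matching of the ReLU: for `X ~ N(mu, s**2)`, `E[max(X, 0)] = mu * Phi(mu/s) + s * phi(mu/s)` and `E[max(X, 0)**2] = (mu**2 + s**2) * Phi(mu/s) + mu * s * phi(mu/s)`, which is what `new_means` and `new_covariances` compute before clipping. A Monte Carlo sketch with NumPy and hypothetical `mu`, `s` values:

```python
import math
import numpy as np

mu, s = 0.3, 1.2
pdf = math.exp(-(mu / s) ** 2 / 2.) / math.sqrt(2. * math.pi)   # standard normal pdf at mu/s
cdf = 0.5 * (1. + math.erf((mu / s) / math.sqrt(2.)))           # standard normal cdf at mu/s

mean_closed = mu * cdf + s * pdf
var_closed = (mu**2 + s**2) * cdf + mu * s * pdf - mean_closed**2

samples = np.maximum(np.random.normal(mu, s, size=2_000_000), 0.)
print(mean_closed, samples.mean())   # should agree to roughly 3 decimal places
print(var_closed, samples.var())
```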
Example #35
 def _forward(self, x):
   diag = self._diag_bijector.forward(array_ops.matrix_diag_part(x))
   return array_ops.matrix_set_diag(x, diag)
 def _add_to_tensor(self, mat):
   mat_diag = array_ops.matrix_diag_part(mat)
   new_diag = math_ops.square(self._diag) + mat_diag
   return array_ops.matrix_set_diag(mat, new_diag)
Example #37
 def _get_diag(self):
     """Gets the diagonal part of `tril` kwarg."""
     return array_ops.matrix_diag_part(self._tril)
Example #38
    def _mean_of_covariance_given_quadrature_component(self, diag_only):
        p = self.mixture_distribution.probs

        # To compute E[Cov(Z|V)], we'll add matrices within three categories:
        # scaled-identity, diagonal, and full. Then we'll combine these at the end.
        scale_identity_multiplier = None
        diag = None
        full = None

        for k, aff in enumerate(self.interpolated_affine):
            s = aff.scale  # Just in case aff.scale has side-effects, we'll call once.
            if (s is None or isinstance(
                    s, linop_identity_lib.LinearOperatorIdentity)):
                scale_identity_multiplier = add(scale_identity_multiplier,
                                                p[..., k, array_ops.newaxis])
            elif isinstance(s,
                            linop_identity_lib.LinearOperatorScaledIdentity):
                scale_identity_multiplier = add(
                    scale_identity_multiplier, (p[..., k, array_ops.newaxis] *
                                                math_ops.square(s.multiplier)))
            elif isinstance(s, linop_diag_lib.LinearOperatorDiag):
                diag = add(diag, (p[..., k, array_ops.newaxis] *
                                  math_ops.square(s.diag_part())))
            else:
                x = (p[..., k, array_ops.newaxis, array_ops.newaxis] *
                     s.matmul(s.to_dense(), adjoint_arg=True))
                if diag_only:
                    x = array_ops.matrix_diag_part(x)
                full = add(full, x)

        # We must now account for the fact that the base distribution might have a
        # non-unity variance. Recall that, since X ~ iid Law(X_0),
        #   `Cov(SX+m) = S Cov(X) S.T = S S.T Diag(Var(X_0))`.
        # We can scale by `Var(X)` (vs `Cov(X)`) since X corresponds to `d` iid
        # samples from a scalar-event distribution.
        v = self.distribution.variance()
        if scale_identity_multiplier is not None:
            scale_identity_multiplier *= v
        if diag is not None:
            diag *= v[..., array_ops.newaxis]
        if full is not None:
            full *= v[..., array_ops.newaxis]

        if diag_only:
            # Apparently we don't need the full matrix, just the diagonal.
            r = add(diag, full)
            if r is None and scale_identity_multiplier is not None:
                ones = array_ops.ones(self.event_shape_tensor(),
                                      dtype=self.dtype)
                return scale_identity_multiplier[..., array_ops.newaxis] * ones
            return add(r, scale_identity_multiplier)

        # `None` indicates we don't know if the result is positive-definite.
        is_positive_definite = (True if all(
            aff.scale.is_positive_definite
            for aff in self.endpoint_affine) else None)

        to_add = []
        if diag is not None:
            to_add.append(
                linop_diag_lib.LinearOperatorDiag(
                    diag=diag, is_positive_definite=is_positive_definite))
        if full is not None:
            to_add.append(
                linop_full_lib.LinearOperatorFullMatrix(
                    matrix=full, is_positive_definite=is_positive_definite))
        if scale_identity_multiplier is not None:
            to_add.append(
                linop_identity_lib.LinearOperatorScaledIdentity(
                    num_rows=self.event_shape_tensor()[0],
                    multiplier=scale_identity_multiplier,
                    is_positive_definite=is_positive_definite))

        return (linop_add_lib.add_operators(to_add)[0].to_dense()
                if to_add else None)
Example #39
 def _add_to_tensor(self, mat):
     # Add to a tensor in O(k) time!
     mat_diag = array_ops.matrix_diag_part(mat)
     new_diag = self._scale + mat_diag
     return array_ops.matrix_set_diag(mat, new_diag)
Example #40
def sign_magnitude_positive_definite(raw,
                                     off_diagonal_scale=0.,
                                     overall_scale=0.):
    """Constructs a positive definite matrix from an unconstrained input matrix.

  We want to keep the whole matrix on a log scale, but also allow off-diagonal
  elements to be negative, so the sign of off-diagonal elements is modeled
  separately from their magnitude (using the lower and upper triangles
  respectively). Specifically:

  for i < j, we have:
    output_cholesky[i, j] = raw[j, i] / (abs(raw[j, i]) + 1) *
        exp((off_diagonal_scale + overall_scale + raw[i, j]) / 2)

  output_cholesky[i, i] = exp((raw[i, i] + overall_scale) / 2)

  output = output_cholesky^T * output_cholesky

  where raw, off_diagonal_scale, and overall_scale are
  un-constrained real-valued variables. The resulting values are stable
  around zero due to the exponential (and the softsign keeps the function
  smooth).

  Args:
    raw: A [..., M, M] Tensor.
    off_diagonal_scale: A scalar or [...] shaped Tensor controlling the relative
        scale of off-diagonal values in the output matrix.
    overall_scale: A scalar or [...] shaped Tensor controlling the overall scale
        of the output matrix.
  Returns:
    The `output` matrix described above, a [..., M, M] positive definite matrix.

  """
    raw = ops.convert_to_tensor(raw)
    diagonal = array_ops.matrix_diag_part(raw)

    def _right_pad_with_ones(tensor, target_rank):
        # Allow broadcasting even if overall_scale and off_diagonal_scale have batch
        # dimensions
        tensor = ops.convert_to_tensor(tensor, dtype=raw.dtype.base_dtype)
        return array_ops.reshape(
            tensor,
            array_ops.concat([
                array_ops.shape(tensor),
                array_ops.ones([target_rank - array_ops.rank(tensor)],
                               dtype=target_rank.dtype)
            ],
                             axis=0))

    # We divide the log values by 2 to compensate for the squaring that happens
    # when transforming Cholesky factors into positive definite matrices.
    sign_magnitude = (gen_math_ops.exp(
        (raw + _right_pad_with_ones(off_diagonal_scale, array_ops.rank(raw)) +
         _right_pad_with_ones(overall_scale, array_ops.rank(raw))) / 2.) *
                      nn.softsign(array_ops.matrix_transpose(raw)))
    sign_magnitude.set_shape(raw.get_shape())
    cholesky_factor = array_ops.matrix_set_diag(
        input=array_ops.matrix_band_part(sign_magnitude, 0, -1),
        diagonal=gen_math_ops.exp(
            (diagonal +
             _right_pad_with_ones(overall_scale, array_ops.rank(diagonal))) /
            2.))
    return math_ops.matmul(cholesky_factor, cholesky_factor, transpose_a=True)
Example #41
 def _diag_part(self):
     """Generic and often inefficient implementation.  Override often."""
     return array_ops.matrix_diag_part(self.to_dense())
def make_tril_scale(loc=None,
                    scale_tril=None,
                    scale_diag=None,
                    scale_identity_multiplier=None,
                    shape_hint=None,
                    validate_args=False,
                    assert_positive=False,
                    name=None):
    """Creates a LinOp representing a lower triangular matrix.

  Args:
    loc: Floating-point `Tensor`. This is used for inferring shape in the case
      where only `scale_identity_multiplier` is set.
    scale_tril: Floating-point `Tensor` representing a lower triangular matrix.
      `scale_tril` has shape [N1, N2, ...  k, k], which represents a k x k
      lower triangular matrix.
      When `None` no `scale_tril` term is added to the LinOp.
      The upper triangular elements above the diagonal are ignored.
    scale_diag: Floating-point `Tensor` representing the diagonal matrix.
      `scale_diag` has shape [N1, N2, ...  k], which represents a k x k
      diagonal matrix.
      When `None` no diagonal term is added to the LinOp.
    scale_identity_multiplier: floating point rank 0 `Tensor` representing a
      scaling done to the identity matrix.
      When `scale_identity_multiplier = scale_diag = scale_tril = None` then
      `scale += IdentityMatrix`. Otherwise no scaled-identity-matrix is added
      to `scale`.
    shape_hint: scalar integer `Tensor` representing a hint at the dimension of
      the identity matrix when only `scale_identity_multiplier` is set.
    validate_args: Python `bool` indicating whether arguments should be
      checked for correctness.
    assert_positive: Python `bool` indicating whether LinOp should be checked
      for being positive definite.
    name: Python `str` name given to ops managed by this object.

  Returns:
    `LinearOperator` representing a lower triangular matrix.

  Raises:
    ValueError:  If only `scale_identity_multiplier` is set and `loc` and
      `shape_hint` are both None.
  """
    def _maybe_attach_assertion(x):
        if not validate_args:
            return x
        if assert_positive:
            return control_flow_ops.with_dependencies([
                check_ops.assert_positive(
                    array_ops.matrix_diag_part(x),
                    message="diagonal part must be positive"),
            ], x)
        return control_flow_ops.with_dependencies([
            check_ops.assert_none_equal(
                array_ops.matrix_diag_part(x),
                array_ops.zeros([], x.dtype),
                message="diagonal part must be non-zero"),
        ], x)

    with ops.name_scope(name,
                        "make_tril_scale",
                        values=[loc, scale_diag, scale_identity_multiplier]):

        loc = _convert_to_tensor(loc, name="loc")
        scale_tril = _convert_to_tensor(scale_tril, name="scale_tril")
        scale_diag = _convert_to_tensor(scale_diag, name="scale_diag")
        scale_identity_multiplier = _convert_to_tensor(
            scale_identity_multiplier, name="scale_identity_multiplier")

    if scale_tril is not None:
        scale_tril = array_ops.matrix_band_part(scale_tril, -1,
                                                0)  # Zero out TriU.
        tril_diag = array_ops.matrix_diag_part(scale_tril)
        if scale_diag is not None:
            tril_diag += scale_diag
        if scale_identity_multiplier is not None:
            tril_diag += scale_identity_multiplier[..., array_ops.newaxis]

        scale_tril = array_ops.matrix_set_diag(scale_tril, tril_diag)

        return linalg.LinearOperatorLowerTriangular(
            tril=_maybe_attach_assertion(scale_tril),
            is_non_singular=True,
            is_self_adjoint=False,
            is_positive_definite=assert_positive)

    return make_diag_scale(loc=loc,
                           scale_diag=scale_diag,
                           scale_identity_multiplier=scale_identity_multiplier,
                           shape_hint=shape_hint,
                           validate_args=validate_args,
                           assert_positive=assert_positive,
                           name=name)
 def _batch_sqrt_log_det(self):
     # Here we compute the Cholesky factor of "C", then pass the result on.
     abs_diag_chol_c = math_ops.abs(
         array_ops.matrix_diag_part(
             self._chol_capacitance(batch_mode=True)))
     return self._sqrt_log_det_core(abs_diag_chol_c)
Example #44
    def _log_prob(self, x):
        if self.cholesky_input_output_matrices:
            x_sqrt = x
        else:
            # Complexity: O(nbk^3)
            x_sqrt = linalg_ops.cholesky(x)

        batch_shape = self.batch_shape_tensor()
        event_shape = self.event_shape_tensor()
        ndims = array_ops.rank(x_sqrt)
        # sample_ndims = ndims - batch_ndims - event_ndims
        sample_ndims = ndims - array_ops.shape(batch_shape)[0] - 2
        sample_shape = array_ops.strided_slice(array_ops.shape(x_sqrt), [0],
                                               [sample_ndims])

        # We need to be able to pre-multiply each matrix by its corresponding
        # batch scale matrix. Since a Distribution Tensor supports multiple
        # samples per batch, this means we need to reshape the input matrix `x`
        # so that the first b dimensions are batch dimensions and the last two
        # are of shape [dimension, dimensions*number_of_samples]. Doing these
        # gymnastics allows us to do a batch_solve.
        #
        # After we're done with sqrt_solve (the batch operation) we need to undo
        # this reshaping so what we're left with is a Tensor partitionable by
        # sample, batch, event dimensions.

        # Complexity: O(nbk**2) since transpose must access every element.
        scale_sqrt_inv_x_sqrt = x_sqrt
        perm = array_ops.concat([
            math_ops.range(sample_ndims, ndims),
            math_ops.range(0, sample_ndims)
        ], 0)
        scale_sqrt_inv_x_sqrt = array_ops.transpose(scale_sqrt_inv_x_sqrt,
                                                    perm)
        shape = array_ops.concat(
            (batch_shape,
             (math_ops.cast(self.dimension, dtype=dtypes.int32), -1)), 0)
        scale_sqrt_inv_x_sqrt = array_ops.reshape(scale_sqrt_inv_x_sqrt, shape)

        # Complexity: O(nbM*k) where M is the complexity of the operator solving
        # a vector system. E.g., for LinearOperatorDiag, each solve is O(k), so
        # this complexity is O(nbk**2). For LinearOperatorLowerTriangular,
        # each solve is O(k**2) so this step has complexity O(nbk^3).
        scale_sqrt_inv_x_sqrt = self.scale_operator.solve(
            scale_sqrt_inv_x_sqrt)

        # Undo make batch-op ready.
        # Complexity: O(nbk**2)
        shape = array_ops.concat([batch_shape, event_shape, sample_shape], 0)
        scale_sqrt_inv_x_sqrt = array_ops.reshape(scale_sqrt_inv_x_sqrt, shape)
        perm = array_ops.concat([
            math_ops.range(ndims - sample_ndims, ndims),
            math_ops.range(0, ndims - sample_ndims)
        ], 0)
        scale_sqrt_inv_x_sqrt = array_ops.transpose(scale_sqrt_inv_x_sqrt,
                                                    perm)

        # Write V = SS', X = LL'. Then:
        # tr[inv(V) X] = tr[inv(S)' inv(S) L L']
        #              = tr[inv(S) L L' inv(S)']
        #              = tr[(inv(S) L) (inv(S) L)']
        #              = sum_{ik} (inv(S) L)_{ik}**2
        # The second equality follows from the cyclic permutation property.
        # Complexity: O(nbk**2)
        trace_scale_inv_x = math_ops.reduce_sum(
            math_ops.square(scale_sqrt_inv_x_sqrt), axis=[-2, -1])

        # Complexity: O(nbk)
        half_log_det_x = math_ops.reduce_sum(math_ops.log(
            array_ops.matrix_diag_part(x_sqrt)),
                                             axis=[-1])

        # Complexity: O(nbk**2)
        log_prob = ((self.df - self.dimension - 1.) * half_log_det_x -
                    0.5 * trace_scale_inv_x - self.log_normalization())

        # Set shape hints.
        # Try to merge what we know from the input then what we know from the
        # parameters of this distribution.
        if x.get_shape().ndims is not None:
            log_prob.set_shape(x.get_shape()[:-2])
        if (log_prob.get_shape().ndims is not None
                and self.batch_shape.ndims is not None
                and self.batch_shape.ndims > 0):
            log_prob.get_shape()[-self.batch_shape.ndims:].merge_with(
                self.batch_shape)

        return log_prob
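
The trace identity in the comments above is what lets `_log_prob` avoid forming `inv(V)` explicitly: with V = S S' and X = L L', tr[inv(V) X] equals the sum of squares of the entries of inv(S) L. A minimal NumPy sketch (standalone, not part of the distribution class) that checks the identity on random matrices:

```python
import numpy as np

rng = np.random.default_rng(0)
p = 4

# Random lower-triangular factors with dominant positive diagonals, so that
# V = S S' and X = L L' are symmetric positive definite.
S = np.tril(rng.normal(size=(p, p))) + p * np.eye(p)
L = np.tril(rng.normal(size=(p, p))) + p * np.eye(p)
V = S @ S.T
X = L @ L.T

# Direct evaluation of tr[inv(V) X].
lhs = np.trace(np.linalg.solve(V, X))

# Sum-of-squares form: tr[inv(V) X] = sum_{ik} (inv(S) L)_{ik}**2, using a
# solve against the factor S instead of an explicit inverse of V.
scale_sqrt_inv_x_sqrt = np.linalg.solve(S, L)
rhs = np.sum(scale_sqrt_inv_x_sqrt ** 2)

np.testing.assert_allclose(lhs, rhs, rtol=1e-10)
```
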
Example #45
0
def _MatrixDiagGrad(_, grad):
    return array_ops.matrix_diag_part(grad)
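
Because `matrix_diag` scatters a vector onto the diagonal of an otherwise-zero matrix, the gradient of any loss with respect to that vector is just the diagonal of the upstream gradient. A small sketch of the same fact using the public `tf.linalg.diag` and `tf.GradientTape` API (rather than the internal gradient registration above):

```python
import tensorflow as tf

v = tf.constant([1., 2., 3.])
with tf.GradientTape() as tape:
  tape.watch(v)
  m = tf.linalg.diag(v)  # scatters v onto the diagonal of a 3x3 matrix
  # Weight every matrix entry differently so the upstream gradient is nontrivial.
  upstream = tf.reshape(tf.range(9.), (3, 3))
  loss = tf.reduce_sum(m * upstream)

grad = tape.gradient(loss, v)
print(grad.numpy())  # the diagonal of `upstream`: [0., 4., 8.]
```
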
Example #46
0
 def loop_fn(i):
     input = array_ops.gather(x, i)  # pylint: disable=redefined-builtin
     return array_ops.matrix_diag_part(input,
                                       k=(-2, 0),
                                       padding_value=3,
                                       align="RIGHT_LEFT")
Example #47
0
def _MatrixDiagV3Grad(op, grad):
  return array_ops.matrix_diag_part(
      grad, k=op.inputs[1], align=op.get_attr("align")), None, None, None, None
Example #48
0
    def _forward_log_det_jacobian(self, x):
        # Let Y be a symmetric, positive definite matrix and write:
        #   Y = X X.T
        # where X is lower-triangular.
        #
        # Observe that,
        #   dY[i,j]/dX[a,b]
        #   = d/dX[a,b] { X[i,:] X[j,:] }
        #   = sum_{d=1}^p { I[i=a] I[d=b] X[j,d] + I[j=a] I[d=b] X[i,d] }
        #
        # To compute the Jacobian dY/dX we must represent X, Y as vectors. Since Y is
        # symmetric and X is lower-triangular, we need vectors of dimension:
        #   d = p (p + 1) / 2
        # where X, Y are p x p matrices, p > 0. We use a row-major mapping, i.e.,
        #   k = { i (i + 1) / 2 + j   i>=j
        #       { undef               i<j
        # and assume zero-based indexes. When k is undef, the element is dropped.
        # Example:
        #           j      k
        #        0 1 2 3  /
        #    0 [ 0 . . . ]
        # i  1 [ 1 2 . . ]
        #    2 [ 3 4 5 . ]
        #    3 [ 6 7 8 9 ]
        # Write vec[.] to indicate transforming a matrix to vector via k(i,j). (With
        # slight abuse: k(i,j)=undef means the element is dropped.)
        #
        # We now show d vec[Y] / d vec[X] is lower triangular. Assuming both are
        # defined, observe that k(i,j) < k(a,b) iff (1) i<a or (2) i=a and j<b.
        # In both cases dvec[Y]/dvec[X]@[k(i,j),k(a,b)] = 0 since:
        # (1) j<=i<a thus i,j!=a.
        # (2) i=a>j  thus i,j!=a.
        #
        # Since the Jacobian is lower-triangular, we need only compute the product
        # of diagonal elements:
        #   d vec[Y] / d vec[X] @[k(i,j), k(i,j)]
        #   = X[j,j] + I[i=j] X[i,j]
        #   = X[j,j]    if i > j,
        #   = 2 X[j,j]  if i = j.
        # Column j therefore contributes the factor X[j,j] once per row i >= j
        # (i.e., p - j times), plus a single extra factor of 2 from its diagonal
        # entry, so:
        #   |Jac(d vec[Y]/d vec[X])| = 2^p prod_{j=0}^{p-1} X[j,j]^{p-j}.
        diag = array_ops.matrix_diag_part(x)

        # We now ensure diag is columnar. E.g., if `diag = [1, 2, 3]` then the output
        # is `[[1], [2], [3]]` and if `diag = [[1, 2, 3], [4, 5, 6]]` then the
        # output is unchanged.
        diag = self._make_columnar(diag)

        if self.validate_args:
            is_matrix = check_ops.assert_rank_at_least(
                x, 2, message="Input must be a (batch of) matrix.")
            shape = array_ops.shape(x)
            is_square = check_ops.assert_equal(
                shape[-2],
                shape[-1],
                message="Input must be a (batch of) square matrix.")
            # Assuming x is lower-triangular, we only need to check diag > 0.
            is_positive_definite = check_ops.assert_positive(
                diag, message="Input must be positive definite.")
            x = control_flow_ops.with_dependencies(
                [is_matrix, is_square, is_positive_definite], x)

        # Create a vector equal to: [p, p-1, ..., 2, 1].
        if x.get_shape().ndims is None or x.get_shape()[-1].value is None:
            p_int = array_ops.shape(x)[-1]
            p_float = math_ops.cast(p_int, dtype=x.dtype)
        else:
            p_int = x.get_shape()[-1].value
            p_float = np.array(p_int, dtype=x.dtype.as_numpy_dtype)
        exponents = math_ops.linspace(p_float, 1., p_int)

        sum_weighted_log_diag = array_ops.squeeze(math_ops.matmul(
            math_ops.log(diag), exponents[..., array_ops.newaxis]),
                                                  axis=-1)
        fldj = p_float * np.log(2.) + sum_weighted_log_diag

        return fldj
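
The derivation above reduces the log-determinant of the Jacobian to `p log 2 + sum_j (p - j) log X[j, j]`. The NumPy sketch below (an illustration, not library code) cross-checks that closed form against a brute-force finite-difference Jacobian of vec[Y] with respect to vec[X]:

```python
import numpy as np

p = 3
rng = np.random.default_rng(1)

# A lower-triangular X with positive diagonal.
X = np.tril(rng.normal(size=(p, p)))
X[np.diag_indices(p)] = np.abs(X[np.diag_indices(p)]) + 1.

tril = np.tril_indices(p)

def vec_y(x_flat):
  """Maps vec[X] -> vec[Y] with Y = X X', both in row-major lower-tri order."""
  Xm = np.zeros((p, p))
  Xm[tril] = x_flat
  Y = Xm @ Xm.T
  return Y[tril]

x_flat = X[tril]
d = x_flat.size

# Finite-difference Jacobian of vec[Y] w.r.t. vec[X].
eps = 1e-6
jac = np.zeros((d, d))
for k in range(d):
  dx = np.zeros(d)
  dx[k] = eps
  jac[:, k] = (vec_y(x_flat + dx) - vec_y(x_flat - dx)) / (2 * eps)

brute_force = np.log(np.abs(np.linalg.det(jac)))

# Closed form from the derivation above:
#   log|J| = p log 2 + sum_{j=0}^{p-1} (p - j) log X[j, j].
closed_form = p * np.log(2.) + np.sum((p - np.arange(p)) * np.log(np.diag(X)))

np.testing.assert_allclose(brute_force, closed_form, rtol=1e-5)
```
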
Example #49
0
 def _inverse_log_det_jacobian(self, y):
   return self._diag_bijector.inverse_log_det_jacobian(
       array_ops.matrix_diag_part(y), event_ndims=1)
Example #50
0
def _MatrixDiagV2Grad(op, grad):
  return array_ops.matrix_diag_part(
      grad, k=op.inputs[1]), None, None, None, None
Example #51
0
    def do_filter(self, estimated_state, estimated_state_covariance,
                  predicted_observation, predicted_observation_covariance,
                  observation, observation_model, observation_noise):
        """Convenience function for scoring predictions.

    Scores a prediction against an observation, and computes the updated
    posterior over states.

    Shapes given below for arguments are for single-model Kalman filtering
    (e.g. KalmanFilter). For ensembles, prior_state and prior_state_var are
    same-length tuples of values corresponding to each model.

    Args:
      estimated_state: A prior mean over states [batch size x state dimension]
      estimated_state_covariance: Covariance of state prior [batch size x D x
          D], with D depending on the Kalman filter implementation (typically
          the state dimension).
      predicted_observation: A prediction for the observed value, such as that
          returned by observed_from_state. A [batch size x num features] Tensor.
      predicted_observation_covariance: A covariance matrix corresponding to
          `predicted_observation`, a [batch size x num features x num features]
          Tensor.
      observation: The observed value corresponding to the predictions
          given [batch size x observation dimension]
      observation_model: The [batch size x observation dimension x model state
          dimension] Tensor indicating how a particular state is mapped to
          (pre-noise) observations for each part of the batch.
      observation_noise: A [batch size x observation dimension x observation
          dimension] Tensor or [observation dimension x observation dimension]
          Tensor with covariance matrices to use for each part of the batch (a
          two-dimensional input will be broadcast).
    Returns:
      posterior_state, posterior_state_var: Posterior mean and
          covariance, updated versions of prior_state and
          prior_state_var.
      log_prediction_prob: Log probability of the observations under
          the priors, suitable for optimization (should be maximized).

    """
        symmetrized_observation_covariance = 0.5 * (
            predicted_observation_covariance +
            array_ops.matrix_transpose(predicted_observation_covariance))
        instability_message = (
            "This may occur due to numerically unstable filtering when there is "
            "a large difference in posterior variances, or when inferences are "
            "near-deterministic. Considering tuning the "
            "'filtering_maximum_posterior_variance_ratio' or "
            "'filtering_minimum_posterior_variance' parameters in your "
            "StateSpaceModelConfiguration, or tuning the transition matrix.")
        symmetrized_observation_covariance = numerics.verify_tensor_all_finite(
            symmetrized_observation_covariance,
            "Predicted observation covariance was not finite. {}".format(
                instability_message))
        diag = array_ops.matrix_diag_part(symmetrized_observation_covariance)
        min_diag = math_ops.reduce_min(diag)
        non_negative_assert = control_flow_ops.Assert(
            min_diag >= 0.,
            [("The predicted observation covariance "
              "has a negative diagonal entry. {}").format(instability_message),
             min_diag])
        with ops.control_dependencies([non_negative_assert]):
            observation_covariance_cholesky = linalg_ops.cholesky(
                symmetrized_observation_covariance)
        log_prediction_prob = distributions.MultivariateNormalTriL(
            predicted_observation,
            observation_covariance_cholesky).log_prob(observation)
        (posterior_state,
         posterior_state_var) = self.posterior_from_prior_state(
             prior_state=estimated_state,
             prior_state_var=estimated_state_covariance,
             observation=observation,
             observation_model=observation_model,
             predicted_observations=(predicted_observation,
                                     predicted_observation_covariance),
             observation_noise=observation_noise)
        return (posterior_state, posterior_state_var, log_prediction_prob)
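
The scoring step above boils down to: symmetrize the predicted covariance, take its Cholesky factor, and evaluate the observation's Gaussian log-density from that factor. A minimal sketch of that computation with the public TensorFlow 2 API and made-up inputs (the tensor names mirror the arguments above, but the values are purely illustrative):

```python
import numpy as np
import tensorflow as tf

# Hypothetical predictions for a batch of 2 with observation dimension 3.
predicted_observation = tf.constant(np.random.RandomState(0).randn(2, 3),
                                    dtype=tf.float64)
raw = tf.constant(np.random.RandomState(1).randn(2, 3, 3), dtype=tf.float64)
predicted_observation_covariance = (
    tf.matmul(raw, raw, adjoint_b=True) + 1e-3 * tf.eye(3, dtype=tf.float64))
observation = tf.constant(np.random.RandomState(2).randn(2, 3), dtype=tf.float64)

# Symmetrize to guard against numerical asymmetry, then factor.
sym_cov = 0.5 * (predicted_observation_covariance +
                 tf.linalg.matrix_transpose(predicted_observation_covariance))
chol = tf.linalg.cholesky(sym_cov)

# Gaussian log-density from the Cholesky factor:
#   log N(y; m, C) = -0.5 ||L^{-1}(y - m)||^2 - sum(log diag L) - 0.5 k log(2 pi)
diff = tf.expand_dims(observation - predicted_observation, -1)
solved = tf.linalg.triangular_solve(chol, diff, lower=True)
k = tf.cast(tf.shape(observation)[-1], tf.float64)
log_prediction_prob = (
    -0.5 * tf.reduce_sum(tf.square(solved), axis=[-2, -1])
    - tf.reduce_sum(tf.math.log(tf.linalg.diag_part(chol)), axis=-1)
    - 0.5 * k * np.log(2. * np.pi))
print(log_prediction_prob.numpy())  # one log-probability per batch element
```
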
Example #52
0
 def _log_abs_determinant(self):
   if self._is_spd:
     diag = array_ops.matrix_diag_part(self._chol)
     return 2 * math_ops.reduce_sum(math_ops.log(diag), reduction_indices=[-1])
   abs_det = math_ops.abs(self.determinant())
   return math_ops.log(abs_det)
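
The SPD branch relies on the identity log|det A| = 2 * sum(log diag(L)) for A = L L'. A quick NumPy check of that identity:

```python
import numpy as np

rng = np.random.default_rng(3)
B = rng.normal(size=(4, 4))
A = B @ B.T + 4 * np.eye(4)  # symmetric positive definite

L = np.linalg.cholesky(A)
via_cholesky = 2. * np.sum(np.log(np.diag(L)))
direct = np.log(np.abs(np.linalg.det(A)))

np.testing.assert_allclose(via_cholesky, direct, rtol=1e-10)
```
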
Example #53
0
    def run_test_sample_consistent_mean_covariance(self,
                                                   sess_run_fn,
                                                   dist,
                                                   num_samples=int(1e5),
                                                   seed=24,
                                                   rtol=1e-2,
                                                   atol=0.,
                                                   cov_rtol=None,
                                                   cov_atol=None):
        """Tests that sample/mean/covariance are consistent with each other.

    "Consistency" means that `sample`, `mean`, `covariance`, etc all correspond
    to the same distribution.

    Args:
      sess_run_fn: Python `callable` taking `list`-like of `Tensor`s and
        returning a list of results after running one "step" of TensorFlow
        computation, typically set to `sess.run`.
      dist: Distribution instance or object which implements `sample`,
        `log_prob`, `event_shape_tensor` and `batch_shape_tensor`.
      num_samples: Python `int` scalar indicating the number of Monte-Carlo
        samples to draw from `dist`.
      seed: Python `int` indicating the seed to use when sampling from `dist`.
        In general it is not recommended to use `None` during a test as this
        increases the likelihood of spurious test failure.
      rtol: Python `float`-type indicating the admissible relative error between
        analytical and sample statistics.
      atol: Python `float`-type indicating the admissible absolute error between
        analytical and sample statistics.
      cov_rtol: Python `float`-type indicating the admissible relative error
        between analytical and sample covariance. Default: rtol.
      cov_atol: Python `float`-type indicating the admissible absolute error
        between analytical and sample covariance. Default: atol.
    """

        x = dist.sample(num_samples, seed=seed)
        sample_mean = math_ops.reduce_mean(x, axis=0)
        sample_covariance = math_ops.reduce_mean(
            _vec_outer_square(x - sample_mean), axis=0)
        sample_variance = array_ops.matrix_diag_part(sample_covariance)
        sample_stddev = math_ops.sqrt(sample_variance)

        [
            sample_mean_, sample_covariance_, sample_variance_, sample_stddev_,
            mean_, covariance_, variance_, stddev_
        ] = sess_run_fn([
            sample_mean,
            sample_covariance,
            sample_variance,
            sample_stddev,
            dist.mean(),
            dist.covariance(),
            dist.variance(),
            dist.stddev(),
        ])

        self.assertAllClose(mean_, sample_mean_, rtol=rtol, atol=atol)
        self.assertAllClose(covariance_,
                            sample_covariance_,
                            rtol=cov_rtol or rtol,
                            atol=cov_atol or atol)
        self.assertAllClose(variance_, sample_variance_, rtol=rtol, atol=atol)
        self.assertAllClose(stddev_, sample_stddev_, rtol=rtol, atol=atol)
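
The consistency test estimates the covariance as the average outer product of centered samples and reads the variance off its diagonal (the helper `_vec_outer_square` is not shown in this snippet). A NumPy sketch of the same Monte-Carlo check against a known Gaussian:

```python
import numpy as np

rng = np.random.default_rng(4)
mean = np.array([1., -2.])
cov = np.array([[2.0, 0.3],
                [0.3, 0.5]])

num_samples = 100_000
x = rng.multivariate_normal(mean, cov, size=num_samples)

sample_mean = x.mean(axis=0)
centered = x - sample_mean
# Outer product of each centered sample with itself, averaged over samples
# (what a helper like `_vec_outer_square` would compute per sample).
sample_covariance = np.einsum('ni,nj->ij', centered, centered) / num_samples
sample_variance = np.diag(sample_covariance)
sample_stddev = np.sqrt(sample_variance)

np.testing.assert_allclose(sample_mean, mean, rtol=2e-2, atol=2e-2)
np.testing.assert_allclose(sample_covariance, cov, rtol=5e-2, atol=5e-2)
np.testing.assert_allclose(sample_stddev, np.sqrt(np.diag(cov)), rtol=2e-2)
```
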
Example #54
0
 def _inverse(self, y):
   diag = self._diag_bijector.inverse(array_ops.matrix_diag_part(y))
   return array_ops.matrix_set_diag(y, diag)
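
This `_inverse` applies a scalar bijector only to the diagonal and leaves every off-diagonal entry untouched. A toy sketch with the public API, taking `log` as the (assumed) inverse of the diagonal bijector:

```python
import tensorflow as tf

y = tf.constant([[2.0, 5.0],
                 [7.0, 3.0]])

diag = tf.math.log(tf.linalg.diag_part(y))  # inverse of the assumed diag bijector
x = tf.linalg.set_diag(y, diag)             # off-diagonal entries are untouched
print(x.numpy())
# ~[[0.693  5.   ]
#   [7.     1.099]]
```
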
Example #55
0
def tridiagonal_matmul(diagonals, rhs, diagonals_format='compact', name=None):
    r"""Multiplies tridiagonal matrix by matrix.

  `diagonals` is a representation of a tridiagonal NxN matrix; its exact form
  depends on `diagonals_format`.

  In `matrix` format, `diagonals` must be a tensor of shape `[..., M, M]`, with
  two inner-most dimensions representing the square tridiagonal matrices.
  Elements outside of the three diagonals will be ignored.

  In `sequence` format, `diagonals` is a list or tuple of three tensors,
  `[superdiag, maindiag, subdiag]`, each of shape `[..., M]`. The last element
  of `superdiag` and the first element of `subdiag` are ignored.

  In `compact` format the three diagonals are brought together into one tensor
  of shape `[..., 3, M]`, with the last two dimensions containing the
  superdiagonal, main diagonal, and subdiagonal, in that order. As in the
  `sequence` format, elements `diagonals[..., 0, M-1]` and `diagonals[..., 2, 0]`
  are ignored.

  The `sequence` format is recommended as the one with the best performance.

  `rhs` is the matrix on the right-hand side of the multiplication; it has shape
  `[..., M, N]`.

  Example:

  ```python
  superdiag = tf.constant([-1, -1, 0], dtype=tf.float64)
  maindiag = tf.constant([2, 2, 2], dtype=tf.float64)
  subdiag = tf.constant([0, -1, -1], dtype=tf.float64)
  diagonals = [superdiag, maindiag, subdiag]
  rhs = tf.constant([[1, 1], [1, 1], [1, 1]], dtype=tf.float64)
  x = tf.linalg.tridiagonal_matmul(diagonals, rhs, diagonals_format='sequence')
  ```

  Args:
    diagonals: A `Tensor` or tuple of `Tensor`s describing left-hand sides. The
      shape depends on `diagonals_format`; see the description above. Must be
      `float32`, `float64`, `complex64`, or `complex128`.
    rhs: A `Tensor` of shape [..., M, N] and with the same dtype as `diagonals`.
    diagonals_format: one of `matrix`, `sequence`, or `compact`. Default is `compact`.
    name:  A name to give this `Op` (optional).

  Returns:
    A `Tensor` of shape [..., M, N] containing the result of multiplication.

  Raises:
    ValueError: If an unsupported type is provided as input, or if the input
      tensors have incorrect shapes.
  """
    if diagonals_format == 'compact':
        superdiag = diagonals[..., 0, :]
        maindiag = diagonals[..., 1, :]
        subdiag = diagonals[..., 2, :]
    elif diagonals_format == 'sequence':
        superdiag, maindiag, subdiag = diagonals
    elif diagonals_format == 'matrix':
        m1 = tensor_shape.dimension_value(diagonals.shape[-1])
        m2 = tensor_shape.dimension_value(diagonals.shape[-2])
        if m1 and m2 and m1 != m2:
            raise ValueError(
                'Expected last two dimensions of diagonals to be the same, got {} and {}'
                .format(m1, m2))
        diags = array_ops.matrix_diag_part(diagonals,
                                           k=(-1, 1),
                                           padding_value=0.,
                                           align='LEFT_RIGHT')
        superdiag = diags[..., 0, :]
        maindiag = diags[..., 1, :]
        subdiag = diags[..., 2, :]
    else:
        raise ValueError('Unrecognized diagonals_format: %s' %
                         diagonals_format)

    # C++ backend requires matrices.
    # Converting 1-dimensional vectors to matrices with 1 row.
    superdiag = array_ops.expand_dims(superdiag, -2)
    maindiag = array_ops.expand_dims(maindiag, -2)
    subdiag = array_ops.expand_dims(subdiag, -2)

    return linalg_ops.tridiagonal_mat_mul(superdiag, maindiag, subdiag, rhs,
                                          name)
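
A short usage sketch for the `matrix` format: entries outside the three central diagonals are dropped by the `matrix_diag_part` extraction above, so multiplying by the dense matrix's banded part gives the same result. (The concrete values are illustrative.)

```python
import tensorflow as tf

# The same 3x3 tridiagonal system as in the docstring example, written in
# 'matrix' format; the corner 9.0 entries lie outside the band and are ignored.
dense = tf.constant([[ 2., -1.,  9.],
                     [-1.,  2., -1.],
                     [ 9., -1.,  2.]], dtype=tf.float64)
rhs = tf.constant([[1., 1.],
                   [1., 1.],
                   [1., 1.]], dtype=tf.float64)

x = tf.linalg.tridiagonal_matmul(dense, rhs, diagonals_format='matrix')
print(x.numpy())

# Equivalent check: multiply by the banded part of the dense matrix only.
banded = tf.linalg.band_part(dense, 1, 1)
print(tf.matmul(banded, rhs).numpy())
```
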
Example #56
0
 def testInvalidShape(self):
     with self.assertRaisesRegexp(ValueError, "must be at least rank 2"):
         array_ops.matrix_diag_part(0)
Example #57
0
 def _add_to_tensor(self, x):
     x_diag = array_ops.matrix_diag_part(x)
     new_diag = self._diag + x_diag
     return array_ops.matrix_set_diag(x, new_diag)
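
`_add_to_tensor` adds the operator's stored diagonal to `x` without materializing a dense diagonal matrix. A minimal sketch of the same pattern with the public API (`stored_diag` stands in for `self._diag`):

```python
import tensorflow as tf

x = tf.constant([[1., 2.],
                 [3., 4.]])
stored_diag = tf.constant([10., 20.])  # plays the role of self._diag

result = tf.linalg.set_diag(x, tf.linalg.diag_part(x) + stored_diag)
print(result.numpy())
# [[11.  2.]
#  [ 3. 24.]]
```
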