def test_defining_spd_operator_by_taking_real_part(self):
    with self.cached_session() as sess:
      # S is real and positive.
      s = linear_operator_test_util.random_uniform(
          shape=(10, 2, 3, 4), dtype=dtypes.float32, minval=1., maxval=2.)

      # Let S = S1 + S2, the Hermitian and anti-Hermitian parts.
      # S1 = 0.5 * (S + S^H), S2 = 0.5 * (S - S^H),
      # where ^H is the Hermitian transpose of the function:
      #    f(n0, n1, n2)^H := ComplexConjugate[f(N0-n0, N1-n1, N2-n2)].
      # We want to isolate S1, since
      #   S1 is Hermitian by construction
      #   S1 is real since S is
      #   S1 is positive since it is the sum of two positive kernels

      # IDFT[S] = IDFT[S1] + IDFT[S2]
      #         =      H1  +      H2
      # where H1 is real since it is Hermitian,
      # and H2 is imaginary since it is anti-Hermitian.
      ifft_s = fft_ops.ifft3d(math_ops.cast(s, dtypes.complex64))

      # Throw away H2, keep H1.
      real_ifft_s = math_ops.real(ifft_s)

      # This is the perfect spectrum!
      # spectrum = DFT[H1]
      #          = S1,
      fft_real_ifft_s = fft_ops.fft3d(
          math_ops.cast(real_ifft_s, dtypes.complex64))

      # S1 is Hermitian ==> operator is real.
      # S1 is real ==> operator is self-adjoint.
      # S1 is positive ==> operator is positive-definite.
      operator = linalg.LinearOperatorCirculant3D(fft_real_ifft_s)

      # Allow for complex output so we can check operator has zero imag part.
      self.assertEqual(operator.dtype, dtypes.complex64)
      matrix, matrix_t = sess.run([
          operator.to_dense(),
          array_ops.matrix_transpose(operator.to_dense())
      ])
      operator.assert_positive_definite().run()  # Should not fail.
      np.testing.assert_allclose(0, np.imag(matrix), atol=1e-6)
      self.assertAllClose(matrix, matrix_t)

      # Just to test the theory, get S2 as well.
      # This should create an imaginary operator.
      # S2 is anti-Hermitian ==> operator is imaginary.
      # S2 is real ==> operator is self-adjoint.
      imag_ifft_s = math_ops.imag(ifft_s)
      fft_imag_ifft_s = fft_ops.fft3d(
          1j * math_ops.cast(imag_ifft_s, dtypes.complex64))
      operator_imag = linalg.LinearOperatorCirculant3D(fft_imag_ifft_s)

      matrix, matrix_h = sess.run([
          operator_imag.to_dense(),
          array_ops.matrix_transpose(math_ops.conj(operator_imag.to_dense()))
      ])
      self.assertAllClose(matrix, matrix_h)
      np.testing.assert_allclose(0, np.real(matrix), atol=1e-7)
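The construction above can be sanity-checked outside TensorFlow. Below is a minimal NumPy sketch (not part of the test) verifying that taking the real part of IDFT[S] and transforming back yields a spectrum with the Hermitian symmetry described in the comments, i.e. S1[n] == conj(S1[(-n) mod N]).

```python
import numpy as np

s = np.random.uniform(1., 2., size=(2, 3, 4))
s1 = np.fft.fftn(np.real(np.fft.ifftn(s)))

# Index map n -> (-n) mod N on every axis: reverse, then roll by one.
flipped = np.roll(s1[::-1, ::-1, ::-1], shift=1, axis=(0, 1, 2))
np.testing.assert_allclose(s1, np.conj(flipped), atol=1e-10)
```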
Example 2
 def testNonBatchMatrix(self):
   matrix = [[1, 2, 3], [4, 5, 6]]  # Shape (2, 3)
   expected_transposed = [[1, 4], [2, 5], [3, 6]]  # Shape (3, 2)
   with self.test_session():
     transposed = array_ops.matrix_transpose(matrix)
     self.assertEqual((3, 2), transposed.get_shape())
     self.assertAllEqual(expected_transposed, transposed.eval())
Example 3
  def __call__(self, shape, dtype=None, partition_info=None):
    if dtype is None:
      dtype = self.dtype
    # Check the shape
    if len(shape) < 2:
      raise ValueError("The tensor to initialize must be "
                       "at least two-dimensional")
    # Flatten the input shape with the last dimension remaining
    # its original shape so it works for conv2d
    num_rows = 1
    for dim in shape[:-1]:
      num_rows *= dim
    num_cols = shape[-1]
    flat_shape = (num_cols, num_rows) if num_rows < num_cols else (num_rows,
                                                                   num_cols)

    # Generate a random matrix
    a = random_ops.random_normal(flat_shape, dtype=dtype, seed=self.seed)
    # Compute the qr factorization
    q, r = linalg_ops.qr(a, full_matrices=False)
    # Make Q uniform
    d = array_ops.diag_part(r)
    q *= math_ops.sign(d)
    if num_rows < num_cols:
      q = array_ops.matrix_transpose(q)
    return self.gain * array_ops.reshape(q, shape)
Example 4
def adjoint(matrix, name=None):
  """Transposes the last two dimensions of and conjugates tensor `matrix`.

  For example:

  ```python
  x = tf.constant([[1 + 1j, 2 + 2j, 3 + 3j],
                   [4 + 4j, 5 + 5j, 6 + 6j]])
  tf.linalg.adjoint(x)  # [[1 - 1j, 4 - 4j],
                        #  [2 - 2j, 5 - 5j],
                        #  [3 - 3j, 6 - 6j]]
  ```

  Args:
    matrix:  A `Tensor`. Must be `float16`, `float32`, `float64`, `complex64`,
      or `complex128` with shape `[..., M, M]`.
    name:  A name to give this `Op` (optional).

  Returns:
    The adjoint (a.k.a. Hermitian transpose a.k.a. conjugate transpose) of
    matrix.
  """
  with ops.name_scope(name, 'adjoint', [matrix]):
    matrix = ops.convert_to_tensor(matrix, name='matrix')
    return array_ops.matrix_transpose(matrix, conjugate=True)
Example 5
  def _overdetermined(op, grad):
    """Gradients for the overdetermined case of MatrixSolveLs.

    This is the backprop for the solution to the normal equations of the first
    kind:
       X = F(A, B) = (A^T * A + lambda * I)^{-1} * A^T * B
    which solve the least squares problem
       min ||A * X - B||_F^2 + lambda ||X||_F^2.
    """
    a = op.inputs[0]
    b = op.inputs[1]
    l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
    x = op.outputs[0]
    a_shape = array_ops.shape(a)
    batch_shape = a_shape[:-2]
    n = a_shape[-1]

    identity = linalg_ops.eye(n, batch_shape=batch_shape, dtype=a.dtype)
    gramian = math_ops.matmul(a, a, adjoint_a=True) + l2_regularizer * identity
    chol = linalg_ops.cholesky(gramian)
    # Temporary z = (A^T * A + lambda * I)^{-1} * grad.
    z = linalg_ops.cholesky_solve(chol, grad)
    xzt = math_ops.matmul(x, z, adjoint_b=True)
    zx_sym = xzt + array_ops.matrix_transpose(xzt)
    grad_a = -math_ops.matmul(a, zx_sym) + math_ops.matmul(b, z, adjoint_b=True)
    grad_b = math_ops.matmul(a, z)
    return (grad_a, grad_b, None)
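For reference, a small NumPy sketch (not from the original file) of the forward computation whose gradients are defined above: the solution to the regularized normal equations of the first kind.

```python
import numpy as np

rng = np.random.default_rng(0)
a = rng.standard_normal((7, 3))    # overdetermined system: more rows than columns
b = rng.standard_normal((7, 2))
l2_regularizer = 0.1

# X = (A^T A + lambda I)^{-1} A^T B
gramian = a.T @ a + l2_regularizer * np.eye(3)
x = np.linalg.solve(gramian, a.T @ b)

# With lambda = 0 this reduces to the ordinary least-squares solution.
x_ls, *_ = np.linalg.lstsq(a, b, rcond=None)
print(np.allclose(np.linalg.solve(a.T @ a, a.T @ b), x_ls))
```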
Example 6
  def __call__(self, shape, dtype=dtypes.float32):
    """Returns a tensor object initialized as specified by the initializer.

    Args:
      shape: Shape of the tensor.
      dtype: Optional dtype of the tensor. Only floating point types are
       supported.

    Raises:
      ValueError: If the dtype is not floating point or the input shape is not
       valid.
    """
    dtype = _assert_float_dtype(dtype)
    # Check the shape
    if len(shape) < 2:
      raise ValueError("The tensor to initialize must be "
                       "at least two-dimensional")
    # Flatten the input shape with the last dimension remaining
    # its original shape so it works for conv2d
    num_rows = 1
    for dim in shape[:-1]:
      num_rows *= dim
    num_cols = shape[-1]
    flat_shape = (max(num_cols, num_rows), min(num_cols, num_rows))

    # Generate a random matrix
    a = random_ops.random_normal(flat_shape, dtype=dtype, seed=self.seed)
    # Compute the qr factorization
    q, r = gen_linalg_ops.qr(a, full_matrices=False)
    # Make Q uniform
    d = array_ops.diag_part(r)
    q *= math_ops.sign(d)
    if num_rows < num_cols:
      q = array_ops.matrix_transpose(q)
    return self.gain * array_ops.reshape(q, shape)
def _unvec_by(y, num_col):
  """Unstack vector to form a matrix, with a specified amount of columns."""
  return array_ops.matrix_transpose(
      array_ops.reshape(
          y,
          array_ops.concat(
              [array_ops.shape(y)[:-1], [num_col, -1]], axis=0)))
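A minimal NumPy analogue (illustrative only) of what `_unvec_by` does: reshape a batch of flat vectors into matrices with the requested number of columns.

```python
import numpy as np

y = np.arange(12.).reshape(2, 6)   # batch of 2 vectors, each of length 6
num_col = 3

# reshape to [..., num_col, -1], then transpose the last two dims
mats = np.swapaxes(y.reshape(2, num_col, -1), -1, -2)
print(mats.shape)                  # (2, 2, 3): each vector becomes a 2 x 3 matrix
```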
Example 8
 def testNonBatchMatrixDynamicallyDefined(self):
   matrix = [[1, 2, 3], [4, 5, 6]]  # Shape (2, 3)
   expected_transposed = [[1, 4], [2, 5], [3, 6]]  # Shape (3, 2)
   with self.test_session():
     matrix_ph = array_ops.placeholder(dtypes.int32)
     transposed = array_ops.matrix_transpose(matrix_ph)
     self.assertAllEqual(
         expected_transposed, transposed.eval(feed_dict={matrix_ph: matrix}))
Example 9
 def testConjugate(self):
   m = [[1 + 1j, 2 + 2j, 3 + 3j], [4 + 4j, 5 + 5j, 6 + 6j]]
   expected_transposed = [[1 - 1j, 4 - 4j], [2 - 2j, 5 - 5j], [3 - 3j, 6 - 6j]]
   with self.test_session():
     matrix = ops.convert_to_tensor(m)
     transposed = array_ops.matrix_transpose(matrix, conjugate=True)
     self.assertEqual((3, 2), transposed.get_shape())
     self.assertAllEqual(expected_transposed, transposed.eval())
Example 10
 def _covariance(self):
   if (isinstance(self.scale, linalg.LinearOperatorIdentity) or
       isinstance(self.scale, linalg.LinearOperatorScaledIdentity) or
       isinstance(self.scale, linalg.LinearOperatorDiag)):
     return array_ops.matrix_diag(math_ops.square(self.scale.diag_part()))
   else:
     # TODO(b/35040238): Remove transpose once LinOp supports `transpose`.
     return self.scale.apply(array_ops.matrix_transpose(self.scale.to_dense()))
Example 11
def sign_magnitude_positive_definite(
    raw, off_diagonal_scale=0., overall_scale=0.):
  """Constructs a positive definite matrix from an unconstrained input matrix.

  We want to keep the whole matrix on a log scale, but also allow off-diagonal
  elements to be negative, so the sign of off-diagonal elements is modeled
  separately from their magnitude (using the lower and upper triangles
  respectively). Specifically:

  for i < j, we have:
    output_cholesky[i, j] = raw[j, i] / (abs(raw[j, i]) + 1) *
        exp((off_diagonal_scale + overall_scale + raw[i, j]) / 2)

  output_cholesky[i, i] = exp((raw[i, i] + overall_scale) / 2)

  output = output_cholesky^T * output_cholesky

  where raw, off_diagonal_scale, and overall_scale are
  un-constrained real-valued variables. The resulting values are stable
  around zero due to the exponential (and the softsign keeps the function
  smooth).

  Args:
    raw: A [..., M, M] Tensor.
    off_diagonal_scale: A scalar or [...] shaped Tensor controlling the relative
        scale of off-diagonal values in the output matrix.
    overall_scale: A scalar or [...] shaped Tensor controlling the overall scale
        of the output matrix.
  Returns:
    The `output` matrix described above, a [..., M, M] positive definite matrix.

  """
  raw = ops.convert_to_tensor(raw)
  diagonal = array_ops.matrix_diag_part(raw)
  def _right_pad_with_ones(tensor, target_rank):
    # Allow broadcasting even if overall_scale and off_diagonal_scale have batch
    # dimensions
    tensor = ops.convert_to_tensor(tensor, dtype=raw.dtype.base_dtype)
    return array_ops.reshape(tensor,
                             array_ops.concat(
                                 [
                                     array_ops.shape(tensor), array_ops.ones(
                                         [target_rank - array_ops.rank(tensor)],
                                         dtype=target_rank.dtype)
                                 ],
                                 axis=0))
  # We divide the log values by 2 to compensate for the squaring that happens
  # when transforming Cholesky factors into positive definite matrices.
  sign_magnitude = (gen_math_ops.exp(
      (raw + _right_pad_with_ones(off_diagonal_scale, array_ops.rank(raw)) +
       _right_pad_with_ones(overall_scale, array_ops.rank(raw))) / 2.) *
                    nn.softsign(array_ops.matrix_transpose(raw)))
  sign_magnitude.set_shape(raw.get_shape())
  cholesky_factor = array_ops.matrix_set_diag(
      input=array_ops.matrix_band_part(sign_magnitude, 0, -1),
      diagonal=gen_math_ops.exp((diagonal + _right_pad_with_ones(
          overall_scale, array_ops.rank(diagonal))) / 2.))
  return math_ops.matmul(cholesky_factor, cholesky_factor, transpose_a=True)
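The docstring's formulas can be checked directly with NumPy. The sketch below (hypothetical, not from the original file) builds the upper-triangular Cholesky factor as described and confirms the resulting matrix is positive definite.

```python
import numpy as np

rng = np.random.default_rng(1)
raw = rng.standard_normal((4, 4))
off_diagonal_scale, overall_scale = 0.3, -0.2

chol = np.zeros_like(raw)
for i in range(4):
  chol[i, i] = np.exp((raw[i, i] + overall_scale) / 2.)
  for j in range(i + 1, 4):
    # softsign of the mirrored entry times the exponentiated magnitude
    chol[i, j] = (raw[j, i] / (abs(raw[j, i]) + 1.) *
                  np.exp((off_diagonal_scale + overall_scale + raw[i, j]) / 2.))

output = chol.T @ chol
print(np.linalg.eigvalsh(output).min() > 0)   # True: positive definite
```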
Example 12
  def test_cholesky(self):
    z = random_ops.random_normal([2, 3, 3])
    x = (math_ops.matmul(z, array_ops.matrix_transpose(z))  # Ensure pos. def.
         + linalg_ops.eye(3))  # Ensure well-conditioned.

    def loop_fn(i):
      return linalg_ops.cholesky(array_ops.gather(x, i))

    self._test_loop_fn(loop_fn, 2)
Example 13
def _GradWithInverseL(l, l_inverse, grad):
  middle = math_ops.matmul(l, grad, adjoint_a=True)
  middle = array_ops.matrix_set_diag(middle,
                                     0.5 * array_ops.matrix_diag_part(middle))
  middle = array_ops.matrix_band_part(middle, -1, 0)
  grad_a = math_ops.matmul(
      math_ops.matmul(l_inverse, middle, adjoint_a=True), l_inverse)
  grad_a += math_ops.conj(array_ops.matrix_transpose(grad_a))
  return grad_a * 0.5
Example 14
def TriAngSolveCompositeGrad(l, grad):
  # Gradient is l^{-H} @ ((l^{H} @ grad) * (tril(ones)-1/2*eye)) @ l^{-1}

  # Compute ((l^{H} @ grad) * (tril(ones)-1/2*eye)) = middle
  middle = math_ops.matmul(l, grad, adjoint_a=True)
  middle = array_ops.matrix_set_diag(middle,
                                     0.5 * array_ops.matrix_diag_part(middle))
  middle = array_ops.matrix_band_part(middle, -1, 0)

  # Compute l^{-H} @ middle = z
  l_inverse_middle = linalg_ops.matrix_triangular_solve(l, middle, adjoint=True)

  # We need to compute z @ l^{-1}. With matrix_triangular_solve we
  # actually compute l^{-H} @ z^{H} = grad. Since we later add grad^{H}
  # we can omit the conjugate transpose here.
  z_h = math_ops.conj(array_ops.matrix_transpose(l_inverse_middle))
  grad_a = linalg_ops.matrix_triangular_solve(l, z_h, adjoint=True)
  grad_a += math_ops.conj(array_ops.matrix_transpose(grad_a))
  return grad_a * 0.5
Example 15
def _tridiagonal_solve_compact_format(diagonals,
                                      rhs,
                                      transpose_rhs=False,
                                      conjugate_rhs=False,
                                      name=None):
  """Helper function used after the input has been cast to compact form."""
  diags_rank, rhs_rank = len(diagonals.shape), len(rhs.shape)

  if diags_rank < 2:
    raise ValueError(
        'Expected diagonals to have rank at least 2, got {}'.format(diags_rank))
  if rhs_rank != diags_rank and rhs_rank != diags_rank - 1:
    raise ValueError('Expected the rank of rhs to be {} or {}, got {}'.format(
        diags_rank - 1, diags_rank, rhs_rank))
  if diagonals.shape[-2] != 3:
    raise ValueError('Expected 3 diagonals, got {}'.format(diagonals.shape[-2]))
  if not diagonals.shape[:-2].is_compatible_with(rhs.shape[:diags_rank - 2]):
    raise ValueError('Batch shapes {} and {} are incompatible'.format(
        diagonals.shape[:-2], rhs.shape[:diags_rank - 2]))

  def check_num_lhs_matches_num_rhs():
    if diagonals.shape[-1] != rhs.shape[-2]:
      raise ValueError('Expected number of left-hand sides and right-hand '
                       'sides to be equal, got {} and {}'.format(
                           diagonals.shape[-1], rhs.shape[-2]))

  if rhs_rank == diags_rank - 1:
    # Rhs provided as a vector, ignoring transpose_rhs
    if conjugate_rhs:
      rhs = math_ops.conj(rhs)
    rhs = array_ops.expand_dims(rhs, -1)
    check_num_lhs_matches_num_rhs()
    return array_ops.squeeze(
        linalg_ops.tridiagonal_solve(diagonals, rhs, name), -1)

  if transpose_rhs:
    rhs = array_ops.matrix_transpose(rhs, conjugate=conjugate_rhs)
  elif conjugate_rhs:
    rhs = math_ops.conj(rhs)

  check_num_lhs_matches_num_rhs()
  result = linalg_ops.tridiagonal_solve(diagonals, rhs, name)
  return array_ops.matrix_transpose(result) if transpose_rhs else result
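As an illustration of the compact format (three stacked diagonals), the NumPy sketch below solves a small tridiagonal system by expanding the diagonals into a dense matrix; the padding conventions of the TensorFlow compact format are not reproduced here.

```python
import numpy as np

n = 5
rng = np.random.default_rng(2)
superdiag = rng.standard_normal(n - 1)
maindiag = rng.standard_normal(n) + 4.   # keep the system diagonally dominant
subdiag = rng.standard_normal(n - 1)
rhs = rng.standard_normal((n, 2))

dense = np.diag(maindiag) + np.diag(superdiag, k=1) + np.diag(subdiag, k=-1)
x = np.linalg.solve(dense, rhs)
print(np.allclose(dense @ x, rhs))
```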
Example 16
 def testBatchMatrix(self):
   matrix_0 = [[1, 2, 3], [4, 5, 6]]
   matrix_0_t = [[1, 4], [2, 5], [3, 6]]
   matrix_1 = [[11, 22, 33], [44, 55, 66]]
   matrix_1_t = [[11, 44], [22, 55], [33, 66]]
   batch_matrix = [matrix_0, matrix_1]  # Shape (2, 2, 3)
   expected_transposed = [matrix_0_t, matrix_1_t]  # Shape (2, 3, 2)
   with self.test_session():
     transposed = array_ops.matrix_transpose(batch_matrix)
     self.assertEqual((2, 3, 2), transposed.get_shape())
     self.assertAllEqual(expected_transposed, transposed.eval())
Example 17
 def testBatchMatrixDynamicallyDefined(self):
   matrix_0 = [[1, 2, 3], [4, 5, 6]]
   matrix_0_t = [[1, 4], [2, 5], [3, 6]]
   matrix_1 = [[11, 22, 33], [44, 55, 66]]
   matrix_1_t = [[11, 44], [22, 55], [33, 66]]
   batch_matrix = [matrix_0, matrix_1]  # Shape (2, 2, 3)
   expected_transposed = [matrix_0_t, matrix_1_t]  # Shape (2, 3, 2)
   with self.test_session():
     batch_matrix_ph = array_ops.placeholder(dtypes.int32)
     transposed = array_ops.matrix_transpose(batch_matrix_ph)
     self.assertAllEqual(
         expected_transposed,
         transposed.eval(feed_dict={batch_matrix_ph: batch_matrix}))
Example 18
 def _stddev(self):
   if (isinstance(self.scale, linalg.LinearOperatorIdentity) or
       isinstance(self.scale, linalg.LinearOperatorScaledIdentity) or
       isinstance(self.scale, linalg.LinearOperatorDiag)):
     return math_ops.abs(self.scale.diag_part())
   elif (isinstance(self.scale, linalg.LinearOperatorUDVHUpdate)
         and self.scale.is_self_adjoint):
     return math_ops.sqrt(array_ops.matrix_diag_part(
         self.scale.apply(self.scale.to_dense())))
   else:
     # TODO(b/35040238): Remove transpose once LinOp supports `transpose`.
     return math_ops.sqrt(array_ops.matrix_diag_part(
         self.scale.apply(array_ops.matrix_transpose(self.scale.to_dense()))))
  def test_real_hermitian_spectrum_gives_real_symmetric_operator(self):
    with self.cached_session() as sess:
      # This is a real and hermitian spectrum.
      spectrum = [[1., 2., 2.], [3., 4., 4.], [3., 4., 4.]]
      operator = linalg.LinearOperatorCirculant(spectrum)

      matrix_tensor = operator.to_dense()
      self.assertEqual(matrix_tensor.dtype, dtypes.complex64)
      matrix_t = array_ops.matrix_transpose(matrix_tensor)
      imag_matrix = math_ops.imag(matrix_tensor)
      matrix, matrix_transpose, imag_matrix = sess.run(
          [matrix_tensor, matrix_t, imag_matrix])

      np.testing.assert_allclose(0, imag_matrix, atol=1e-6)
      self.assertAllClose(matrix, matrix_transpose, atol=0)
Example 20
  def _symmetric_projection(self, n):
    """Compute a n x n symmetric projection matrix.

    Args:
      n: dimension.
    Returns:
      an n x n symmetric projection matrix, i.e. a matrix P s.t. P=P*P, P=P^T.
    """
    q = self._orthogonal_matrix(n)
    # randomly zeroing out some columns
    mask = math_ops.cast(random_ops.random_normal([n], seed=self.seed) > 0,
                         self.dtype)
    if self.seed:
      self.seed += 1
    c = math_ops.multiply(q, mask)
    return math_ops.matmul(c, array_ops.matrix_transpose(c))
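A minimal NumPy sketch (not from the original file) of the idea: zeroing out columns of an orthogonal matrix Q and forming C C^T yields a symmetric projection.

```python
import numpy as np

rng = np.random.default_rng(3)
q, _ = np.linalg.qr(rng.standard_normal((5, 5)))
mask = (rng.standard_normal(5) > 0).astype(float)

c = q * mask                 # zero out a random subset of columns
p = c @ c.T
print(np.allclose(p, p.T), np.allclose(p @ p, p))   # symmetric and idempotent
```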
  def _updated_mat(self, mat, v, diag):
    # Get dense matrix defined by its square root, which is an update of `mat`:
    # A = (mat + v D v^T) (mat + v D v^T)^T
    # D is the diagonal matrix with `diag` on the diagonal.

    # If diag is None, then it defaults to the identity matrix, so DV^T = V^T
    if diag is None:
      diag_vt = array_ops.matrix_transpose(v)
    else:
      diag_mat = array_ops.matrix_diag(diag)
      diag_vt = math_ops.matmul(diag_mat, v, adjoint_b=True)

    v_diag_vt = math_ops.matmul(v, diag_vt)
    sqrt = mat + v_diag_vt
    a = math_ops.matmul(sqrt, sqrt, adjoint_b=True)
    return a.eval()
def matrix_adjoint(a, name="matrix_adjoint"):
  """Transposes last two dimensions of tensor `a`, and takes complex conjugate.

  If `a` is real valued, the result is equivalent to `matrix_transpose`.

  For example:

  ```python
  # Matrix with no batch dimension.
  # 'x' is [[1 2 3j]
  #         [4 5 -6j]]
  tf.matrix_adjoint(x) ==> [[1 4]
                            [2 5]
                            [-3j 6j]]

  # Matrix with two batch dimensions.
  # x.shape is [1, 2, 3, 4]
  # tf.matrix_adjoint(x) is shape [1, 2, 4, 3]
  ```

  Note that `tf.matmul` provides kwargs allowing for adjoint of arguments.  This
  is done with minimal cost, and is preferable to using this function. E.g.

  ```
  # Good!  Adjoint is taken at minimal additional cost.
  tf.matmul(matrix, b, adjoint_b=True)

  # Inefficient!
  tf.matmul(matrix, tf.matrix_adjoint(b))
  ```

  Args:
    a: A `Tensor` with `rank >= 2`.
    name: A name for the operation (optional).

  Returns:
    A batch matrix `Tensor` with same `dtype` as `a`.

  Raises:
    ValueError:  If `a` is determined statically to have `rank < 2`.
  """
  with ops.name_scope(name, values=[a]):
    a = ops.convert_to_tensor(a, name="a")
    a_transpose = array_ops.matrix_transpose(a)
    return math_ops.conj(a_transpose)
Example 23
def _CholeskyGrad(op, grad):
  """Gradient for Cholesky."""

  # Gradient is l^{-H} @ ((l^{H} @ grad) * (tril(ones)-1/2*eye)) @ l^{-1}
  l = op.outputs[0]
  num_rows = array_ops.shape(l)[-1]
  batch_shape = array_ops.shape(l)[:-2]
  l_inverse = linalg_ops.matrix_triangular_solve(
      l, linalg_ops.eye(num_rows, batch_shape=batch_shape, dtype=l.dtype))

  middle = math_ops.matmul(l, grad, adjoint_a=True)
  middle = array_ops.matrix_set_diag(middle,
                                     0.5 * array_ops.matrix_diag_part(middle))
  middle = array_ops.matrix_band_part(middle, -1, 0)

  grad_a = math_ops.matmul(
      math_ops.matmul(l_inverse, middle, adjoint_a=True), l_inverse)

  grad_a += math_ops.conj(array_ops.matrix_transpose(grad_a))
  return grad_a * 0.5
Example 24
def _SelfAdjointEigV2Grad(op, grad_e, grad_v):
  """Gradient for SelfAdjointEigV2."""
  e = op.outputs[0]
  compute_v = op.get_attr("compute_v")
  # a = op.inputs[0], which satisfies
  # a[...,:,:] * v[...,:,i] = e[...,i] * v[...,i]
  with ops.control_dependencies([grad_e, grad_v]):
    if compute_v:
      v = op.outputs[1]
      # Construct the matrix f(i,j) = (i != j ? 1 / (e_i - e_j) : 0).
      # Notice that because of the term involving f, the gradient becomes
      # infinite (or NaN in practice) when eigenvalues are not unique.
      # Mathematically this should not be surprising, since for (k-fold)
      # degenerate eigenvalues, the corresponding eigenvectors are only defined
      # up to arbitrary rotation in a (k-dimensional) subspace.
      f = array_ops.matrix_set_diag(
          math_ops.reciprocal(
              array_ops.expand_dims(e, -2) - array_ops.expand_dims(e, -1)),
          array_ops.zeros_like(e))
      grad_a = math_ops.matmul(
          v,
          math_ops.matmul(
              array_ops.matrix_diag(grad_e) +
              f * math_ops.matmul(v, grad_v, adjoint_a=True),
              v,
              adjoint_b=True))
    else:
      _, v = linalg_ops.self_adjoint_eig(op.inputs[0])
      grad_a = math_ops.matmul(v,
                               math_ops.matmul(
                                   array_ops.matrix_diag(grad_e),
                                   v,
                                   adjoint_b=True))
    # The forward op only depends on the lower triangular part of a, so here we
    # symmetrize and take the lower triangle
    grad_a = array_ops.matrix_band_part(
        grad_a + math_ops.conj(array_ops.matrix_transpose(grad_a)), -1, 0)
    grad_a = array_ops.matrix_set_diag(grad_a,
                                       0.5 * array_ops.matrix_diag_part(grad_a))
    return grad_a
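The matrix `f` built above can be illustrated with a few lines of NumPy, mirroring the `expand_dims` arithmetic in the code, with zeros on the diagonal:

```python
import numpy as np

e = np.array([1.0, 2.5, 4.0])
diff = e[None, :] - e[:, None]                # expand_dims(e, -2) - expand_dims(e, -1)
f = 1.0 / np.where(diff == 0, np.inf, diff)   # reciprocal off the diagonal, 0 on it
print(f)
```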
Example 25
  def _Overdetermined(op, grad):
    """Gradients for the overdetermined case of MatrixSolveLs.

    This is the backprop for the solution to the normal equations of the first
    kind:
       X = F(A, B) = (A^T * A + lambda * I)^{-1} * A^T * B
    which solve the least squares problem
       min ||A * X - B||_F^2 + lambda ||X||_F^2.
    """
    a = op.inputs[0]
    b = op.inputs[1]
    x = op.outputs[0]
    l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
    # pylint: disable=protected-access
    chol = linalg_ops._RegularizedGramianCholesky(
        a, l2_regularizer=l2_regularizer, first_kind=True)
    # pylint: enable=protected-access
    # Temporary z = (A^T * A + lambda * I)^{-1} * grad.
    z = linalg_ops.cholesky_solve(chol, grad)
    xzt = math_ops.matmul(x, z, adjoint_b=True)
    zx_sym = xzt + array_ops.matrix_transpose(xzt)
    grad_a = -math_ops.matmul(a, zx_sym) + math_ops.matmul(b, z, adjoint_b=True)
    grad_b = math_ops.matmul(a, z)
    return (grad_a, grad_b, None)
 def _batch_matmul(self, x, transpose_x=False):
     if transpose_x:
         x = array_ops.matrix_transpose(x)
     self._check_x(x)
     return x
Example 27
 def _Adjoint(x):
     return math_ops.conj(array_ops.matrix_transpose(x))
 def _batch_matmul(self, x, transpose_x=False):
   if transpose_x:
     x = array_ops.matrix_transpose(x)
   diag_mat = array_ops.expand_dims(self._diag, -1)
   return math_ops.square(diag_mat) * x
Example 29
def _SvdGrad(op, grad_s, grad_u, grad_v):
    """Gradient for Svd based on Giles' algorithm. Reference at top of file."""

    if op.get_attr("compute_uv") and not op.get_attr("full_matrices"):
        raise NotImplementedError(
            "SVD gradient is not implemented for compute_uv=True and "
            "full_matrices=False.")

    a = op.inputs[0]
    a_shape = a.get_shape().with_rank_at_least(2)

    if op.get_attr("compute_uv"):
        # TODO(rmlarsen): Make this work with complex types.
        if a.dtype.is_complex:
            raise NotImplementedError(
                "SVD gradient is not implemented for complex types and "
                "compute_uv=True.")
        grad_u_shape = grad_u.get_shape().with_rank_at_least(2)
        grad_v_shape = grad_v.get_shape().with_rank_at_least(2)
        m = a_shape[-2].merge_with(grad_u_shape[-2])
        n = a_shape[-1].merge_with(grad_v_shape[-2])
        batch_shape = a_shape[:-2].merge_with(grad_u_shape[:-2]).merge_with(
            grad_v_shape[:-2])
        a_shape = batch_shape.concatenate([m, n])

    m = a_shape[-2].value
    n = a_shape[-1].value
    # TODO(rmlarsen): Make this work with placeholders.
    if m is None or n is None:
        raise NotImplementedError(
            "SVD gradient has not been implemented for input with unknown "
            "inner matrix shape.")

    if not op.get_attr("full_matrices") or not op.get_attr("compute_uv"):
        s, u, v = linalg_ops.svd(a, compute_uv=True, full_matrices=True)
    else:
        s = op.outputs[0]
        u = op.outputs[1]
        v = op.outputs[2]

    use_adjoint = False
    if m > n:
        # Compute the gradient for A^H = V * S^T * U^H, and (implicitly) take the
        # Hermitian transpose of the gradient at the end.
        use_adjoint = True
        m, n = n, m
        u, v = v, u
        grad_u, grad_v = grad_v, grad_u

    with ops.control_dependencies([grad_s, grad_u, grad_v]):
        grad_s_mat = array_ops.matrix_diag(grad_s)
        if not op.get_attr("compute_uv"):
            if use_adjoint:
                grad_a = math_ops.matmul(v[..., :, :m],
                                         math_ops.matmul(u, grad_s_mat),
                                         adjoint_b=True)
            else:
                grad_a = math_ops.matmul(
                    u,
                    math_ops.matmul(grad_s_mat, v[..., :, :m], adjoint_b=True))
            grad_a.set_shape(a_shape)
            return grad_a

        # TODO(rmlarsen): Define a gradient that is numerically stable for
        # abs(m-n) > 1. Currently this does not work because there are effectively
        # multiple singular values with value zero. I am not sure if this is a true
        # instability or if it simply throws off the finite difference gradient
        # checker.
        if abs(m - n) > 1:
            raise NotImplementedError(
                "svd gradient is not implemented for abs(m - n) > 1")
        s_mat = array_ops.matrix_diag(s)
        s2 = math_ops.square(s)

        # NOTICE: Because of the term involving f, the gradient becomes
        # infinite (or NaN in practice) when singular values are not unique.
        # Mathematically this should not be surprising, since for (k-fold)
        # degenerate singular values, the corresponding singular vectors are
        # only defined up to a (k-dimensional) subspace. In practice, this can
        # lead to numerical instability when singular values are close but not
        # exactly equal.
        f = array_ops.matrix_set_diag(
            math_ops.reciprocal(
                array_ops.expand_dims(s2, -2) - array_ops.expand_dims(s2, -1)),
            array_ops.zeros_like(s))
        s_inv_mat = array_ops.matrix_diag(math_ops.reciprocal(s))
        u_gu = math_ops.matmul(u, grad_u, adjoint_a=True)
        v_gv = math_ops.matmul(v, grad_v, adjoint_a=True)

        if m == n:
            f_u = f * u_gu
            f_v = f * v_gv
        else:
            dv2 = array_ops.matrix_transpose(
                v_gv[..., m:n, :m]) - v_gv[..., :m, m:n]
            f_u = f * u_gu
            f_v = f * v_gv[..., :m, :m]

        grad_a_nouv = (grad_s_mat +
                       math_ops.matmul(f_u + _linalg.adjoint(f_u), s_mat) +
                       math_ops.matmul(s_mat, f_v + _linalg.adjoint(f_v)))

        if m != n:
            grad_a_nouv = array_ops.concat(
                [grad_a_nouv, math_ops.matmul(s_inv_mat, dv2)], -1)

        if use_adjoint:
            # Use (U X V^H)^H = V (U X)^H.
            grad_a = math_ops.matmul(v,
                                     math_ops.matmul(u, grad_a_nouv),
                                     adjoint_b=True)
        else:
            grad_a = math_ops.matmul(
                u, math_ops.matmul(grad_a_nouv, v, adjoint_b=True))

        grad_a.set_shape(a_shape)
        return grad_a
Example 30
 def _batch_sqrt_matmul(self, x, transpose_x=False):
   if transpose_x:
     x = array_ops.matrix_transpose(x)
   diag_mat = array_ops.expand_dims(self._diag, -1)
   return diag_mat * x
    def do_filter(self, estimated_state, estimated_state_covariance,
                  predicted_observation, predicted_observation_covariance,
                  observation, observation_model, observation_noise):
        """Convenience function for scoring predictions.

    Scores a prediction against an observation, and computes the updated
    posterior over states.

    Shapes given below for arguments are for single-model Kalman filtering
    (e.g. KalmanFilter). For ensembles, prior_state and prior_state_var are
    same-length tuples of values corresponding to each model.

    Args:
      estimated_state: A prior mean over states [batch size x state dimension]
      estimated_state_covariance: Covariance of state prior [batch size x D x
          D], with D depending on the Kalman filter implementation (typically
          the state dimension).
      predicted_observation: A prediction for the observed value, such as that
          returned by observed_from_state. A [batch size x num features] Tensor.
      predicted_observation_covariance: A covariance matrix corresponding to
          `predicted_observation`, a [batch size x num features x num features]
          Tensor.
      observation: The observed value corresponding to the predictions
          given [batch size x observation dimension]
      observation_model: The [batch size x observation dimension x model state
          dimension] Tensor indicating how a particular state is mapped to
          (pre-noise) observations for each part of the batch.
      observation_noise: A [batch size x observation dimension x observation
          dimension] Tensor or [observation dimension x observation dimension]
          Tensor with covariance matrices to use for each part of the batch (a
          two-dimensional input will be broadcast).
    Returns:
      posterior_state, posterior_state_var: Posterior mean and
          covariance, updated versions of prior_state and
          prior_state_var.
      log_prediction_prob: Log probability of the observations under
          the priors, suitable for optimization (should be maximized).

    """
        symmetrized_observation_covariance = 0.5 * (
            predicted_observation_covariance +
            array_ops.matrix_transpose(predicted_observation_covariance))
        instability_message = (
            "This may occur due to numerically unstable filtering when there is "
            "a large difference in posterior variances, or when inferences are "
            "near-deterministic. Considering tuning the "
            "'filtering_maximum_posterior_variance_ratio' or "
            "'filtering_minimum_posterior_variance' parameters in your "
            "StateSpaceModelConfiguration, or tuning the transition matrix.")
        symmetrized_observation_covariance = numerics.verify_tensor_all_finite(
            symmetrized_observation_covariance,
            "Predicted observation covariance was not finite. {}".format(
                instability_message))
        diag = array_ops.matrix_diag_part(symmetrized_observation_covariance)
        min_diag = math_ops.reduce_min(diag)
        non_negative_assert = control_flow_ops.Assert(
            min_diag >= 0.,
            [("The predicted observation covariance "
              "has a negative diagonal entry. {}").format(instability_message),
             min_diag])
        with ops.control_dependencies([non_negative_assert]):
            observation_covariance_cholesky = linalg_ops.cholesky(
                symmetrized_observation_covariance)
        log_prediction_prob = distributions.MultivariateNormalTriL(
            predicted_observation,
            observation_covariance_cholesky).log_prob(observation)
        (posterior_state,
         posterior_state_var) = self.posterior_from_prior_state(
             prior_state=estimated_state,
             prior_state_var=estimated_state_covariance,
             observation=observation,
             observation_model=observation_model,
             predicted_observations=(predicted_observation,
                                     predicted_observation_covariance),
             observation_noise=observation_noise)
        return (posterior_state, posterior_state_var, log_prediction_prob)
Example 32
    def _matmul(self, x, adjoint=False, adjoint_arg=False):
        # Here we heavily rely on Roth's column Lemma [1]:
        # (A x B) * vec X = vec BXA^T,
        # where vec stacks all the columns of the matrix under each other. In our
        # case, x represents a batch of vec X (i.e. we think of x as a batch of
        # column vectors, rather than a matrix). Each member of the batch can be
        # reshaped to a matrix (hence we get a batch of matrices).
        # We can iteratively apply this lemma by noting that if B is a Kronecker
        # product, then we can apply the lemma again.

        # [1] W. E. Roth, "On direct product matrices,"
        # Bulletin of the American Mathematical Society, vol. 40, pp. 461-468,
        # 1934

        # Efficiency

        # Naively doing the Kronecker product, by calculating the dense matrix and
        # applying it can take cubic time in the size of domain_dimension
        # (assuming a square matrix). The other issue is that calculating the dense
        # matrix can be prohibitively expensive, in that it can take a large amount
        # of memory.
        #
        # This implementation avoids this memory blow up by only computing matmuls
        # with the factors. In this way, we don't have to realize the dense matrix.
        # In terms of complexity, if we have Kronecker Factors of size:
        # (n1, n1), (n2, n2), (n3, n3), ... (nJ, nJ), with N = \prod n_i, and we
        # have as input a [N, M] matrix, the naive approach would take O(N^2 M).
        # With this approach (ignoring reshaping of tensors and transposes for now),
        # the time complexity can be O(M * (\sum n_i) * N). There is also the
        # benefit of batched multiplication (In this example, the batch size is
        # roughly M * N) so this can be much faster. However, not factored in are
        # the costs of the several transposing of tensors, which can affect cache
        # behavior.

        # Below we document the shape manipulation for adjoint=False,
        # adjoint_arg=False, but the general case of different adjoints is still
        # handled.

        if adjoint_arg:
            x = linalg.adjoint(x)

        # Always add a batch dimension to enable broadcasting to work.
        batch_shape = array_ops.concat(
            [array_ops.ones_like(self.batch_shape_tensor()), [1, 1]], 0)
        x += array_ops.zeros(batch_shape, dtype=x.dtype.base_dtype)

        # x has shape [B, R, C], where B represents some number of batch dimensions,
        # R represents the number of rows, and C represents the number of columns.
        # In order to apply Roth's column lemma, we need to operate on a batch of
        # column vectors, so we reshape into a batch of column vectors. We put it
        # at the front to ensure that broadcasting between operators to the batch
        # dimensions B still works.
        output = _rotate_last_dim(x, rotate_right=True)

        # Also expand the shape to be [A, C, B, R]. The first dimension will be
        # used to accumulate dimensions from each operator matmul.
        output = output[array_ops.newaxis, ...]

        # In this loop, A is going to refer to the value of the accumulated
        # dimension. A = 1 at the start, and will end up being self.range_dimension.
        # V will refer to the last dimension. V = R at the start, and will end up
        # being 1 in the end.
        for operator in self.operators[:-1]:
            # Reshape output from [A, C, B, V] to be
            # [A, C, B, V / op.domain_dimension, op.domain_dimension]
            if adjoint:
                operator_dimension = operator.range_dimension_tensor()
            else:
                operator_dimension = operator.domain_dimension_tensor()

            output = _unvec_by(output, operator_dimension)

            # We are computing (XA^T) = (AX^T)^T.
            # output has [A, C, B, V / op.domain_dimension, op.domain_dimension],
            # which is being converted to:
            # [A, C, B, V / op.domain_dimension, op.range_dimension]
            output = array_ops.matrix_transpose(output)
            output = operator.matmul(output,
                                     adjoint=adjoint,
                                     adjoint_arg=False)
            output = array_ops.matrix_transpose(output)
            # Rearrange it to [A * op.range_dimension, C, B, V / op.domain_dimension]
            output = _rotate_last_dim(output, rotate_right=False)
            output = _vec(output)
            output = _rotate_last_dim(output, rotate_right=True)

        # After the loop, we will have
        # A = self.range_dimension / op[-1].range_dimension
        # V = op[-1].domain_dimension

        # We convert that using matvec to get:
        # [A, C, B, op[-1].range_dimension]
        output = self.operators[-1].matvec(output, adjoint=adjoint)
        # Rearrange shape to be [B1, ... Bn, self.range_dimension, C]
        output = _rotate_last_dim(output, rotate_right=False)
        output = _vec(output)
        output = _rotate_last_dim(output, rotate_right=False)

        if x.shape.is_fully_defined():
            column_dim = x.shape[-1]
            broadcast_batch_shape = common_shapes.broadcast_shape(
                x.shape[:-2], self.batch_shape)
            if adjoint:
                matrix_dimensions = [self.domain_dimension, column_dim]
            else:
                matrix_dimensions = [self.range_dimension, column_dim]

            output.set_shape(
                broadcast_batch_shape.concatenate(matrix_dimensions))

        return output
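Roth's column lemma quoted at the top of this method is easy to verify numerically. A minimal NumPy check (not from the original file), with `vec` stacking columns:

```python
import numpy as np

rng = np.random.default_rng(4)
A = rng.standard_normal((3, 3))
B = rng.standard_normal((2, 2))
X = rng.standard_normal((2, 3))

vec = lambda m: m.reshape(-1, order="F")      # stack the columns of m
print(np.allclose(np.kron(A, B) @ vec(X), vec(B @ X @ A.T)))
```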
Example 33
def _vec(x):
    """Stacks column of matrix to form a single column."""
    return array_ops.reshape(
        array_ops.matrix_transpose(x),
        array_ops.concat([array_ops.shape(x)[:-2], [-1]], axis=0))
Example 34
    def _solve(self, rhs, adjoint=False, adjoint_arg=False):
        # Here we follow the same use of Roth's column lemma as in `matmul`, with
        # the key difference that we replace all `matmul` instances with `solve`.
        # This follows from the property that inv(A x B) = inv(A) x inv(B).

        # Below we document the shape manipulation for adjoint=False,
        # adjoint_arg=False, but the general case of different adjoints is still
        # handled.

        if adjoint_arg:
            rhs = linalg.adjoint(rhs)

        # Always add a batch dimension to enable broadcasting to work.
        batch_shape = array_ops.concat(
            [array_ops.ones_like(self.batch_shape_tensor()), [1, 1]], 0)
        rhs += array_ops.zeros(batch_shape, dtype=rhs.dtype.base_dtype)

        # rhs has shape [B, R, C], where B represents some number of batch
        # dimensions,
        # R represents the number of rows, and C represents the number of columns.
        # In order to apply Roth's column lemma, we need to operate on a batch of
        # column vectors, so we reshape into a batch of column vectors. We put it
        # at the front to ensure that broadcasting between operators to the batch
        # dimensions B still works.
        output = _rotate_last_dim(rhs, rotate_right=True)

        # Also expand the shape to be [A, C, B, R]. The first dimension will be
        # used to accumulate dimensions from each operator matmul.
        output = output[array_ops.newaxis, ...]

        # In this loop, A is going to refer to the value of the accumulated
        # dimension. A = 1 at the start, and will end up being self.range_dimension.
        # V will refer to the last dimension. V = R at the start, and will end up
        # being 1 in the end.
        for operator in self.operators[:-1]:
            # Reshape output from [A, C, B, V] to be
            # [A, C, B, V / op.domain_dimension, op.domain_dimension]
            if adjoint:
                operator_dimension = operator.range_dimension_tensor()
            else:
                operator_dimension = operator.domain_dimension_tensor()

            output = _unvec_by(output, operator_dimension)

            # We are computing (XA^-1^T) = (A^-1 X^T)^T.
            # output has [A, C, B, V / op.domain_dimension, op.domain_dimension],
            # which is being converted to:
            # [A, C, B, V / op.domain_dimension, op.range_dimension]
            output = array_ops.matrix_transpose(output)
            output = operator.solve(output, adjoint=adjoint, adjoint_arg=False)
            output = array_ops.matrix_transpose(output)
            # Rearrange it to [A * op.range_dimension, C, B, V / op.domain_dimension]
            output = _rotate_last_dim(output, rotate_right=False)
            output = _vec(output)
            output = _rotate_last_dim(output, rotate_right=True)

        # After the loop, we will have
        # A = self.range_dimension / op[-1].range_dimension
        # V = op[-1].domain_dimension

        # We convert that using matvec to get:
        # [A, C, B, op[-1].range_dimension]
        output = self.operators[-1].solvevec(output, adjoint=adjoint)
        # Rearrange shape to be [B1, ... Bn, self.range_dimension, C]
        output = _rotate_last_dim(output, rotate_right=False)
        output = _vec(output)
        output = _rotate_last_dim(output, rotate_right=False)

        if rhs.shape.is_fully_defined():
            column_dim = rhs.shape[-1]
            broadcast_batch_shape = common_shapes.broadcast_shape(
                rhs.shape[:-2], self.batch_shape)
            if adjoint:
                matrix_dimensions = [self.domain_dimension, column_dim]
            else:
                matrix_dimensions = [self.range_dimension, column_dim]

            output.set_shape(
                broadcast_batch_shape.concatenate(matrix_dimensions))

        return output
Example 35
 def testTensorWithStaticRankLessThanTwoRaisesBecauseNotAMatrix(self):
   vector = [1, 2, 3]
   with self.test_session():
     with self.assertRaisesRegexp(ValueError, "should be a "):
       array_ops.matrix_transpose(vector)
def _reshape_for_efficiency(a,
                            b,
                            transpose_a=False,
                            transpose_b=False,
                            adjoint_a=False,
                            adjoint_b=False):
    """Maybe reshape a, b, and return an inverse map.  For matmul/solve."""
    def identity(x):
        return x

    # At this point, we have not taken transpose/adjoint of a/b.
    still_need_to_transpose = True

    if a.shape.ndims is None or b.shape.ndims is None:
        return a, b, identity, still_need_to_transpose

    # This could be handled in the future, but seems less common.
    if a.shape.ndims >= b.shape.ndims:
        return a, b, identity, still_need_to_transpose

    # From now on, we might modify b, but will not modify a.

    # Suppose:
    #   a.shape =     C + [m, n]
    #   b.shape = S + C + [n, r]
    b_extra_ndims = b.shape.ndims - a.shape.ndims

    # b_extra_sh = S, b_main_sh = C + [n, r]
    b_extra_sh = array_ops.shape(b)[:b_extra_ndims]
    b_main_sh = array_ops.shape(b)[b_extra_ndims:]

    # No reason to flip unless the extra dims of b are big enough.  Why?
    # Assume adjoint/transpose = False.  Then...
    # By not flipping, we have to replicate a to shape
    #   b_extra_sh + a.shape,
    # which could use extra memory.  But in all cases, the final output has shape
    #   b_extra_sh + a.shape[:-1] + [b.shape[-1]]
    # So we only end up creating a larger object if the end dim of b is smaller
    # than the end dim of a.  This often happens, e.g. if b was a vector that was
    # expanded to a matrix (by appending a singleton).

    # Since adjoint/transpose may not be False, we must make adjustments here.
    # The dim of b that holds the multiple equations.
    a_domain_sz_ = a.shape[-2 if adjoint_a or transpose_a else -1]
    b_eq_sz_ = b.shape[-2 if adjoint_b or transpose_b else -1]
    b_extra_sz_ = (np.prod(b.shape[:b_extra_ndims].as_list())
                   if b.shape[:b_extra_ndims].is_fully_defined() else None)
    if (a_domain_sz_ is not None and b_eq_sz_ is not None
            and b_extra_sz_ is not None):
        if b_extra_sz_ < 2 or a_domain_sz_ <= b_eq_sz_:
            return a, b, identity, still_need_to_transpose

    # At this point, we're flipping for sure!
    # Any transposes/adjoints will happen here explicitly, rather than in calling
    # code.  Why?  To avoid having to write separate complex code for each case.
    if adjoint_a:
        a = array_ops.matrix_transpose(a, conjugate=True)
    elif transpose_a:
        a = array_ops.matrix_transpose(a, conjugate=False)
    if adjoint_b:
        b = array_ops.matrix_transpose(b, conjugate=True)
    elif transpose_b:
        b = array_ops.matrix_transpose(b, conjugate=False)
    still_need_to_transpose = False

    # Recompute shapes, since the transpose/adjoint may have changed them.
    b_extra_sh = array_ops.shape(b)[:b_extra_ndims]
    b_main_sh = array_ops.shape(b)[b_extra_ndims:]

    # Permutation to put the extra dims at the end.
    perm = (np.concatenate(
        (np.arange(b_extra_ndims, b.shape.ndims), np.arange(0, b_extra_ndims)),
        0))
    b_extra_on_end = array_ops.transpose(b, perm=perm)

    # Now squash this end into one long dim.
    b_squashed_end = array_ops.reshape(
        b_extra_on_end, array_ops.concat((b_main_sh[:-1], [-1]), 0))

    def reshape_inv(y):
        # Expand the extra dims hanging off the end, "b_extra_sh".
        # Note we use y_sh[:-1] + [b_main_sh[-1]] rather than b_main_sh, because y
        # could have different batch dims than a and b, because of broadcasting.
        y_extra_shape = array_ops.concat(
            (array_ops.shape(y)[:-1], [b_main_sh[-1]], b_extra_sh), 0)
        y_extra_on_end = array_ops.reshape(y, y_extra_shape)
        inverse_perm = np.argsort(perm)
        return array_ops.transpose(y_extra_on_end, perm=inverse_perm)

    return a, b_squashed_end, reshape_inv, still_need_to_transpose
    def __init__(self,
                 loc=None,
                 covariance_matrix=None,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="MultivariateNormalFullCovariance"):
        """Construct Multivariate Normal distribution on `R^k`.

    The `batch_shape` is the broadcast shape between `loc` and
    `covariance_matrix` arguments.

    The `event_shape` is given by last dimension of the matrix implied by
    `covariance_matrix`. The last dimension of `loc` (if provided) must
    broadcast with this.

    A non-batch `covariance_matrix` matrix is a `k x k` symmetric positive
    definite matrix.  In other words it is (real) symmetric with all eigenvalues
    strictly positive.

    Additional leading dimensions (if any) will index batches.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
        `b >= 0` and `k` is the event size.
      covariance_matrix: Floating-point, symmetric positive definite `Tensor` of
        same `dtype` as `loc`.  The strict upper triangle of `covariance_matrix`
        is ignored, so if `covariance_matrix` is not symmetric no error will be
        raised (unless `validate_args is True`).  `covariance_matrix` has shape
        `[B1, ..., Bb, k, k]` where `b >= 0` and `k` is the event size.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      ValueError: if neither `loc` nor `covariance_matrix` are specified.
    """
        parameters = dict(locals())

        # Convert the covariance_matrix up to a scale_tril and call MVNTriL.
        with ops.name_scope(name) as name:
            with ops.name_scope("init", values=[loc, covariance_matrix]):
                if covariance_matrix is None:
                    scale_tril = None
                else:
                    covariance_matrix = ops.convert_to_tensor(
                        covariance_matrix, name="covariance_matrix")
                    if validate_args:
                        covariance_matrix = control_flow_ops.with_dependencies(
                            [
                                check_ops.assert_near(
                                    covariance_matrix,
                                    array_ops.matrix_transpose(
                                        covariance_matrix),
                                    message="Matrix was not symmetric")
                            ], covariance_matrix)
                    # No need to validate that covariance_matrix is non-singular.
                    # LinearOperatorLowerTriangular has an assert_non_singular method that
                    # is called by the Bijector.
                    # However, cholesky() ignores the upper triangular part, so we do need
                    # to separately assert symmetric.
                    scale_tril = linalg_ops.cholesky(covariance_matrix)
                super(MultivariateNormalFullCovariance,
                      self).__init__(loc=loc,
                                     scale_tril=scale_tril,
                                     validate_args=validate_args,
                                     allow_nan_stats=allow_nan_stats,
                                     name=name)
        self._parameters = parameters
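A small NumPy sketch (illustrative only) of the validation and factorization steps above: check that the covariance is (near-)symmetric, then take its Cholesky factor.

```python
import numpy as np

rng = np.random.default_rng(5)
a = rng.standard_normal((3, 3))
covariance_matrix = a @ a.T + 1e-3 * np.eye(3)   # symmetric positive definite

np.testing.assert_allclose(covariance_matrix, covariance_matrix.T, atol=1e-12)
scale_tril = np.linalg.cholesky(covariance_matrix)
print(np.allclose(scale_tril @ scale_tril.T, covariance_matrix))
```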
def random_normal_correlated_columns(shape,
                                     mean=0.0,
                                     stddev=1.0,
                                     dtype=dtypes.float32,
                                     eps=1e-4,
                                     seed=None):
    """Batch matrix with (possibly complex) Gaussian entries and correlated cols.

  Returns random batch matrix `A` with specified element-wise `mean`, `stddev`,
  living close to an embedded hyperplane.

  Suppose `shape[-2:] = (M, N)`.

  If `M < N`, `A` is a random `M x N` [batch] matrix with iid Gaussian entries.

  If `M >= N`, then the columns of `A` will be made almost dependent as follows:

  ```
  L = random normal N x N-1 matrix, mean = 0, stddev = 1 / sqrt(N - 1)
  B = random normal M x N-1 matrix, mean = 0, stddev = stddev.

  G = (L B^H)^H, a random normal M x N matrix, living on N-1 dim hyperplane
  E = a random normal M x N matrix, mean = 0, stddev = eps
  mu = a constant M x N matrix, equal to the argument "mean"

  A = G + E + mu
  ```

  Args:
    shape:  Python list of integers.
      Shape of the returned tensor.  Must be at least length two.
    mean:  `Tensor` giving mean of normal to sample from.
    stddev:  `Tensor` giving stddev of normal to sample from.
    dtype:  `TensorFlow` `dtype` or numpy dtype
    eps:  Distance each column is perturbed from the low-dimensional subspace.
    seed:  Python integer seed for the RNG.

  Returns:
    `Tensor` with desired shape and dtype.

  Raises:
    ValueError:  If `shape` is not at least length 2.
  """
    dtype = dtypes.as_dtype(dtype)

    if len(shape) < 2:
        raise ValueError(
            "Argument shape must be at least length 2.  Found: %s" % shape)

    # Shape is the final shape, e.g. [..., M, N]
    shape = list(shape)
    batch_shape = shape[:-2]
    m, n = shape[-2:]

    # If there is only one column, "they" are by definition correlated.
    if n < 2 or n < m:
        return random_normal(shape,
                             mean=mean,
                             stddev=stddev,
                             dtype=dtype,
                             seed=seed)

    # Shape of the matrix with only n - 1 columns that we will embed in higher
    # dimensional space.
    smaller_shape = batch_shape + [m, n - 1]

    # Shape of the embedding matrix, mapping batch matrices
    # from [..., N-1, M] to [..., N, M]
    embedding_mat_shape = batch_shape + [n, n - 1]

    # This stddev for the embedding_mat ensures final result has correct stddev.
    stddev_mat = 1 / np.sqrt(n - 1)

    with ops.name_scope("random_normal_correlated_columns"):
        smaller_mat = random_normal(smaller_shape,
                                    mean=0.0,
                                    stddev=stddev_mat,
                                    dtype=dtype,
                                    seed=seed)

        if seed is not None:
            seed += 1287

        embedding_mat = random_normal(embedding_mat_shape,
                                      dtype=dtype,
                                      seed=seed)

        embedded_t = math_ops.matmul(embedding_mat,
                                     smaller_mat,
                                     transpose_b=True)
        embedded = array_ops.matrix_transpose(embedded_t)

        mean_mat = array_ops.ones_like(embedded) * mean

        return embedded + random_normal(shape, stddev=eps,
                                        dtype=dtype) + mean_mat
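To see why the columns come out (almost) linearly dependent, here is a minimal NumPy sketch (not from the original file) of the construction in the docstring: before the small perturbation `E` is added, `G = (L B^H)^H` has rank at most N - 1.

```python
import numpy as np

rng = np.random.default_rng(6)
m, n = 6, 4
l = rng.standard_normal((n, n - 1)) / np.sqrt(n - 1)
b = rng.standard_normal((m, n - 1))

g = (l @ b.T).T                       # shape (m, n); columns span an (n-1)-dim subspace
print(np.linalg.matrix_rank(g))       # 3 == n - 1
```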
Example 39
def sign_magnitude_positive_definite(raw,
                                     off_diagonal_scale=0.,
                                     overall_scale=0.):
    """Constructs a positive definite matrix from an unconstrained input matrix.

  We want to keep the whole matrix on a log scale, but also allow off-diagonal
  elements to be negative, so the sign of off-diagonal elements is modeled
  separately from their magnitude (using the lower and upper triangles
  respectively). Specifically:

  for i < j, we have:
    output_cholesky[i, j] = raw[j, i] / (abs(raw[j, i]) + 1) *
        exp((off_diagonal_scale + overall_scale + raw[i, j]) / 2)

  output_cholesky[i, i] = exp((raw[i, i] + overall_scale) / 2)

  output = output_cholesky^T * output_cholesky

  where raw, off_diagonal_scale, and overall_scale are
  un-constrained real-valued variables. The resulting values are stable
  around zero due to the exponential (and the softsign keeps the function
  smooth).

  Args:
    raw: A [..., M, M] Tensor.
    off_diagonal_scale: A scalar or [...] shaped Tensor controlling the relative
        scale of off-diagonal values in the output matrix.
    overall_scale: A scalar or [...] shaped Tensor controlling the overall scale
        of the output matrix.
  Returns:
    The `output` matrix described above, a [..., M, M] positive definite matrix.

  """
    raw = ops.convert_to_tensor(raw)
    diagonal = array_ops.matrix_diag_part(raw)

    def _right_pad_with_ones(tensor, target_rank):
        # Allow broadcasting even if overall_scale and off_diagonal_scale have batch
        # dimensions
        tensor = ops.convert_to_tensor(tensor, dtype=raw.dtype.base_dtype)
        return array_ops.reshape(
            tensor,
            array_ops.concat([
                array_ops.shape(tensor),
                array_ops.ones([target_rank - array_ops.rank(tensor)],
                               dtype=target_rank.dtype)
            ],
                             axis=0))

    # We divide the log values by 2 to compensate for the squaring that happens
    # when transforming Cholesky factors into positive definite matrices.
    sign_magnitude = (gen_math_ops.exp(
        (raw + _right_pad_with_ones(off_diagonal_scale, array_ops.rank(raw)) +
         _right_pad_with_ones(overall_scale, array_ops.rank(raw))) / 2.) *
                      nn.softsign(array_ops.matrix_transpose(raw)))
    sign_magnitude.set_shape(raw.get_shape())
    cholesky_factor = array_ops.matrix_set_diag(
        input=array_ops.matrix_band_part(sign_magnitude, 0, -1),
        diagonal=gen_math_ops.exp(
            (diagonal +
             _right_pad_with_ones(overall_scale, array_ops.rank(diagonal))) /
            2.))
    return math_ops.matmul(cholesky_factor, cholesky_factor, transpose_a=True)
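A minimal usage sketch of the helper above, assuming a TF1-style graph and session like the rest of these examples (the 4 x 4 random input is illustrative): the result should be symmetric, and its Cholesky factorization should succeed because the construction forces strictly positive diagonal entries on the Cholesky factor.

import numpy as np
import tensorflow as tf

raw_np = np.random.randn(4, 4).astype(np.float32)
pd = sign_magnitude_positive_definite(tf.constant(raw_np))
with tf.Session() as sess:
  pd_val = sess.run(pd)
np.testing.assert_allclose(pd_val, pd_val.T, atol=1e-3)  # symmetric
np.linalg.cholesky(pd_val)                               # positive definite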
Example 40
def _SvdGrad(op, grad_s, grad_u, grad_v):
    """Gradient for the singular value decomposition."""

    # The derivation for the compute_uv=False case, and most of
    # the derivation for the full_matrices=True case, are in
    # Giles' paper (see reference at top of file).  A derivation for
    # the full_matrices=False case is available at
    # https://j-towns.github.io/papers/svd-derivative.pdf
    # The derivation for complex valued SVD can be found in
    # https://re-ra.xyz/misc/complexsvd.pdf or
    # https://giggleliu.github.io/2019/04/02/einsumbp.html
    a = op.inputs[0]
    a_shape = a.get_shape().with_rank_at_least(2)
    grad_s = math_ops.cast(grad_s, a.dtype)
    grad_s_mat = array_ops.matrix_diag(grad_s)

    if not op.get_attr("compute_uv"):
        s, u, v = linalg_ops.svd(a, compute_uv=True)
        grad_a = math_ops.matmul(
            u, math_ops.matmul(grad_s_mat, v, adjoint_b=True))
        grad_a.set_shape(a_shape)
        return grad_a

    full_matrices = op.get_attr("full_matrices")

    grad_u_shape = grad_u.get_shape().with_rank_at_least(2)
    grad_v_shape = grad_v.get_shape().with_rank_at_least(2)
    m = a_shape.dims[-2].merge_with(grad_u_shape[-2])
    n = a_shape.dims[-1].merge_with(grad_v_shape[-2])
    batch_shape = a_shape[:-2].merge_with(grad_u_shape[:-2]).merge_with(
        grad_v_shape[:-2])
    a_shape = batch_shape.concatenate([m, n])

    m = a_shape.dims[-2].value
    n = a_shape.dims[-1].value
    # TODO(rmlarsen): Make this work with placeholders.
    if m is None or n is None:
        raise NotImplementedError(
            "SVD gradient has not been implemented for input with unknown "
            "inner matrix shape.")

    s = op.outputs[0]
    u = op.outputs[1]
    v = op.outputs[2]
    s = math_ops.cast(s, a.dtype)

    use_adjoint = False
    if m > n:
        # Compute the gradient for A^H = V * S^T * U^H, and (implicitly) take the
        # Hermitian transpose of the gradient at the end.
        use_adjoint = True
        m, n = n, m
        u, v = v, u
        grad_u, grad_v = grad_v, grad_u

    with ops.control_dependencies([grad_s, grad_u, grad_v]):
        if full_matrices and abs(m - n) > 1:
            raise NotImplementedError(
                "svd gradient is not implemented for abs(m - n) > 1 "
                "when full_matrices is True")
        s_mat = array_ops.matrix_diag(s)
        s2 = math_ops.square(s)

        # NOTICE: Because of the term involving f, the gradient becomes
        # infinite (or NaN in practice) when singular values are not unique.
        # Mathematically this should not be surprising, since for (k-fold)
        # degenerate singular values, the corresponding singular vectors are
        # only defined up to a (k-dimensional) subspace. In practice, this can
        # lead to numerical instability when singular values are close but not
        # exactly equal.

        s_shape = array_ops.shape(s)
        f = array_ops.matrix_set_diag(
            _SafeReciprocal(
                array_ops.expand_dims(s2, -2) - array_ops.expand_dims(s2, -1)),
            array_ops.zeros_like(s))
        s_inv_mat = array_ops.matrix_diag(_SafeReciprocal(s))

        v1 = v[..., :, :m]
        grad_v1 = grad_v[..., :, :m]

        u_gu = math_ops.matmul(u, grad_u, adjoint_a=True)
        v_gv = math_ops.matmul(v1, grad_v1, adjoint_a=True)

        f_u = f * u_gu
        f_v = f * v_gv

        term1_nouv = (grad_s_mat +
                      math_ops.matmul(f_u + _linalg.adjoint(f_u), s_mat) +
                      math_ops.matmul(s_mat, f_v + _linalg.adjoint(f_v)))

        term1 = math_ops.matmul(
            u, math_ops.matmul(term1_nouv, v1, adjoint_b=True))

        if m == n:
            grad_a_before_transpose = term1
        else:
            gv1t = array_ops.matrix_transpose(grad_v1, conjugate=True)
            gv1t_v1 = math_ops.matmul(gv1t, v1)
            term2_nous = gv1t - math_ops.matmul(gv1t_v1, v1, adjoint_b=True)

            if full_matrices:
                v2 = v[..., :, m:n]
                grad_v2 = grad_v[..., :, m:n]

                v1t_gv2 = math_ops.matmul(v1, grad_v2, adjoint_a=True)
                term2_nous -= math_ops.matmul(v1t_gv2, v2, adjoint_b=True)

            u_s_inv = math_ops.matmul(u, s_inv_mat)
            term2 = math_ops.matmul(u_s_inv, term2_nous)

            grad_a_before_transpose = term1 + term2

        if a.dtype.is_complex:
            eye = _linalg.eye(s_shape[-1],
                              batch_shape=s_shape[:-1],
                              dtype=a.dtype)
            l = eye * v_gv
            term3_nouv = math_ops.matmul(s_inv_mat, _linalg.adjoint(l) - l)
            term3 = 1 / 2. * math_ops.matmul(
                u, math_ops.matmul(term3_nouv, v1, adjoint_b=True))

            grad_a_before_transpose += term3

        if use_adjoint:
            grad_a = array_ops.matrix_transpose(grad_a_before_transpose,
                                                conjugate=True)
        else:
            grad_a = grad_a_before_transpose

        grad_a.set_shape(a_shape)
        return grad_a
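To see the degeneracy issue from the NOTICE above concretely, the short NumPy sketch below rebuilds the `f` matrix (f_matrix is an illustrative helper; unlike `_SafeReciprocal` it does not guard the division): repeated singular values produce infinite off-diagonal entries, and nearly equal ones produce very large entries.

import numpy as np

def f_matrix(s):
  s2 = np.square(s)
  with np.errstate(divide='ignore'):
    f = 1.0 / (s2[None, :] - s2[:, None])   # f[i, j] = 1 / (s_j^2 - s_i^2)
  np.fill_diagonal(f, 0.0)
  return f

print(f_matrix(np.array([3., 2., 1.])))   # finite entries
print(f_matrix(np.array([2., 2., 1.])))   # inf where singular values coincide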
Example 41
 def _batch_matmul(self, x, transpose_x=False):
   if transpose_x:
     x = array_ops.matrix_transpose(x)
   diag_mat = array_ops.expand_dims(self._diag, -1)
   return math_ops.square(diag_mat) * x
Example 42
def _MatrixSquareRootGrad(op, grad):
    """Gradient for MatrixSquareRoot."""

    # Let A be an m x m square matrix (or batch of matrices)
    # Let R = sqrtm(A)
    # By definition, A = RR
    # Take the differential: dA = d(RR) = RdR + dRR
    # Solve the resulting Sylvester equation for dR

    # Used to find Kronecker products within the Sylvester equation
    def _KroneckerProduct(b1, b2):
        """Computes the Kronecker product of two batches of square matrices."""
        b1_shape = array_ops.shape(b1)
        b2_shape = array_ops.shape(b2)
        b1_order = b1_shape[-1]
        b2_order = b2_shape[-1]

        shape_slice_size = [math_ops.subtract(array_ops.size(b1_shape), 2)]
        shape_slice = array_ops.slice(
            b1_shape, [0], shape_slice_size)  # Same for both batches
        b1_reshape_shape = array_ops.concat(
            [shape_slice, [b1_order], [1], [b1_order], [1]], 0)
        b2_reshape_shape = array_ops.concat(
            [shape_slice, [1], [b2_order], [1], [b2_order]], 0)

        b1_reshape = array_ops.reshape(b1, b1_reshape_shape)
        b2_reshape = array_ops.reshape(b2, b2_reshape_shape)

        order_prod = b1_order * b2_order
        kprod_shape = array_ops.concat(
            [shape_slice, [order_prod], [order_prod]], 0)
        return array_ops.reshape(b1_reshape * b2_reshape, kprod_shape)

    sqrtm = op.outputs[0]  # R
    shape = array_ops.shape(sqrtm)
    order = shape[-1]  # m
    matrix_count = math_ops.reduce_prod(shape[0:-2])

    # Get batch of m x m identity matrices
    eye = linalg_ops.eye(order, dtype=sqrtm.dtype)  # m x m identity matrix
    eye_flat = array_ops.reshape(eye, [-1])
    eye_tiled = array_ops.tile(eye_flat, [matrix_count])
    eye_batch = array_ops.reshape(eye_tiled, shape)

    # The transpose of R is taken in the k1 term instead of k2 in
    # order to prevent redundant transposition of R (i.e. (R')' = R)
    sqrtm_transpose = array_ops.matrix_transpose(sqrtm)
    k1 = _KroneckerProduct(eye_batch, sqrtm_transpose)
    k2 = _KroneckerProduct(sqrtm, eye_batch)
    ksum = math_ops.add(k1, k2)

    # Vectorize dA
    shape_slice_size = [math_ops.subtract(array_ops.size(shape), 2)]
    shape_slice = array_ops.slice(shape, [0], shape_slice_size)
    shape_vec_da = array_ops.concat([shape_slice, [order * order], [1]], 0)
    vec_da = array_ops.reshape(array_ops.matrix_transpose(grad), shape_vec_da)

    # Solve for vec(dR)
    vec_dsqrtm = linalg_ops.matrix_solve(ksum, vec_da)

    # Solve for dR by inverse vectorizing vec(dR)
    dsqrtm_transpose = array_ops.reshape(vec_dsqrtm, shape)
    return array_ops.matrix_transpose(dsqrtm_transpose)
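The batched _KroneckerProduct helper above can be cross-checked against np.kron. The NumPy sketch below (kron_batch is an illustrative stand-in, not the TensorFlow helper) uses the same interleave-and-broadcast reshape and should agree with np.kron on each matrix in the batch.

import numpy as np

def kron_batch(b1, b2):
  m, p = b1.shape[-1], b2.shape[-1]
  # Interleave singleton axes so broadcasting forms all pairwise products,
  # then collapse to the [..., m*p, m*p] Kronecker product.
  out = b1[..., :, None, :, None] * b2[..., None, :, None, :]
  return out.reshape(b1.shape[:-2] + (m * p, m * p))

a = np.random.randn(3, 2, 2)
b = np.random.randn(3, 4, 4)
for i in range(3):
  np.testing.assert_allclose(kron_batch(a, b)[i], np.kron(a[i], b[i]))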
Example 43
def lanczos_bidiag(operator,
                   k,
                   orthogonalize=True,
                   starting_vector=None,
                   name="lanczos_bidiag"):
    """Computes a Lanczos bidiagonalization for a linear operator.

  Computes matrices `U` of shape `[m, k+1]`, `V` of shape `[n, k]` and lower
  bidiagonal matrix `B` of shape `[k+1, k]`, that satisfy the equations
  `A * V = U * B` and `A' * U[:, :-1] = V * B[:-1, :]'`.

  The columns of `U` are orthonormal and form a basis for the Krylov subspace
  `K(A*A', U[:,0])`.

  The columns of `V` are orthonormal and form a basis for the Krylov subspace
  `K(A'*A, A' U[:,0])`.

  Args:
    operator: An object representing a linear operator with attributes:
      - shape: Either a list of integers or a 1-D `Tensor` of type `int32` of
        length 2. `shape[0]` is the dimension of the co-domain of the operator,
        `shape[1]` is the dimension of the domain of the operator. In other
        words, if operator represents an M x N matrix A, `shape` must contain
        `[M, N]`.
      - dtype: The datatype of input to and output from `apply` and
        `apply_adjoint`.
      - apply: Callable object taking a vector `x` as input and returning a
        vector with the result of applying the operator to `x`, i.e. if
       `operator` represents matrix `A`, `apply` should return `A * x`.
      - apply_adjoint: Callable object taking a vector `x` as input and
        returning a vector with the result of applying the adjoint operator
        to `x`, i.e. if `operator` represents matrix `A`, `apply_adjoint` should
        return `conj(transpose(A)) * x`.
    k: An integer or a scalar Tensor of type `int32`. Determines the maximum
      number of steps to run. If an invariant subspace is found, the algorithm
      may terminate before `k` steps have been run.
    orthogonalize: If `True`, perform full orthogonalization. If `False` no
      orthogonalization is performed.
    starting_vector: If not `None`, must be a `Tensor` of shape `[m]`, i.e. of
      length `operator.shape[0]`.
    name: A name scope for the operation.

  Returns:
    output: A namedtuple representing a Lanczos bidiagonalization of
      `operator` with attributes:
      u: A rank-2 `Tensor` of type `operator.dtype` and shape
        `[operator.shape[0], k_actual+1]`, where `k_actual` is the number of
        steps run.
      v: A rank-2 `Tensor` of type `operator.dtype` and shape
        `[operator.shape[1], k_actual]`, where `k_actual` is the number of steps
        run.
      alpha: A rank-1 `Tensor` of type `operator.dtype` and shape `[k]`.
      beta: A rank-1 `Tensor` of type `operator.dtype` and shape `[k]`.
  """
    def tarray(size, dtype, name):
        return tensor_array_ops.TensorArray(dtype=dtype,
                                            size=size,
                                            tensor_array_name=name,
                                            clear_after_read=False)

    # Reads a row-vector at location i in tarray and returns it as a
    # column-vector.
    def read_colvec(tarray, i):
        return array_ops.expand_dims(tarray.read(i), -1)

    # Writes a column-vector as a row-vector at location i in tarray.
    def write_colvec(tarray, colvec, i):
        return tarray.write(i, array_ops.squeeze(colvec))

    # Ephemeral class holding Lanczos bidiagonalization state:
    #   u = left Lanczos vectors
    #   v = right Lanczos vectors
    #   alpha = diagonal of B_k.
    #   beta = subdiagonal of B_k.
    # Notice that we store the left and right Lanczos vectors as the _rows_
    # of u and v. This is done because tensors are stored row-major and
    # TensorArray only supports packing along dimension 0.
    lanzcos_bidiag_state = collections.namedtuple("LanczosBidiagState",
                                                  ["u", "v", "alpha", "beta"])

    def update_state(old, i, u, v, alpha, beta):
        return lanzcos_bidiag_state(write_colvec(old.u, u, i + 1),
                                    write_colvec(old.v, v, i),
                                    old.alpha.write(i, alpha),
                                    old.beta.write(i, beta))

    def gram_schmidt_step(j, basis, v):
        """Makes v orthogonal to the j'th vector in basis."""
        v_shape = v.get_shape()
        basis_vec = read_colvec(basis, j)
        v -= math_ops.matmul(basis_vec, v, adjoint_a=True) * basis_vec
        v.set_shape(v_shape)
        return j + 1, basis, v

    def orthogonalize_once(i, basis, v):
        j = constant_op.constant(0, dtype=dtypes.int32)
        _, _, v = control_flow_ops.while_loop(lambda j, basis, v: j < i,
                                              gram_schmidt_step, [j, basis, v])
        return util.l2normalize(v)

    # Iterated modified Gram-Schmidt orthogonalization adapted from PROPACK.
    # TODO(rmlarsen): This is possibly the slowest implementation of
    # iterated Gram-Schmidt orthogonalization since the abacus. Move to C++.
    def orthogonalize_(i, basis, v):
        v_norm = util.l2norm(v)
        v_new, v_new_norm = orthogonalize_once(i, basis, v)
        # If the norm decreases more than 1/sqrt(2), run a second
        # round of MGS. See proof in:
        #   B. N. Parlett, ``The Symmetric Eigenvalue Problem'',
        #   Prentice-Hall, Englewood Cliffs, NJ, 1980. pp. 105-109
        return control_flow_ops.cond(v_new_norm < 0.7071 * v_norm,
                                     lambda: orthogonalize_once(i, basis, v),
                                     lambda: (v_new, v_new_norm))

    def stopping_criterion(i, _):
        # TODO(rmlarsen): Stop if an invariant subspace is detected.
        return i < k

    def lanczos_bidiag_step(i, ls):
        """Extends the Lanczos bidiagonalization ls by one step."""
        u = read_colvec(ls.u, i)
        r = operator.apply_adjoint(u)
        # The shape inference doesn't work across cond, so save and reapply the shape.
        r_shape = r.get_shape()
        r = control_flow_ops.cond(
            i > 0, lambda: r - ls.beta.read(i - 1) * read_colvec(ls.v, i - 1),
            lambda: r)
        r.set_shape(r_shape)
        if orthogonalize:
            v, alpha = orthogonalize_(i - 1, ls.v, r)
        else:
            v, alpha = util.l2normalize(r)
        p = operator.apply(v) - alpha * u
        if orthogonalize:
            u, beta = orthogonalize_(i, ls.u, p)
        else:
            u, beta = util.l2normalize(p)

        return i + 1, update_state(ls, i, u, v, alpha, beta)

    with ops.name_scope(name):
        dtype = operator.dtype
        if starting_vector is None:
            starting_vector = random_ops.random_uniform(operator.shape[:1],
                                                        -1,
                                                        1,
                                                        dtype=dtype)
        u0, _ = util.l2normalize(starting_vector)
        ls = lanzcos_bidiag_state(u=write_colvec(tarray(k + 1, dtype, "u"), u0,
                                                 0),
                                  v=tarray(k, dtype, "v"),
                                  alpha=tarray(k, dtype, "alpha"),
                                  beta=tarray(k, dtype, "beta"))
        i = constant_op.constant(0, dtype=dtypes.int32)
        _, ls = control_flow_ops.while_loop(stopping_criterion,
                                            lanczos_bidiag_step, [i, ls])
        return lanzcos_bidiag_state(array_ops.matrix_transpose(ls.u.stack()),
                                    array_ops.matrix_transpose(ls.v.stack()),
                                    ls.alpha.stack(), ls.beta.stack())
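A minimal usage sketch (TF1 graph mode; the DenseOperator wrapper below is an assumption for illustration, not part of the solvers API): any object exposing shape, dtype, apply, and apply_adjoint works, e.g. a dense matrix.

import collections
import numpy as np
import tensorflow as tf

a_np = np.random.randn(6, 4).astype(np.float32)
a = tf.constant(a_np)

DenseOperator = collections.namedtuple(
    "DenseOperator", ["shape", "dtype", "apply", "apply_adjoint"])
operator = DenseOperator(
    shape=a_np.shape,    # [M, N] = [6, 4]
    dtype=a.dtype,
    apply=lambda x: tf.matmul(a, x),                           # A * x
    apply_adjoint=lambda x: tf.matmul(a, x, adjoint_a=True))   # A^H * x

lbd = lanczos_bidiag(operator, k=3)
with tf.Session() as sess:
  u, v, alpha, beta = sess.run([lbd.u, lbd.v, lbd.alpha, lbd.beta])
print(u.shape, v.shape)  # (6, 4) and (4, 3): k+1 left and k right vectors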
Example 44
def assert_symmetric(matrix):
  matrix_t = array_ops.matrix_transpose(matrix)
  return control_flow_ops.with_dependencies(
      [check_ops.assert_equal(matrix, matrix_t)], matrix)
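A minimal usage sketch (TF1 graph mode; the matrix values are illustrative): the helper returns `matrix` itself, gated on an equality assertion against its transpose, so an asymmetric input only fails when the result is evaluated.

import tensorflow as tf

sym = tf.constant([[1., 2.], [2., 3.]])
checked = assert_symmetric(sym)   # same values, gated on the assertion
with tf.Session() as sess:
  print(sess.run(checked))        # passes; an asymmetric input would raise here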
Example 45
 def _batch_sqrt_matmul(self, x, transpose_x=False):
   if transpose_x:
     x = array_ops.matrix_transpose(x)
   diag_mat = array_ops.expand_dims(self._diag, -1)
   return diag_mat * x
Example 46
 def _batch_sqrt_matmul(self, x, transpose_x=False):
   if transpose_x:
     x = array_ops.matrix_transpose(x)
   self._check_x(x)
   return math_ops.sqrt(self._scale) * x
Example 47
 def testTensorWithStaticRankLessThanTwoRaisesBecauseNotAMatrix(self):
   vector = [1, 2, 3]
   with self.test_session():
     with self.assertRaisesRegexp(ValueError, "should be a "):
       array_ops.matrix_transpose(vector)
Example 48
def assert_symmetric(matrix):
    matrix_t = array_ops.matrix_transpose(matrix)
    return control_flow_ops.with_dependencies(
        [check_ops.assert_equal(matrix, matrix_t)], matrix)
Example 49
 def _batch_sqrt_matmul(self, x, transpose_x=False):
     if transpose_x:
         x = array_ops.matrix_transpose(x)
     self._check_x(x)
     return math_ops.sqrt(self._scale) * x
Example 50
    def test_defining_spd_operator_by_taking_real_part(self):
        with self.cached_session():  # Necessary for fft_kernel_label_map
            # S is real and positive.
            s = linear_operator_test_util.random_uniform(shape=(10, 2, 3, 4),
                                                         dtype=dtypes.float32,
                                                         minval=1.,
                                                         maxval=2.)

            # Let S = S1 + S2, the Hermitian and anti-hermitian parts.
            # S1 = 0.5 * (S + S^H), S2 = 0.5 * (S - S^H),
            # where ^H is the Hermitian transpose of the function:
            #    f(n0, n1, n2)^H := ComplexConjugate[f(N0-n0, N1-n1, N2-n2)].
            # We want to isolate S1, since
            #   S1 is Hermitian by construction
            #   S1 is real since S is
            #   S1 is positive since it is the sum of two positive kernels

            # IDFT[S] = IDFT[S1] + IDFT[S2]
            #         =      H1  +      H2
            # where H1 is real since it is Hermitian,
            # and H2 is imaginary since it is anti-Hermitian.
            ifft_s = fft_ops.ifft3d(math_ops.cast(s, dtypes.complex64))

            # Throw away H2, keep H1.
            real_ifft_s = math_ops.real(ifft_s)

            # This is the perfect spectrum!
            # spectrum = DFT[H1]
            #          = S1,
            fft_real_ifft_s = fft_ops.fft3d(
                math_ops.cast(real_ifft_s, dtypes.complex64))

            # S1 is Hermitian ==> operator is real.
            # S1 is real ==> operator is self-adjoint.
            # S1 is positive ==> operator is positive-definite.
            operator = linalg.LinearOperatorCirculant3D(fft_real_ifft_s)

            # Allow for complex output so we can check operator has zero imag part.
            self.assertEqual(operator.dtype, dtypes.complex64)
            matrix, matrix_t = self.evaluate([
                operator.to_dense(),
                array_ops.matrix_transpose(operator.to_dense())
            ])
            self.evaluate(
                operator.assert_positive_definite())  # Should not fail.
            np.testing.assert_allclose(0, np.imag(matrix), atol=1e-6)
            self.assertAllClose(matrix, matrix_t)

            # Just to test the theory, get S2 as well.
            # This should create an imaginary operator.
            # S2 is anti-Hermitian ==> operator is imaginary.
            # S2 is real ==> operator is self-adjoint.
            imag_ifft_s = math_ops.imag(ifft_s)
            fft_imag_ifft_s = fft_ops.fft3d(
                1j * math_ops.cast(imag_ifft_s, dtypes.complex64))
            operator_imag = linalg.LinearOperatorCirculant3D(fft_imag_ifft_s)

            matrix, matrix_h = self.evaluate([
                operator_imag.to_dense(),
                array_ops.matrix_transpose(
                    math_ops.conj(operator_imag.to_dense()))
            ])
            self.assertAllClose(matrix, matrix_h)
            np.testing.assert_allclose(0, np.real(matrix), atol=1e-7)
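The spectrum manipulation in this test can also be checked directly in NumPy (the shapes below are illustrative): for a real, positive spectrum s, the real part of its inverse DFT is the kernel of the Hermitian part S1, whose forward DFT is again real and positive, namely the average of s and its index-reversed copy.

import numpy as np

rng = np.random.RandomState(0)
s = rng.uniform(1., 2., size=(2, 3, 4))      # real, positive spectrum

h1 = np.real(np.fft.ifftn(s))                # keep H1, discard H2
s1 = np.fft.fftn(h1)                         # spectrum of the SPD operator

np.testing.assert_allclose(np.imag(s1), 0., atol=1e-12)  # S1 is real
assert np.all(np.real(s1) > 0)                           # S1 is positive
# S1 = 0.5 * (S + S^H), where S^H reverses the indices of the real S.
s_rev = s[np.ix_(*[-np.arange(n) % n for n in s.shape])]
np.testing.assert_allclose(np.real(s1), 0.5 * (s + s_rev), atol=1e-12)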