Example #1
  def _Underdetermined(op, grad):
    """Gradients for the underdetermined case of MatrixSolveLs.

    This is the backprop for the solution to the normal equations of the second
    kind:
      X = F(A, B) = A^T * (A*A^T + lambda*I)^{-1} * B
    that (for lambda=0) solve the least squares problem
      min ||X||_F subject to A*X = B.
    """
    a = op.inputs[0]
    b = op.inputs[1]
    l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
    # pylint: disable=protected-access
    chol = linalg_ops._RegularizedGramianCholesky(
        a, l2_regularizer=l2_regularizer, first_kind=False)
    # pylint: enable=protected-access
    grad_b = linalg_ops.cholesky_solve(chol, math_ops.matmul(a, grad))
    # Temporary tmp = (A * A^T + lambda * I)^{-1} * B.
    tmp = linalg_ops.cholesky_solve(chol, b)
    a1 = math_ops.matmul(tmp, a, adjoint_a=True)
    a1 = -math_ops.matmul(grad_b, a1)
    a2 = grad - math_ops.matmul(a, grad_b, adjoint_a=True)
    a2 = math_ops.matmul(tmp, a2, adjoint_b=True)
    grad_a = a1 + a2
    return (grad_a, grad_b, None)
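As a quick sanity check of the docstring's formula (a minimal NumPy sketch, independent of the TensorFlow code above), the minimum-norm expression X = A^T * (A*A^T)^{-1} * B with lambda = 0 does satisfy A*X = B when A has full row rank:

import numpy as np

rng = np.random.default_rng(0)
a = rng.standard_normal((3, 5))   # underdetermined: 3 equations, 5 unknowns
b = rng.standard_normal((3, 2))
# Minimum-norm solution via the normal equations of the second kind.
x = a.T @ np.linalg.solve(a @ a.T, b)
assert np.allclose(a @ x, b)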
Example #2
    def _Underdetermined(op, grad):
        """Gradients for the underdetermined case of MatrixSolveLs.

        This is the backprop for the solution to the normal equations of the second
        kind:
          X = F(A, B) = A^T * (A*A^T + lambda*I)^{-1} * B
        that (for lambda=0) solve the least squares problem
          min ||X||_F subject to A*X = B.
        """
        a = op.inputs[0]
        b = op.inputs[1]
        l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
        # pylint: disable=protected-access
        chol = linalg_ops._RegularizedGramianCholesky(
            a, l2_regularizer=l2_regularizer, first_kind=False)
        # pylint: enable=protected-access
        grad_b = linalg_ops.cholesky_solve(chol, math_ops.matmul(a, grad))
        # Temporary tmp = (A * A^T + lambda * I)^{-1} * B.
        tmp = linalg_ops.cholesky_solve(chol, b)
        a1 = math_ops.matmul(tmp, a, adjoint_a=True)
        a1 = -math_ops.matmul(grad_b, a1)
        a2 = grad - math_ops.matmul(a, grad_b, adjoint_a=True)
        a2 = math_ops.matmul(tmp, a2, adjoint_b=True)
        grad_a = a1 + a2
        return (grad_a, grad_b, None)
Example #3
  def _underdetermined(op, grad):
    """Gradients for the underdetermined case of MatrixSolveLs.

    This is the backprop for the solution to the normal equations of the second
    kind:
      X = F(A, B) = A^T * (A*A^T + lambda*I)^{-1} * B
    that (for lambda=0) solve the least squares problem
      min ||X||_F subject to A*X = B.
    """
    a = op.inputs[0]
    b = op.inputs[1]
    l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
    a_shape = array_ops.shape(a)
    batch_shape = a_shape[:-2]
    m = a_shape[-2]

    identity = linalg_ops.eye(m, batch_shape=batch_shape, dtype=a.dtype)
    gramian = math_ops.matmul(a, a, adjoint_b=True) + l2_regularizer * identity
    chol = linalg_ops.cholesky(gramian)
    grad_b = linalg_ops.cholesky_solve(chol, math_ops.matmul(a, grad))
    # Temporary tmp = (A * A^T + lambda * I)^{-1} * B.
    tmp = linalg_ops.cholesky_solve(chol, b)
    a1 = math_ops.matmul(tmp, a, adjoint_a=True)
    a1 = -math_ops.matmul(grad_b, a1)
    a2 = grad - math_ops.matmul(a, grad_b, adjoint_a=True)
    a2 = math_ops.matmul(tmp, a2, adjoint_b=True)
    grad_a = a1 + a2
    return (grad_a, grad_b, None)
Example #4
  def _underdetermined(op, grad):
    """Gradients for the underdetermined case of MatrixSolveLs.

    This is the backprop for the solution to the normal equations of the second
    kind:
      X = F(A, B) = A^T * (A*A^T + lambda*I)^{-1} * B
    that (for lambda=0) solve the least squares problem
      min ||X||_F subject to A*X = B.
    """
    a = op.inputs[0]
    b = op.inputs[1]
    l2_regularizer = op.inputs[2]
    a_shape = array_ops.shape(a)
    batch_shape = a_shape[:-2]
    m = a_shape[-2]

    identity = linalg_ops.eye(m, batch_shape=batch_shape, dtype=a.dtype)
    gramian = math_ops.batch_matmul(
        a, a, adj_y=True) + l2_regularizer * identity
    chol = linalg_ops.cholesky(gramian)
    grad_b = linalg_ops.cholesky_solve(chol, math_ops.batch_matmul(a, grad))
    # Temporary z = (A * A^T + lambda * I)^{-1} * B.
    z = linalg_ops.cholesky_solve(chol, b)
    bz = -math_ops.batch_matmul(grad_b, z, adj_y=True)
    bz_sym = bz + array_ops.matrix_transpose(bz)
    grad_a = math_ops.batch_matmul(bz_sym, a) + math_ops.batch_matmul(z, grad)
    return (grad_a, grad_b, None)
Example #5
    def _underdetermined(op, grad):
        """Gradients for the underdetermined case of MatrixSolveLs.

        This is the backprop for the solution to the normal equations of the second
        kind:
          X = F(A, B) = A^T * (A*A^T + lambda*I)^{-1} * B
        that (for lambda=0) solve the least squares problem
          min ||X||_F subject to A*X = B.
        """
        a = op.inputs[0]
        b = op.inputs[1]
        l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
        a_shape = array_ops.shape(a)
        batch_shape = a_shape[:-2]
        m = a_shape[-2]

        identity = linalg_ops.eye(m, batch_shape=batch_shape, dtype=a.dtype)
        gramian = math_ops.matmul(a, a,
                                  adjoint_b=True) + l2_regularizer * identity
        chol = linalg_ops.cholesky(gramian)
        grad_b = linalg_ops.cholesky_solve(chol, math_ops.matmul(a, grad))
        # Temporary tmp = (A * A^T + lambda * I)^{-1} * B.
        tmp = linalg_ops.cholesky_solve(chol, b)
        a1 = math_ops.matmul(tmp, a, adjoint_a=True)
        a1 = -math_ops.matmul(grad_b, a1)
        a2 = grad - math_ops.matmul(a, grad_b, adjoint_a=True)
        a2 = math_ops.matmul(tmp, a2, adjoint_b=True)
        grad_a = a1 + a2
        return (grad_a, grad_b, None)
Example #6
  def _overdetermined(op, grad):
    """Gradients for the overdetermined case of MatrixSolveLs.

    This is the backprop for the solution to the normal equations of the first
    kind:
       X = F(A, B) = (A^T * A + lambda * I)^{-1} * A^T * B
    which solve the least squares problem
       min ||A * X - B||_F^2 + lambda ||X||_F^2.
    """
    a = op.inputs[0]
    b = op.inputs[1]
    l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
    x = op.outputs[0]
    a_shape = array_ops.shape(a)
    batch_shape = a_shape[:-2]
    n = a_shape[-1]

    identity = linalg_ops.eye(n, batch_shape=batch_shape, dtype=a.dtype)
    gramian = math_ops.matmul(a, a, adjoint_a=True) + l2_regularizer * identity
    chol = linalg_ops.cholesky(gramian)
    # Temporary z = (A^T * A + lambda * I)^{-1} * grad.
    z = linalg_ops.cholesky_solve(chol, grad)
    xzt = math_ops.matmul(x, z, adjoint_b=True)
    zx_sym = xzt + array_ops.matrix_transpose(xzt)
    grad_a = -math_ops.matmul(a, zx_sym) + math_ops.matmul(b, z, adjoint_b=True)
    grad_b = math_ops.matmul(a, z)
    return (grad_a, grad_b, None)
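For reference, a small NumPy sketch (not part of the example above) showing that with lambda = 0 the regularized normal-equations solution used here coincides with the ordinary least-squares solution:

import numpy as np

rng = np.random.default_rng(1)
a = rng.standard_normal((6, 3))   # overdetermined: 6 equations, 3 unknowns
b = rng.standard_normal((6, 2))
# Normal equations of the first kind with lambda = 0.
x_normal = np.linalg.solve(a.T @ a, a.T @ b)
x_lstsq, *_ = np.linalg.lstsq(a, b, rcond=None)
assert np.allclose(x_normal, x_lstsq)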
Example #7
  def _batch_sqrt_solve(self, rhs):
    # Recall the square root of this operator is M + VDV^T.
    # The Woodbury formula gives:
    # (M + VDV^T)^{-1}
    # = M^{-1} - M^{-1} V (D^{-1} + V^T M^{-1} V)^{-1} V^T M^{-1}
    # = M^{-1} - M^{-1} V C^{-1} V^T M^{-1}
    # where C is the capacitance matrix.
    m = self._operator
    v = self._v
    cchol = self._chol_capacitance(batch_mode=True)

    # The operators will use batch/singleton mode automatically.  We don't
    # override.
    # M^{-1} rhs
    minv_rhs = m.solve(rhs)
    # V^T M^{-1} rhs
    vt_minv_rhs = math_ops.batch_matmul(v, minv_rhs, adj_x=True)
    # C^{-1} V^T M^{-1} rhs
    cinv_vt_minv_rhs = linalg_ops.cholesky_solve(cchol, vt_minv_rhs)
    # V C^{-1} V^T M^{-1} rhs
    v_cinv_vt_minv_rhs = math_ops.batch_matmul(v, cinv_vt_minv_rhs)
    # M^{-1} V C^{-1} V^T M^{-1} rhs
    minv_v_cinv_vt_minv_rhs = m.solve(v_cinv_vt_minv_rhs)

    # M^{-1} - M^{-1} V C^{-1} V^T M^{-1}
    return minv_rhs - minv_v_cinv_vt_minv_rhs
Example #8
  def _sqrt_solve(self, rhs):
    # Recall the square root of this operator is M + VDV^T.
    # The Woodbury formula gives:
    # (M + VDV^T)^{-1}
    # = M^{-1} - M^{-1} V (D^{-1} + V^T M^{-1} V)^{-1} V^T M^{-1}
    # = M^{-1} - M^{-1} V C^{-1} V^T M^{-1}
    # where C is the capacitance matrix.
    # TODO(jvdillon) Determine if recursively applying rank-1 updates is more
    # efficient.  May not be possible because a general n x n matrix can be
    # represented as n rank-1 updates, and solving with this matrix is always
    # done in O(n^3) time.
    m = self._operator
    v = self._v
    cchol = self._chol_capacitance(batch_mode=False)

    # The operators will use batch/singleton mode automatically.  We don't
    # override.
    # M^{-1} rhs
    minv_rhs = m.solve(rhs)
    # V^T M^{-1} rhs
    vt_minv_rhs = math_ops.matmul(v, minv_rhs, transpose_a=True)
    # C^{-1} V^T M^{-1} rhs
    cinv_vt_minv_rhs = linalg_ops.cholesky_solve(cchol, vt_minv_rhs)
    # V C^{-1} V^T M^{-1} rhs
    v_cinv_vt_minv_rhs = math_ops.matmul(v, cinv_vt_minv_rhs)
    # M^{-1} V C^{-1} V^T M^{-1} rhs
    minv_v_cinv_vt_minv_rhs = m.solve(v_cinv_vt_minv_rhs)

    # M^{-1} - M^{-1} V C^{-1} V^T M^{-1}
    return minv_rhs - minv_v_cinv_vt_minv_rhs
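A dense NumPy sketch of the Woodbury identity both methods above rely on (illustrative only; M, V, D and the capacitance matrix C are plain arrays here, not LinearOperators):

import numpy as np

rng = np.random.default_rng(2)
n, k = 5, 2
m = 3.0 * np.eye(n)                        # a simple positive-definite M
v = rng.standard_normal((n, k))
d = np.diag(rng.uniform(0.5, 1.5, size=k))
rhs = rng.standard_normal((n, 1))

minv_rhs = np.linalg.solve(m, rhs)
# Capacitance matrix C = D^{-1} + V^T M^{-1} V.
c = np.linalg.inv(d) + v.T @ np.linalg.solve(m, v)
woodbury = minv_rhs - np.linalg.solve(m, v @ np.linalg.solve(c, v.T @ minv_rhs))
direct = np.linalg.solve(m + v @ d @ v.T, rhs)
assert np.allclose(woodbury, direct)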
Example #9
def posdef_inv_cholesky(tensor, identity, damping):
    """Computes inverse(tensor + damping * identity) with Cholesky."""
    # tensor = math_ops.to_double(tensor)
    # damping = math_ops.to_double(damping)
    # identity = math_ops.to_double(identity)
    chol = linalg_ops.cholesky(tensor + damping * identity)
    return linalg_ops.cholesky_solve(chol, identity)
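The same idea in plain NumPy (a sketch, not the function above): solving the Cholesky-factored system against an identity right-hand side reproduces the matrix inverse.

import numpy as np

rng = np.random.default_rng(3)
a = rng.standard_normal((4, 4))
tensor = a @ a.T                  # symmetric positive semi-definite
damping = 0.1
identity = np.eye(4)
chol = np.linalg.cholesky(tensor + damping * identity)
# Solve L y = I, then L^T inv = y; this is what cholesky_solve(chol, identity) computes.
inv = np.linalg.solve(chol.T, np.linalg.solve(chol, identity))
assert np.allclose(inv, np.linalg.inv(tensor + damping * identity))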
Example #10
    def _Overdetermined(op, grad):
        """Gradients for the overdetermined case of MatrixSolveLs.

        This is the backprop for the solution to the normal equations of the first
        kind:
           X = F(A, B) = (A^T * A + lambda * I)^{-1} * A^T * B
        which solve the least squares problem
           min ||A * X - B||_F^2 + lambda ||X||_F^2.
        """
        a = op.inputs[0]
        b = op.inputs[1]
        x = op.outputs[0]
        l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
        # pylint: disable=protected-access
        chol = linalg_ops._RegularizedGramianCholesky(
            a, l2_regularizer=l2_regularizer, first_kind=True)
        # pylint: enable=protected-access
        # Temporary z = (A^T * A + lambda * I)^{-1} * grad.
        z = linalg_ops.cholesky_solve(chol, grad)
        xzt = math_ops.matmul(x, z, adjoint_b=True)
        zx_sym = xzt + array_ops.matrix_transpose(xzt)
        grad_a = -math_ops.matmul(a, zx_sym) + math_ops.matmul(
            b, z, adjoint_b=True)
        grad_b = math_ops.matmul(a, z)
        return (grad_a, grad_b, None)
Example #11
    def _overdetermined(op, grad):
        """Gradients for the overdetermined case of MatrixSolveLs.

        This is the backprop for the solution to the normal equations of the first
        kind:
           X = F(A, B) = (A^T * A + lambda * I)^{-1} * A^T * B
        which solve the least squares problem
           min ||A * X - B||_F^2 + lambda ||X||_F^2.
        """
        a = op.inputs[0]
        b = op.inputs[1]
        l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
        x = op.outputs[0]
        a_shape = array_ops.shape(a)
        batch_shape = a_shape[:-2]
        n = a_shape[-1]

        identity = linalg_ops.eye(n, batch_shape=batch_shape, dtype=a.dtype)
        gramian = math_ops.matmul(a, a,
                                  adjoint_a=True) + l2_regularizer * identity
        chol = linalg_ops.cholesky(gramian)
        # Temporary z = (A^T * A + lambda * I)^{-1} * grad.
        z = linalg_ops.cholesky_solve(chol, grad)
        xzt = math_ops.matmul(x, z, adjoint_b=True)
        zx_sym = xzt + array_ops.matrix_transpose(xzt)
        grad_a = -math_ops.matmul(a, zx_sym) + math_ops.matmul(
            b, z, adjoint_b=True)
        grad_b = math_ops.matmul(a, z)
        return (grad_a, grad_b, None)
Example #12
 def _solve(self, rhs, adjoint=False, adjoint_arg=False):
   if self.is_square is False:
     raise NotImplementedError(
         "Solve is not yet implemented for non-square operators.")
   rhs = linear_operator_util.matrix_adjoint(rhs) if adjoint_arg else rhs
   if self._can_use_cholesky():
     return linalg_ops.cholesky_solve(self._get_cached_chol(), rhs)
   return linalg_ops.matrix_solve(
       self._get_cached_dense_matrix(), rhs, adjoint=adjoint)
Example #13
  def test_static_dims_broadcast(self):
    # batch_shape = [2]
    chol = rng.rand(3, 3)
    rhs = rng.rand(2, 3, 7)
    chol_broadcast = chol + np.zeros((2, 1, 1))

    result = linear_operator_util.cholesky_solve_with_broadcast(chol, rhs)
    self.assertAllEqual((2, 3, 7), result.get_shape())
    expected = linalg_ops.cholesky_solve(chol_broadcast, rhs)
    self.assertAllClose(*self.evaluate([expected, result]))
Example #14
 def _dense_solve(self, rhs, adjoint=False, adjoint_arg=False):
     """Solve by conversion to a dense matrix."""
     if self.is_square is False:  # pylint: disable=g-bool-id-comparison
         raise NotImplementedError(
             "Solve is not yet implemented for non-square operators.")
     rhs = linalg.adjoint(rhs) if adjoint_arg else rhs
     if self._can_use_cholesky():
         return linalg_ops.cholesky_solve(
             linalg_ops.cholesky(self.to_dense()), rhs)
     return linear_operator_util.matrix_solve_with_broadcast(
         self.to_dense(), rhs, adjoint=adjoint)
Example #15
  def test_static_dims_broadcast(self):
    # batch_shape = [2]
    chol = rng.rand(3, 3)
    rhs = rng.rand(2, 3, 7)
    chol_broadcast = chol + np.zeros((2, 1, 1))

    with self.cached_session():
      result = linear_operator_util.cholesky_solve_with_broadcast(chol, rhs)
      self.assertAllEqual((2, 3, 7), result.get_shape())
      expected = linalg_ops.cholesky_solve(chol_broadcast, rhs)
      self.assertAllEqual(expected.eval(), result.eval())
Example #16
 def _solve(self, rhs, adjoint=False, adjoint_arg=False):
     """Default implementation of _solve."""
     if self.is_square is False:
         raise NotImplementedError(
             "Solve is not yet implemented for non-square operators.")
     logging.warn(
         "Using (possibly slow) default implementation of solve."
         "  Requires conversion to a dense matrix and O(N^3) operations.")
     rhs = linalg.adjoint(rhs) if adjoint_arg else rhs
     if self._can_use_cholesky():
         return linalg_ops.cholesky_solve(
             linalg_ops.cholesky(self.to_dense()), rhs)
     return linalg_ops.matrix_solve(self.to_dense(), rhs, adjoint=adjoint)
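For intuition on the two branches above, a NumPy sketch (illustrative): when the operator is symmetric positive definite, the Cholesky route taken in the first branch agrees with the general dense solve in the second branch.

import numpy as np

rng = np.random.default_rng(5)
a = rng.standard_normal((4, 4))
spd = a @ a.T + 4.0 * np.eye(4)   # symmetric positive definite
rhs = rng.standard_normal((4, 3))
l = np.linalg.cholesky(spd)
x_chol = np.linalg.solve(l.T, np.linalg.solve(l, rhs))    # solve L y = rhs, then L^T x = y
x_dense = np.linalg.solve(spd, rhs)                       # general solve on the dense matrix
assert np.allclose(x_chol, x_dense)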
Example #17
 def _solve(self, rhs, adjoint=False, adjoint_arg=False):
   """Default implementation of _solve."""
   if self.is_square is False:
     raise NotImplementedError(
         "Solve is not yet implemented for non-square operators.")
   logging.warn(
       "Using (possibly slow) default implementation of solve."
       "  Requires conversion to a dense matrix and O(N^3) operations.")
   rhs = linear_operator_util.matrix_adjoint(rhs) if adjoint_arg else rhs
   if self._can_use_cholesky():
     return linalg_ops.cholesky_solve(self._get_cached_chol(), rhs)
   return linalg_ops.matrix_solve(
       self._get_cached_dense_matrix(), rhs, adjoint=adjoint)
Example #18
 def test_works_with_five_different_random_pos_def_matrices(self):
   for n in range(1, 6):
     for np_type, atol in [(np.float32, 0.05), (np.float64, 1e-5)]:
       with self.session(use_gpu=True):
         # Create 2 x n x n matrix
         array = np.array(
             [_RandomPDMatrix(n, self.rng),
              _RandomPDMatrix(n, self.rng)]).astype(np_type)
         chol = linalg_ops.cholesky(array)
         for k in range(1, 3):
           rhs = self.rng.randn(2, n, k).astype(np_type)
           x = linalg_ops.cholesky_solve(chol, rhs)
           self.assertAllClose(
               rhs, math_ops.matmul(array, x).eval(), atol=atol)
Example #19
  def test_dynamic_dims_broadcast_64bit(self):
    # batch_shape = [2, 2]
    chol = rng.rand(2, 3, 3)
    rhs = rng.rand(2, 1, 3, 7)
    chol_broadcast = chol + np.zeros((2, 2, 1, 1))
    rhs_broadcast = rhs + np.zeros((2, 2, 1, 1))

    chol_ph = array_ops.placeholder_with_default(chol, shape=None)
    rhs_ph = array_ops.placeholder_with_default(rhs, shape=None)

    result, expected = self.evaluate([
        linear_operator_util.cholesky_solve_with_broadcast(chol_ph, rhs_ph),
        linalg_ops.cholesky_solve(chol_broadcast, rhs_broadcast)
    ])
    self.assertAllClose(expected, result)
Example #20
    def _solve(self, rhs, adjoint=False, adjoint_arg=False):
        if self.base_operator.is_non_singular is False:
            raise ValueError(
                "Solve not implemented unless this is a perturbation of a "
                "non-singular LinearOperator.")
        # The Woodbury formula gives:
        # https://en.wikipedia.org/wiki/Woodbury_matrix_identity
        #   (L + UDV^H)^{-1}
        #   = L^{-1} - L^{-1} U (D^{-1} + V^H L^{-1} U)^{-1} V^H L^{-1}
        #   = L^{-1} - L^{-1} U C^{-1} V^H L^{-1}
        # where C is the capacitance matrix, C := D^{-1} + V^H L^{-1} U
        # Note also that, with ^{-H} being the inverse of the adjoint,
        #   (L + UDV^H)^{-H}
        #   = L^{-H} - L^{-H} V C^{-H} U^H L^{-H}
        l = self.base_operator
        if adjoint:
            # If adjoint, U and V have flipped roles in the operator.
            v, u = self._get_uv_as_tensors()
            # Capacitance should still be computed with u=self.u and v=self.v, which
            # after the "flip" on the line above means u=v, v=u. I.e. no need to
            # "flip" in the capacitance call, since the call to
            # matrix_solve_with_broadcast below is done with the `adjoint` argument,
            # and this takes care of things.
            capacitance = self._make_capacitance(u=v, v=u)
        else:
            u, v = self._get_uv_as_tensors()
            capacitance = self._make_capacitance(u=u, v=v)

        # L^{-1} rhs
        linv_rhs = l.solve(rhs, adjoint=adjoint, adjoint_arg=adjoint_arg)
        # V^H L^{-1} rhs
        vh_linv_rhs = math_ops.matmul(v, linv_rhs, adjoint_a=True)
        # C^{-1} V^H L^{-1} rhs
        if self._use_cholesky:
            capinv_vh_linv_rhs = linalg_ops.cholesky_solve(
                linalg_ops.cholesky(capacitance), vh_linv_rhs)
        else:
            capinv_vh_linv_rhs = linear_operator_util.matrix_solve_with_broadcast(
                capacitance, vh_linv_rhs, adjoint=adjoint)
        # U C^{-1} V^H L^{-1} rhs
        u_capinv_vh_linv_rhs = math_ops.matmul(u, capinv_vh_linv_rhs)
        # L^{-1} U C^{-1} V^H L^{-1} rhs
        linv_u_capinv_vh_linv_rhs = l.solve(u_capinv_vh_linv_rhs,
                                            adjoint=adjoint)

        # L^{-1} - L^{-1} U C^{-1} V^H L^{-1}
        return linv_rhs - linv_u_capinv_vh_linv_rhs
Example #21
    def _solve(self, rhs, adjoint=False, adjoint_arg=False):
        if self.base_operator.is_non_singular is False:
            raise ValueError(
                "Solve not implemented unless this is a perturbation of a "
                "non-singular LinearOperator.")
        # The Woodbury formula gives:
        # https://en.wikipedia.org/wiki/Woodbury_matrix_identity
        #   (L + UDV^H)^{-1}
        #   = L^{-1} - L^{-1} U (D^{-1} + V^H L^{-1} U)^{-1} V^H L^{-1}
        #   = L^{-1} - L^{-1} U C^{-1} V^H L^{-1}
        # where C is the capacitance matrix, C := D^{-1} + V^H L^{-1} U
        # Note also that, with ^{-H} being the inverse of the adjoint,
        #   (L + UDV^H)^{-H}
        #   = L^{-H} - L^{-H} V C^{-H} U^H L^{-H}
        l = self.base_operator
        if adjoint:
            v = self.u
            u = self.v
        else:
            v = self.v
            u = self.u

        # L^{-1} rhs
        linv_rhs = l.solve(rhs, adjoint=adjoint, adjoint_arg=adjoint_arg)
        # V^H L^{-1} rhs
        vh_linv_rhs = math_ops.matmul(v, linv_rhs, adjoint_a=True)
        # C^{-1} V^H L^{-1} rhs
        if self._use_cholesky:
            capinv_vh_linv_rhs = linalg_ops.cholesky_solve(
                self._chol_capacitance, vh_linv_rhs)
        else:
            capinv_vh_linv_rhs = linalg_ops.matrix_solve(self._capacitance,
                                                         vh_linv_rhs,
                                                         adjoint=adjoint)
        # U C^{-1} V^H L^{-1} rhs
        u_capinv_vh_linv_rhs = math_ops.matmul(u, capinv_vh_linv_rhs)
        # L^{-1} U C^{-1} V^H L^{-1} rhs
        linv_u_capinv_vh_linv_rhs = l.solve(u_capinv_vh_linv_rhs,
                                            adjoint=adjoint)

        # L^{-1} - L^{-1} U C^{-1} V^H L^{-1}
        return linv_rhs - linv_u_capinv_vh_linv_rhs
Example #22
    def test_dynamic_dims_broadcast_64bit(self):
        # batch_shape = [2, 2]
        chol = rng.rand(2, 3, 3)
        rhs = rng.rand(2, 1, 3, 7)
        chol_broadcast = chol + np.zeros((2, 2, 1, 1))
        rhs_broadcast = rhs + np.zeros((2, 2, 1, 1))

        chol_ph = array_ops.placeholder(dtypes.float64)
        rhs_ph = array_ops.placeholder(dtypes.float64)

        with self.cached_session() as sess:
            result, expected = sess.run([
                linear_operator_util.cholesky_solve_with_broadcast(
                    chol_ph, rhs_ph),
                linalg_ops.cholesky_solve(chol_broadcast, rhs_broadcast)
            ],
                                        feed_dict={
                                            chol_ph: chol,
                                            rhs_ph: rhs,
                                        })
            self.assertAllClose(expected, result)
Example #23
  def _solve(self, rhs, adjoint=False):
    if self.base_operator.is_non_singular is False:
      raise ValueError(
          "Solve not implemented unless this is a perturbation of a "
          "non-singular LinearOperator.")
    # The Woodbury formula gives:
    # https://en.wikipedia.org/wiki/Woodbury_matrix_identity
    #   (L + UDV^H)^{-1}
    #   = L^{-1} - L^{-1} U (D^{-1} + V^H L^{-1} U)^{-1} V^H L^{-1}
    #   = L^{-1} - L^{-1} U C^{-1} V^H L^{-1}
    # where C is the capacitance matrix, C := D^{-1} + V^H L^{-1} U
    # Note also that, with ^{-H} being the inverse of the adjoint,
    #   (L + UDV^H)^{-H}
    #   = L^{-H} - L^{-H} V C^{-H} U^H L^{-H}
    l = self.base_operator
    if adjoint:
      v = self.u
      u = self.v
    else:
      v = self.v
      u = self.u

    # L^{-1} rhs
    linv_rhs = l.solve(rhs, adjoint=adjoint)
    # V^H L^{-1} rhs
    vh_linv_rhs = math_ops.matmul(v, linv_rhs, adjoint_a=True)
    # C^{-1} V^H L^{-1} rhs
    if self._use_cholesky:
      capinv_vh_linv_rhs = linalg_ops.cholesky_solve(
          self._chol_capacitance, vh_linv_rhs)
    else:
      capinv_vh_linv_rhs = linalg_ops.matrix_solve(
          self._capacitance, vh_linv_rhs, adjoint=adjoint)
    # U C^{-1} V^H L^{-1} rhs
    u_capinv_vh_linv_rhs = math_ops.matmul(u, capinv_vh_linv_rhs)
    # L^{-1} U C^{-1} V^H L^{-1} rhs
    linv_u_capinv_vh_linv_rhs = l.solve(u_capinv_vh_linv_rhs, adjoint=adjoint)

    # L^{-1} - L^{-1} U C^{-1} V^H L^{-1}
    return linv_rhs - linv_u_capinv_vh_linv_rhs
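A NumPy sketch (illustrative) of why `u` and `v` swap roles in the adjoint branch above: the adjoint of the perturbed operator is L^H + V D^H U^H, i.e. the same low-rank-update form with U and V exchanged.

import numpy as np

rng = np.random.default_rng(4)
n, k = 4, 2
l = rng.standard_normal((n, n))
u = rng.standard_normal((n, k))
v = rng.standard_normal((n, k))
d = np.diag(rng.uniform(0.5, 1.5, size=k))
op = l + u @ d @ v.T
# The transpose of the update has u and v exchanged.
assert np.allclose(op.T, l.T + v @ d.T @ u.T)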
Example #24
  def testCholeskyFwdBackwd(self):
    with self.session() as sess:

      cfg = ipu.utils.create_ipu_config(profiling=True)
      cfg = ipu.utils.set_ipu_model_options(cfg, compile_ipu_code=False)
      cfg = ipu.utils.auto_select_ipus(cfg, 1)
      ipu.utils.configure_ipu_system(cfg)

      with ops.device("/device:IPU:0"):
        with variable_scope.variable_scope("vs", use_resource=True):
          ls = variable_scope.get_variable(
              "lengthscale",
              shape=[1],
              dtype=np.float32,
              initializer=init_ops.constant_initializer(
                  np.array([0.2], dtype=np.float32)))

        x = array_ops.placeholder(np.float32, shape=[4, 2])
        expected = array_ops.placeholder(np.float32, shape=[4, 1])

        x_t = array_ops.transpose(array_ops.expand_dims(x, 0), (1, 0, 2))
        K = math_ops.exp(-0.5 * math_ops.reduce_sum(((x_t - x) / ls)**2., -1))
        L = linalg_ops.cholesky(K)
        alpha = linalg_ops.cholesky_solve(L, expected)

        loss = -math_ops.reduce_mean(
            math_ops.matmul(alpha, expected, transpose_a=True))
        optimizer = gradient_descent.GradientDescentOptimizer(0.1)
        train = optimizer.minimize(loss)

        fd = {
            x:
            np.array([[1., 0.63920265], [0.63920265, 1.],
                      [0.30846608, 0.24088137], [0.38437635, 0.76085484]],
                     dtype=np.float32),
            expected: [[0.4662998], [-0.27042738], [-0.1996377], [-1.1648941]]
        }

        sess.run(variables.global_variables_initializer())
        sess.run(train, feed_dict=fd)
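The kernel/Cholesky step in this test mirrors the usual Gaussian-process regression computation alpha = K^{-1} * y. A NumPy sketch of that step (illustrative; the points and the jitter term below are made up):

import numpy as np

x = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
y = np.array([[1.0], [-0.5], [0.3], [0.8]])
lengthscale = 0.5
sq_dists = np.sum(((x[:, None, :] - x[None, :, :]) / lengthscale) ** 2, axis=-1)
k = np.exp(-0.5 * sq_dists)                    # RBF kernel matrix
l = np.linalg.cholesky(k + 1e-6 * np.eye(4))   # small jitter keeps K positive definite
alpha = np.linalg.solve(l.T, np.linalg.solve(l, y))
assert np.allclose(k @ alpha, y, atol=1e-3)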
Example #25
  def test_dynamic_dims_broadcast_64bit(self):
    # batch_shape = [2, 2]
    chol = rng.rand(2, 3, 3)
    rhs = rng.rand(2, 1, 3, 7)
    chol_broadcast = chol + np.zeros((2, 2, 1, 1))
    rhs_broadcast = rhs + np.zeros((2, 2, 1, 1))

    chol_ph = array_ops.placeholder(dtypes.float64)
    rhs_ph = array_ops.placeholder(dtypes.float64)

    with self.cached_session() as sess:
      result, expected = sess.run(
          [
              linear_operator_util.cholesky_solve_with_broadcast(
                  chol_ph, rhs_ph),
              linalg_ops.cholesky_solve(chol_broadcast, rhs_broadcast)
          ],
          feed_dict={
              chol_ph: chol,
              rhs_ph: rhs,
          })
      self.assertAllEqual(expected, result)
Example #26
  def _Overdetermined(op, grad):
    """Gradients for the overdetermined case of MatrixSolveLs.

    This is the backprop for the solution to the normal equations of the first
    kind:
       X = F(A, B) = (A^T * A + lambda * I)^{-1} * A^T * B
    which solve the least squares problem
       min ||A * X - B||_F^2 + lambda ||X||_F^2.
    """
    a = op.inputs[0]
    b = op.inputs[1]
    x = op.outputs[0]
    l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
    # pylint: disable=protected-access
    chol = linalg_ops._RegularizedGramianCholesky(
        a, l2_regularizer=l2_regularizer, first_kind=True)
    # pylint: enable=protected-access
    # Temporary z = (A^T * A + lambda * I)^{-1} * grad.
    z = linalg_ops.cholesky_solve(chol, grad)
    xzt = math_ops.matmul(x, z, adjoint_b=True)
    zx_sym = xzt + array_ops.matrix_transpose(xzt)
    grad_a = -math_ops.matmul(a, zx_sym) + math_ops.matmul(b, z, adjoint_b=True)
    grad_b = math_ops.matmul(a, z)
    return (grad_a, grad_b, None)
Example #27
def posdef_inv_cholesky(tensor, reg_mat, damping):
    """Computes inverse(tensor + damping * reg_mat) with Cholesky."""
    chol = linalg_ops.cholesky(tensor + damping * reg_mat)
    identity = linalg_ops.eye(array_ops.shape(tensor)[0], dtype=tensor.dtype)
    return linalg_ops.cholesky_solve(chol, identity)
Example #28
 def _batch_solve(self, rhs):
   return linalg_ops.cholesky_solve(self._chol, rhs)
Example #29
def cholesky_solve_with_broadcast(chol, rhs, name=None):
  """Solve systems of linear equations."""
  with ops.name_scope(name, "CholeskySolveWithBroadcast", [chol, rhs]):
    chol, rhs = broadcast_matrix_batch_dims([chol, rhs])
    return linalg_ops.cholesky_solve(chol, rhs)
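A NumPy sketch (illustrative) of the batch-dimension broadcasting this helper performs before delegating to cholesky_solve: a single [3, 3] factor is broadcast against a batch of right-hand sides.

import numpy as np

rng = np.random.default_rng(7)
chol = np.tril(rng.random((3, 3))) + np.eye(3)   # a valid lower-triangular factor
rhs = rng.random((2, 3, 7))
# Broadcast the factor's (empty) batch shape against the rhs batch shape [2],
# then solve (chol @ chol^T) x = rhs for each batch member.
chol_b = np.broadcast_to(chol, (2, 3, 3))
x = np.stack([np.linalg.solve(c @ c.T, r) for c, r in zip(chol_b, rhs)])
assert x.shape == (2, 3, 7)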
Example #30
 def _solve(self, rhs, adjoint=False, adjoint_arg=False):
     rhs = linear_operator_util.matrix_adjoint(rhs) if adjoint_arg else rhs
     if self._is_spd:
         return linalg_ops.cholesky_solve(self._chol, rhs)
     return linalg_ops.matrix_solve(self._matrix, rhs, adjoint=adjoint)
Example #31
 def _batch_solve(self, rhs):
   return linalg_ops.cholesky_solve(self._chol, rhs)
Example #32
def posdef_inv_cholesky(tensor, identity, damping):
  """Computes inverse(tensor + damping * identity) with Cholesky."""
  chol = linalg_ops.cholesky(tensor + damping * identity)
  return linalg_ops.cholesky_solve(chol, identity)
Example #33
 def _solve(self, rhs, adjoint=False):
   if self._is_spd:
     return linalg_ops.cholesky_solve(self._chol, rhs)
   return linalg_ops.matrix_solve(self._matrix, rhs, adjoint=adjoint)
Example #34
 def _solve(self, rhs, adjoint=False, adjoint_arg=False):
   rhs = linear_operator_util.matrix_adjoint(rhs) if adjoint_arg else rhs
   if self._is_spd:
     return linalg_ops.cholesky_solve(self._chol, rhs)
   return linalg_ops.matrix_solve(self._matrix, rhs, adjoint=adjoint)
Example #35
def cholesky_solve_with_broadcast(chol, rhs, name=None):
  """Solve systems of linear equations."""
  with ops.name_scope(name, "CholeskySolveWithBroadcast", [chol, rhs]):
    chol, rhs = broadcast_matrix_batch_dims([chol, rhs])
    return linalg_ops.cholesky_solve(chol, rhs)