def _Underdetermined(op, grad):
  """Gradients for the underdetermined case of MatrixSolveLs.

  This is the backprop for the solution to the normal equations of the second
  kind:
    X = F(A, B) = A^T * (A*A^T + lambda*I)^{-1} * B
  that (for lambda=0) solves the least squares problem
    min ||X||_F subject to A*X = B.
  """
  a = op.inputs[0]
  b = op.inputs[1]
  l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
  # pylint: disable=protected-access
  chol = linalg_ops._RegularizedGramianCholesky(
      a, l2_regularizer=l2_regularizer, first_kind=False)
  # pylint: enable=protected-access
  grad_b = linalg_ops.cholesky_solve(chol, math_ops.matmul(a, grad))
  # Temporary tmp = (A * A^T + lambda * I)^{-1} * B.
  tmp = linalg_ops.cholesky_solve(chol, b)
  a1 = math_ops.matmul(tmp, a, adjoint_a=True)
  a1 = -math_ops.matmul(grad_b, a1)
  a2 = grad - math_ops.matmul(a, grad_b, adjoint_a=True)
  a2 = math_ops.matmul(tmp, a2, adjoint_b=True)
  grad_a = a1 + a2
  return (grad_a, grad_b, None)
def _underdetermined(op, grad):
  """Gradients for the underdetermined case of MatrixSolveLs.

  This is the backprop for the solution to the normal equations of the second
  kind:
    X = F(A, B) = A^T * (A*A^T + lambda*I)^{-1} * B
  that (for lambda=0) solves the least squares problem
    min ||X||_F subject to A*X = B.
  """
  a = op.inputs[0]
  b = op.inputs[1]
  l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
  a_shape = array_ops.shape(a)
  batch_shape = a_shape[:-2]
  m = a_shape[-2]

  identity = linalg_ops.eye(m, batch_shape=batch_shape, dtype=a.dtype)
  gramian = math_ops.matmul(a, a, adjoint_b=True) + l2_regularizer * identity
  chol = linalg_ops.cholesky(gramian)
  grad_b = linalg_ops.cholesky_solve(chol, math_ops.matmul(a, grad))
  # Temporary tmp = (A * A^T + lambda * I)^{-1} * B.
  tmp = linalg_ops.cholesky_solve(chol, b)
  a1 = math_ops.matmul(tmp, a, adjoint_a=True)
  a1 = -math_ops.matmul(grad_b, a1)
  a2 = grad - math_ops.matmul(a, grad_b, adjoint_a=True)
  a2 = math_ops.matmul(tmp, a2, adjoint_b=True)
  grad_a = a1 + a2
  return (grad_a, grad_b, None)
def _underdetermined(op, grad):
  """Gradients for the underdetermined case of MatrixSolveLs.

  This is the backprop for the solution to the normal equations of the second
  kind:
    X = F(A, B) = A^T * (A*A^T + lambda*I)^{-1} * B
  that (for lambda=0) solves the least squares problem
    min ||X||_F subject to A*X = B.
  """
  a = op.inputs[0]
  b = op.inputs[1]
  l2_regularizer = op.inputs[2]
  a_shape = array_ops.shape(a)
  batch_shape = a_shape[:-2]
  m = a_shape[-2]

  identity = linalg_ops.eye(m, batch_shape=batch_shape, dtype=a.dtype)
  gramian = math_ops.batch_matmul(
      a, a, adj_y=True) + l2_regularizer * identity
  chol = linalg_ops.cholesky(gramian)
  grad_b = linalg_ops.cholesky_solve(chol, math_ops.batch_matmul(a, grad))
  # Temporary z = (A * A^T + lambda * I)^{-1} * B.
  z = linalg_ops.cholesky_solve(chol, b)
  bz = -math_ops.batch_matmul(grad_b, z, adj_y=True)
  bz_sym = bz + array_ops.matrix_transpose(bz)
  grad_a = math_ops.batch_matmul(bz_sym, a) + math_ops.batch_matmul(z, grad)
  return (grad_a, grad_b, None)
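# Illustrative sketch (NumPy, not part of the snippets above): the three
# underdetermined variants all backprop through the minimum-norm solution
#   X = A^T (A A^T + lambda I)^{-1} B.
# This checks that formula (for lambda = 0) against np.linalg.lstsq, which
# also returns the minimum-norm solution for m < n. All names here are
# illustrative only.
import numpy as np

rng = np.random.RandomState(0)
m, n, k = 3, 5, 2
a = rng.randn(m, n)
b = rng.randn(m, k)

# Solve the normal equations of the second kind via a Cholesky factor of A A^T.
chol = np.linalg.cholesky(a @ a.T)
z = np.linalg.solve(chol.T, np.linalg.solve(chol, b))  # (A A^T)^{-1} B
x_normal_eq = a.T @ z

x_lstsq = np.linalg.lstsq(a, b, rcond=None)[0]
np.testing.assert_allclose(x_normal_eq, x_lstsq, atol=1e-8)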
def _overdetermined(op, grad):
  """Gradients for the overdetermined case of MatrixSolveLs.

  This is the backprop for the solution to the normal equations of the first
  kind:
    X = F(A, B) = (A^T * A + lambda * I)^{-1} * A^T * B
  which solves the least squares problem
    min ||A * X - B||_F^2 + lambda ||X||_F^2.
  """
  a = op.inputs[0]
  b = op.inputs[1]
  l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
  x = op.outputs[0]
  a_shape = array_ops.shape(a)
  batch_shape = a_shape[:-2]
  n = a_shape[-1]

  identity = linalg_ops.eye(n, batch_shape=batch_shape, dtype=a.dtype)
  gramian = math_ops.matmul(a, a, adjoint_a=True) + l2_regularizer * identity
  chol = linalg_ops.cholesky(gramian)
  # Temporary z = (A^T * A + lambda * I)^{-1} * grad.
  z = linalg_ops.cholesky_solve(chol, grad)
  xzt = math_ops.matmul(x, z, adjoint_b=True)
  zx_sym = xzt + array_ops.matrix_transpose(xzt)
  grad_a = -math_ops.matmul(a, zx_sym) + math_ops.matmul(b, z, adjoint_b=True)
  grad_b = math_ops.matmul(a, z)
  return (grad_a, grad_b, None)
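# Illustrative sketch (NumPy, not part of the snippets above): the
# overdetermined branch differentiates the regularized normal-equations
# solution X = (A^T A + lambda I)^{-1} A^T B. The check below confirms that
# this X has zero gradient for the objective ||A X - B||_F^2 + lambda ||X||_F^2.
import numpy as np

rng = np.random.RandomState(0)
m, n, k = 6, 3, 2
lam = 0.1
a = rng.randn(m, n)
b = rng.randn(m, k)

gramian = a.T @ a + lam * np.eye(n)
chol = np.linalg.cholesky(gramian)
x = np.linalg.solve(chol.T, np.linalg.solve(chol, a.T @ b))

# Stationarity condition of the regularized least-squares objective.
residual_grad = a.T @ (a @ x - b) + lam * x
np.testing.assert_allclose(residual_grad, np.zeros_like(x), atol=1e-8)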
def _batch_sqrt_solve(self, rhs):
  # Recall the square root of this operator is M + VDV^T.
  # The Woodbury formula gives:
  #   (M + VDV^T)^{-1}
  #   = M^{-1} - M^{-1} V (D^{-1} + V^T M^{-1} V)^{-1} V^T M^{-1}
  #   = M^{-1} - M^{-1} V C^{-1} V^T M^{-1}
  # where C is the capacitance matrix.
  m = self._operator
  v = self._v
  cchol = self._chol_capacitance(batch_mode=True)

  # The operators will use batch/singleton mode automatically. We don't
  # override.
  # M^{-1} rhs
  minv_rhs = m.solve(rhs)
  # V^T M^{-1} rhs
  vt_minv_rhs = math_ops.batch_matmul(v, minv_rhs, adj_x=True)
  # C^{-1} V^T M^{-1} rhs
  cinv_vt_minv_rhs = linalg_ops.cholesky_solve(cchol, vt_minv_rhs)
  # V C^{-1} V^T M^{-1} rhs
  v_cinv_vt_minv_rhs = math_ops.batch_matmul(v, cinv_vt_minv_rhs)
  # M^{-1} V C^{-1} V^T M^{-1} rhs
  minv_v_cinv_vt_minv_rhs = m.solve(v_cinv_vt_minv_rhs)

  # M^{-1} - M^{-1} V C^{-1} V^T M^{-1}
  return minv_rhs - minv_v_cinv_vt_minv_rhs
def _sqrt_solve(self, rhs):
  # Recall the square root of this operator is M + VDV^T.
  # The Woodbury formula gives:
  #   (M + VDV^T)^{-1}
  #   = M^{-1} - M^{-1} V (D^{-1} + V^T M^{-1} V)^{-1} V^T M^{-1}
  #   = M^{-1} - M^{-1} V C^{-1} V^T M^{-1}
  # where C is the capacitance matrix.
  # TODO(jvdillon): Determine if recursively applying rank-1 updates is more
  # efficient. May not be possible because a general n x n matrix can be
  # represented as n rank-1 updates, and solving with this matrix is always
  # done in O(n^3) time.
  m = self._operator
  v = self._v
  cchol = self._chol_capacitance(batch_mode=False)

  # The operators will use batch/singleton mode automatically. We don't
  # override.
  # M^{-1} rhs
  minv_rhs = m.solve(rhs)
  # V^T M^{-1} rhs
  vt_minv_rhs = math_ops.matmul(v, minv_rhs, transpose_a=True)
  # C^{-1} V^T M^{-1} rhs
  cinv_vt_minv_rhs = linalg_ops.cholesky_solve(cchol, vt_minv_rhs)
  # V C^{-1} V^T M^{-1} rhs
  v_cinv_vt_minv_rhs = math_ops.matmul(v, cinv_vt_minv_rhs)
  # M^{-1} V C^{-1} V^T M^{-1} rhs
  minv_v_cinv_vt_minv_rhs = m.solve(v_cinv_vt_minv_rhs)

  # M^{-1} - M^{-1} V C^{-1} V^T M^{-1}
  return minv_rhs - minv_v_cinv_vt_minv_rhs
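# Illustrative sketch (NumPy, hypothetical names, not part of the snippets
# above): a numerical check of the Woodbury identity that both _sqrt_solve
# variants rely on,
#   (M + V D V^T)^{-1} = M^{-1} - M^{-1} V (D^{-1} + V^T M^{-1} V)^{-1} V^T M^{-1},
# using a Cholesky solve against the capacitance matrix C.
import numpy as np

rng = np.random.RandomState(0)
n, r = 5, 2
m = np.diag(rng.rand(n) + 1.0)   # a simple positive definite "M"
v = rng.randn(n, r)
d = np.diag(rng.rand(r) + 1.0)   # positive definite low-rank factor
rhs = rng.randn(n, 3)

# Reference: direct solve with the full matrix.
direct = np.linalg.solve(m + v @ d @ v.T, rhs)

# Woodbury solve via the capacitance matrix C = D^{-1} + V^T M^{-1} V.
minv_rhs = np.linalg.solve(m, rhs)
capacitance = np.linalg.inv(d) + v.T @ np.linalg.solve(m, v)
cchol = np.linalg.cholesky(capacitance)
cinv_vt_minv_rhs = np.linalg.solve(cchol.T,
                                   np.linalg.solve(cchol, v.T @ minv_rhs))
woodbury = minv_rhs - np.linalg.solve(m, v @ cinv_vt_minv_rhs)

np.testing.assert_allclose(direct, woodbury, atol=1e-8)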
def posdef_inv_cholesky(tensor, identity, damping):
  """Computes inverse(tensor + damping * identity) with Cholesky."""
  # tensor = math_ops.to_double(tensor)
  # damping = math_ops.to_double(damping)
  # identity = math_ops.to_double(identity)
  chol = linalg_ops.cholesky(tensor + damping * identity)
  return linalg_ops.cholesky_solve(chol, identity)
def _Overdetermined(op, grad):
  """Gradients for the overdetermined case of MatrixSolveLs.

  This is the backprop for the solution to the normal equations of the first
  kind:
    X = F(A, B) = (A^T * A + lambda * I)^{-1} * A^T * B
  which solves the least squares problem
    min ||A * X - B||_F^2 + lambda ||X||_F^2.
  """
  a = op.inputs[0]
  b = op.inputs[1]
  x = op.outputs[0]
  l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
  # pylint: disable=protected-access
  chol = linalg_ops._RegularizedGramianCholesky(
      a, l2_regularizer=l2_regularizer, first_kind=True)
  # pylint: enable=protected-access
  # Temporary z = (A^T * A + lambda * I)^{-1} * grad.
  z = linalg_ops.cholesky_solve(chol, grad)
  xzt = math_ops.matmul(x, z, adjoint_b=True)
  zx_sym = xzt + array_ops.matrix_transpose(xzt)
  grad_a = -math_ops.matmul(a, zx_sym) + math_ops.matmul(b, z, adjoint_b=True)
  grad_b = math_ops.matmul(a, z)
  return (grad_a, grad_b, None)
def _overdetermined(op, grad):
  """Gradients for the overdetermined case of MatrixSolveLs.

  This is the backprop for the solution to the normal equations of the first
  kind:
    X = F(A, B) = (A^T * A + lambda * I)^{-1} * A^T * B
  which solves the least squares problem
    min ||A * X - B||_F^2 + lambda ||X||_F^2.
  """
  a = op.inputs[0]
  b = op.inputs[1]
  l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
  x = op.outputs[0]
  a_shape = array_ops.shape(a)
  batch_shape = a_shape[:-2]
  n = a_shape[-1]

  identity = linalg_ops.eye(n, batch_shape=batch_shape, dtype=a.dtype)
  gramian = math_ops.matmul(a, a, adjoint_a=True) + l2_regularizer * identity
  chol = linalg_ops.cholesky(gramian)
  # Temporary z = (A^T * A + lambda * I)^{-1} * grad.
  z = linalg_ops.cholesky_solve(chol, grad)
  xzt = math_ops.matmul(x, z, adjoint_b=True)
  zx_sym = xzt + array_ops.matrix_transpose(xzt)
  grad_a = -math_ops.matmul(a, zx_sym) + math_ops.matmul(b, z, adjoint_b=True)
  grad_b = math_ops.matmul(a, z)
  return (grad_a, grad_b, None)
def _solve(self, rhs, adjoint=False, adjoint_arg=False):
  if self.is_square is False:
    raise NotImplementedError(
        "Solve is not yet implemented for non-square operators.")
  rhs = linear_operator_util.matrix_adjoint(rhs) if adjoint_arg else rhs
  if self._can_use_cholesky():
    return linalg_ops.cholesky_solve(self._get_cached_chol(), rhs)
  return linalg_ops.matrix_solve(
      self._get_cached_dense_matrix(), rhs, adjoint=adjoint)
def test_static_dims_broadcast(self):
  # batch_shape = [2]
  chol = rng.rand(3, 3)
  rhs = rng.rand(2, 3, 7)
  chol_broadcast = chol + np.zeros((2, 1, 1))

  result = linear_operator_util.cholesky_solve_with_broadcast(chol, rhs)
  self.assertAllEqual((2, 3, 7), result.get_shape())
  expected = linalg_ops.cholesky_solve(chol_broadcast, rhs)
  self.assertAllClose(*self.evaluate([expected, result]))
def _dense_solve(self, rhs, adjoint=False, adjoint_arg=False):
  """Solve by conversion to a dense matrix."""
  if self.is_square is False:  # pylint: disable=g-bool-id-comparison
    raise NotImplementedError(
        "Solve is not yet implemented for non-square operators.")
  rhs = linalg.adjoint(rhs) if adjoint_arg else rhs
  if self._can_use_cholesky():
    return linalg_ops.cholesky_solve(
        linalg_ops.cholesky(self.to_dense()), rhs)
  return linear_operator_util.matrix_solve_with_broadcast(
      self.to_dense(), rhs, adjoint=adjoint)
def test_static_dims_broadcast(self):
  # batch_shape = [2]
  chol = rng.rand(3, 3)
  rhs = rng.rand(2, 3, 7)
  chol_broadcast = chol + np.zeros((2, 1, 1))

  with self.cached_session():
    result = linear_operator_util.cholesky_solve_with_broadcast(chol, rhs)
    self.assertAllEqual((2, 3, 7), result.get_shape())
    expected = linalg_ops.cholesky_solve(chol_broadcast, rhs)
    self.assertAllEqual(expected.eval(), result.eval())
def _solve(self, rhs, adjoint=False, adjoint_arg=False):
  """Default implementation of _solve."""
  if self.is_square is False:
    raise NotImplementedError(
        "Solve is not yet implemented for non-square operators.")
  logging.warn(
      "Using (possibly slow) default implementation of solve."
      " Requires conversion to a dense matrix and O(N^3) operations.")
  rhs = linalg.adjoint(rhs) if adjoint_arg else rhs
  if self._can_use_cholesky():
    return linalg_ops.cholesky_solve(
        linalg_ops.cholesky(self.to_dense()), rhs)
  return linalg_ops.matrix_solve(self.to_dense(), rhs, adjoint=adjoint)
def _solve(self, rhs, adjoint=False, adjoint_arg=False):
  """Default implementation of _solve."""
  if self.is_square is False:
    raise NotImplementedError(
        "Solve is not yet implemented for non-square operators.")
  logging.warn(
      "Using (possibly slow) default implementation of solve."
      " Requires conversion to a dense matrix and O(N^3) operations.")
  rhs = linear_operator_util.matrix_adjoint(rhs) if adjoint_arg else rhs
  if self._can_use_cholesky():
    return linalg_ops.cholesky_solve(self._get_cached_chol(), rhs)
  return linalg_ops.matrix_solve(
      self._get_cached_dense_matrix(), rhs, adjoint=adjoint)
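# Illustrative sketch (NumPy, hypothetical names, not part of the snippets
# above): the branching used by the default _solve implementations, in
# standalone form. An `is_spd` flag stands in for `self._can_use_cholesky()`:
# prefer a Cholesky-based solve when the matrix is symmetric positive
# definite, otherwise fall back to a general dense solve.
import numpy as np

def dense_solve(matrix, rhs, is_spd=False):
  if is_spd:
    chol = np.linalg.cholesky(matrix)
    # Two triangular-shaped solves: L y = rhs, then L^T x = y.
    return np.linalg.solve(chol.T, np.linalg.solve(chol, rhs))
  return np.linalg.solve(matrix, rhs)

rng = np.random.RandomState(0)
a = rng.randn(4, 4)
spd = a @ a.T + 4.0 * np.eye(4)
rhs = rng.randn(4, 2)
np.testing.assert_allclose(
    dense_solve(spd, rhs, is_spd=True), np.linalg.solve(spd, rhs), atol=1e-8)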
def test_works_with_five_different_random_pos_def_matrices(self):
  for n in range(1, 6):
    for np_type, atol in [(np.float32, 0.05), (np.float64, 1e-5)]:
      with self.session(use_gpu=True):
        # Create 2 x n x n matrix
        array = np.array(
            [_RandomPDMatrix(n, self.rng),
             _RandomPDMatrix(n, self.rng)]).astype(np_type)
        chol = linalg_ops.cholesky(array)
        for k in range(1, 3):
          rhs = self.rng.randn(2, n, k).astype(np_type)
          x = linalg_ops.cholesky_solve(chol, rhs)
          self.assertAllClose(
              rhs, math_ops.matmul(array, x).eval(), atol=atol)
def test_dynamic_dims_broadcast_64bit(self):
  # batch_shape = [2, 2]
  chol = rng.rand(2, 3, 3)
  rhs = rng.rand(2, 1, 3, 7)
  chol_broadcast = chol + np.zeros((2, 2, 1, 1))
  rhs_broadcast = rhs + np.zeros((2, 2, 1, 1))

  chol_ph = array_ops.placeholder_with_default(chol, shape=None)
  rhs_ph = array_ops.placeholder_with_default(rhs, shape=None)

  result, expected = self.evaluate([
      linear_operator_util.cholesky_solve_with_broadcast(chol_ph, rhs_ph),
      linalg_ops.cholesky_solve(chol_broadcast, rhs_broadcast)
  ])
  self.assertAllClose(expected, result)
def _solve(self, rhs, adjoint=False, adjoint_arg=False):
  if self.base_operator.is_non_singular is False:
    raise ValueError(
        "Solve not implemented unless this is a perturbation of a "
        "non-singular LinearOperator.")
  # The Woodbury formula gives:
  # https://en.wikipedia.org/wiki/Woodbury_matrix_identity
  #   (L + UDV^H)^{-1}
  #   = L^{-1} - L^{-1} U (D^{-1} + V^H L^{-1} U)^{-1} V^H L^{-1}
  #   = L^{-1} - L^{-1} U C^{-1} V^H L^{-1}
  # where C is the capacitance matrix, C := D^{-1} + V^H L^{-1} U
  # Note also that, with ^{-H} being the inverse of the adjoint,
  #   (L + UDV^H)^{-H}
  #   = L^{-H} - L^{-H} V C^{-H} U^H L^{-H}
  l = self.base_operator
  if adjoint:
    # If adjoint, U and V have flipped roles in the operator.
    v, u = self._get_uv_as_tensors()
    # Capacitance should still be computed with u=self.u and v=self.v, which
    # after the "flip" on the line above means u=v, v=u. I.e. no need to
    # "flip" in the capacitance call, since the call to
    # matrix_solve_with_broadcast below is done with the `adjoint` argument,
    # and this takes care of things.
    capacitance = self._make_capacitance(u=v, v=u)
  else:
    u, v = self._get_uv_as_tensors()
    capacitance = self._make_capacitance(u=u, v=v)

  # L^{-1} rhs
  linv_rhs = l.solve(rhs, adjoint=adjoint, adjoint_arg=adjoint_arg)
  # V^H L^{-1} rhs
  vh_linv_rhs = math_ops.matmul(v, linv_rhs, adjoint_a=True)
  # C^{-1} V^H L^{-1} rhs
  if self._use_cholesky:
    capinv_vh_linv_rhs = linalg_ops.cholesky_solve(
        linalg_ops.cholesky(capacitance), vh_linv_rhs)
  else:
    capinv_vh_linv_rhs = linear_operator_util.matrix_solve_with_broadcast(
        capacitance, vh_linv_rhs, adjoint=adjoint)
  # U C^{-1} V^H L^{-1} rhs
  u_capinv_vh_linv_rhs = math_ops.matmul(u, capinv_vh_linv_rhs)
  # L^{-1} U C^{-1} V^H L^{-1} rhs
  linv_u_capinv_vh_linv_rhs = l.solve(u_capinv_vh_linv_rhs, adjoint=adjoint)

  # L^{-1} - L^{-1} U C^{-1} V^H L^{-1}
  return linv_rhs - linv_u_capinv_vh_linv_rhs
def _solve(self, rhs, adjoint=False, adjoint_arg=False):
  if self.base_operator.is_non_singular is False:
    raise ValueError(
        "Solve not implemented unless this is a perturbation of a "
        "non-singular LinearOperator.")
  # The Woodbury formula gives:
  # https://en.wikipedia.org/wiki/Woodbury_matrix_identity
  #   (L + UDV^H)^{-1}
  #   = L^{-1} - L^{-1} U (D^{-1} + V^H L^{-1} U)^{-1} V^H L^{-1}
  #   = L^{-1} - L^{-1} U C^{-1} V^H L^{-1}
  # where C is the capacitance matrix, C := D^{-1} + V^H L^{-1} U
  # Note also that, with ^{-H} being the inverse of the adjoint,
  #   (L + UDV^H)^{-H}
  #   = L^{-H} - L^{-H} V C^{-H} U^H L^{-H}
  l = self.base_operator
  if adjoint:
    v = self.u
    u = self.v
  else:
    v = self.v
    u = self.u

  # L^{-1} rhs
  linv_rhs = l.solve(rhs, adjoint=adjoint, adjoint_arg=adjoint_arg)
  # V^H L^{-1} rhs
  vh_linv_rhs = math_ops.matmul(v, linv_rhs, adjoint_a=True)
  # C^{-1} V^H L^{-1} rhs
  if self._use_cholesky:
    capinv_vh_linv_rhs = linalg_ops.cholesky_solve(
        self._chol_capacitance, vh_linv_rhs)
  else:
    capinv_vh_linv_rhs = linalg_ops.matrix_solve(
        self._capacitance, vh_linv_rhs, adjoint=adjoint)
  # U C^{-1} V^H L^{-1} rhs
  u_capinv_vh_linv_rhs = math_ops.matmul(u, capinv_vh_linv_rhs)
  # L^{-1} U C^{-1} V^H L^{-1} rhs
  linv_u_capinv_vh_linv_rhs = l.solve(u_capinv_vh_linv_rhs, adjoint=adjoint)

  # L^{-1} - L^{-1} U C^{-1} V^H L^{-1}
  return linv_rhs - linv_u_capinv_vh_linv_rhs
def test_dynamic_dims_broadcast_64bit(self):
  # batch_shape = [2, 2]
  chol = rng.rand(2, 3, 3)
  rhs = rng.rand(2, 1, 3, 7)
  chol_broadcast = chol + np.zeros((2, 2, 1, 1))
  rhs_broadcast = rhs + np.zeros((2, 2, 1, 1))

  chol_ph = array_ops.placeholder(dtypes.float64)
  rhs_ph = array_ops.placeholder(dtypes.float64)

  with self.cached_session() as sess:
    result, expected = sess.run(
        [
            linear_operator_util.cholesky_solve_with_broadcast(
                chol_ph, rhs_ph),
            linalg_ops.cholesky_solve(chol_broadcast, rhs_broadcast)
        ],
        feed_dict={
            chol_ph: chol,
            rhs_ph: rhs,
        })
    self.assertAllClose(expected, result)
def _solve(self, rhs, adjoint=False):
  if self.base_operator.is_non_singular is False:
    raise ValueError(
        "Solve not implemented unless this is a perturbation of a "
        "non-singular LinearOperator.")
  # The Woodbury formula gives:
  # https://en.wikipedia.org/wiki/Woodbury_matrix_identity
  #   (L + UDV^H)^{-1}
  #   = L^{-1} - L^{-1} U (D^{-1} + V^H L^{-1} U)^{-1} V^H L^{-1}
  #   = L^{-1} - L^{-1} U C^{-1} V^H L^{-1}
  # where C is the capacitance matrix, C := D^{-1} + V^H L^{-1} U
  # Note also that, with ^{-H} being the inverse of the adjoint,
  #   (L + UDV^H)^{-H}
  #   = L^{-H} - L^{-H} V C^{-H} U^H L^{-H}
  l = self.base_operator
  if adjoint:
    v = self.u
    u = self.v
  else:
    v = self.v
    u = self.u

  # L^{-1} rhs
  linv_rhs = l.solve(rhs, adjoint=adjoint)
  # V^H L^{-1} rhs
  vh_linv_rhs = math_ops.matmul(v, linv_rhs, adjoint_a=True)
  # C^{-1} V^H L^{-1} rhs
  if self._use_cholesky:
    capinv_vh_linv_rhs = linalg_ops.cholesky_solve(
        self._chol_capacitance, vh_linv_rhs)
  else:
    capinv_vh_linv_rhs = linalg_ops.matrix_solve(
        self._capacitance, vh_linv_rhs, adjoint=adjoint)
  # U C^{-1} V^H L^{-1} rhs
  u_capinv_vh_linv_rhs = math_ops.matmul(u, capinv_vh_linv_rhs)
  # L^{-1} U C^{-1} V^H L^{-1} rhs
  linv_u_capinv_vh_linv_rhs = l.solve(u_capinv_vh_linv_rhs, adjoint=adjoint)

  # L^{-1} - L^{-1} U C^{-1} V^H L^{-1}
  return linv_rhs - linv_u_capinv_vh_linv_rhs
def testCholeskyFwdBackwd(self):
  with self.session() as sess:
    cfg = ipu.utils.create_ipu_config(profiling=True)
    cfg = ipu.utils.set_ipu_model_options(cfg, compile_ipu_code=False)
    cfg = ipu.utils.auto_select_ipus(cfg, 1)
    ipu.utils.configure_ipu_system(cfg)

    with ops.device("/device:IPU:0"):
      with variable_scope.variable_scope("vs", use_resource=True):
        ls = variable_scope.get_variable(
            "lengthscale",
            shape=[1],
            dtype=np.float32,
            initializer=init_ops.constant_initializer(
                np.array([0.2], dtype=np.float32)))

      x = array_ops.placeholder(np.float32, shape=[4, 2])
      expected = array_ops.placeholder(np.float32, shape=[4, 1])

      x_t = array_ops.transpose(array_ops.expand_dims(x, 0), (1, 0, 2))
      K = math_ops.exp(-0.5 * math_ops.reduce_sum(((x_t - x) / ls)**2., -1))
      L = linalg_ops.cholesky(K)
      alpha = linalg_ops.cholesky_solve(L, expected)
      loss = -math_ops.reduce_mean(
          math_ops.matmul(alpha, expected, transpose_a=True))

      optimizer = gradient_descent.GradientDescentOptimizer(0.1)
      train = optimizer.minimize(loss)

    fd = {
        x: np.array(
            [[1., 0.63920265], [0.63920265, 1.],
             [0.30846608, 0.24088137], [0.38437635, 0.76085484]],
            dtype=np.float32),
        expected: [[0.4662998], [-0.27042738], [-0.1996377], [-1.1648941]]
    }
    sess.run(variables.global_variables_initializer())
    sess.run(train, feed_dict=fd)
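# Illustrative sketch (NumPy, not part of the test above; the jitter term and
# variable names are additions for illustration): the GP-style computation
# the IPU test exercises. Build a squared-exponential kernel matrix, factor it
# with Cholesky, and solve K alpha = y.
import numpy as np

rng = np.random.RandomState(0)
x = rng.randn(4, 2)
y = rng.randn(4, 1)
lengthscale = 0.2

sq_dists = np.sum((x[:, None, :] - x[None, :, :])**2, axis=-1)
kernel = np.exp(-0.5 * sq_dists / lengthscale**2)
kernel += 1e-6 * np.eye(4)  # jitter so the Cholesky factorization succeeds

chol = np.linalg.cholesky(kernel)
alpha = np.linalg.solve(chol.T, np.linalg.solve(chol, y))  # K^{-1} y
np.testing.assert_allclose(kernel @ alpha, y, atol=1e-8)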
def test_dynamic_dims_broadcast_64bit(self):
  # batch_shape = [2, 2]
  chol = rng.rand(2, 3, 3)
  rhs = rng.rand(2, 1, 3, 7)
  chol_broadcast = chol + np.zeros((2, 2, 1, 1))
  rhs_broadcast = rhs + np.zeros((2, 2, 1, 1))

  chol_ph = array_ops.placeholder(dtypes.float64)
  rhs_ph = array_ops.placeholder(dtypes.float64)

  with self.cached_session() as sess:
    result, expected = sess.run(
        [
            linear_operator_util.cholesky_solve_with_broadcast(
                chol_ph, rhs_ph),
            linalg_ops.cholesky_solve(chol_broadcast, rhs_broadcast)
        ],
        feed_dict={
            chol_ph: chol,
            rhs_ph: rhs,
        })
    self.assertAllEqual(expected, result)
def _Overdetermined(op, grad):
  """Gradients for the overdetermined case of MatrixSolveLs.

  This is the backprop for the solution to the normal equations of the first
  kind:
    X = F(A, B) = (A^T * A + lambda * I)^{-1} * A^T * B
  which solves the least squares problem
    min ||A * X - B||_F^2 + lambda ||X||_F^2.
  """
  a = op.inputs[0]
  b = op.inputs[1]
  x = op.outputs[0]
  l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
  # pylint: disable=protected-access
  chol = linalg_ops._RegularizedGramianCholesky(
      a, l2_regularizer=l2_regularizer, first_kind=True)
  # pylint: enable=protected-access
  # Temporary z = (A^T * A + lambda * I)^{-1} * grad.
  z = linalg_ops.cholesky_solve(chol, grad)
  xzt = math_ops.matmul(x, z, adjoint_b=True)
  zx_sym = xzt + array_ops.matrix_transpose(xzt)
  grad_a = -math_ops.matmul(a, zx_sym) + math_ops.matmul(b, z, adjoint_b=True)
  grad_b = math_ops.matmul(a, z)
  return (grad_a, grad_b, None)
def posdef_inv_cholesky(tensor, reg_mat, damping):
  """Computes inverse(tensor + damping * reg_mat) with Cholesky."""
  chol = linalg_ops.cholesky(tensor + damping * reg_mat)
  identity = linalg_ops.eye(array_ops.shape(tensor)[0], dtype=tensor.dtype)
  return linalg_ops.cholesky_solve(chol, identity)
def _batch_solve(self, rhs):
  # Solve A x = rhs using the cached Cholesky factor of A.
  return linalg_ops.cholesky_solve(self._chol, rhs)
def cholesky_solve_with_broadcast(chol, rhs, name=None):
  """Solve systems of linear equations, broadcasting batch dimensions."""
  with ops.name_scope(name, "CholeskySolveWithBroadcast", [chol, rhs]):
    chol, rhs = broadcast_matrix_batch_dims([chol, rhs])
    return linalg_ops.cholesky_solve(chol, rhs)
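# Illustrative sketch (NumPy, not part of the helper above): what
# "broadcast then solve" means here. The batch dimensions of the Cholesky
# factor and the right-hand side are broadcast to a common shape, then each
# batch member is solved; a general batched solve stands in for the two
# triangular solves.
import numpy as np

rng = np.random.RandomState(0)
a = rng.randn(3, 3)
chol = np.linalg.cholesky(a @ a.T + 3.0 * np.eye(3))  # batch_shape = []
rhs = rng.randn(2, 3, 7)                               # batch_shape = [2]

chol_b = np.broadcast_to(chol, (2, 3, 3))
x = np.linalg.solve(np.swapaxes(chol_b, -1, -2),
                    np.linalg.solve(chol_b, rhs))      # (L L^T)^{-1} rhs

for i in range(2):
  np.testing.assert_allclose(chol @ chol.T @ x[i], rhs[i], atol=1e-8)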
def _solve(self, rhs, adjoint=False, adjoint_arg=False):
  rhs = linear_operator_util.matrix_adjoint(rhs) if adjoint_arg else rhs
  if self._is_spd:
    return linalg_ops.cholesky_solve(self._chol, rhs)
  return linalg_ops.matrix_solve(self._matrix, rhs, adjoint=adjoint)
def posdef_inv_cholesky(tensor, identity, damping):
  """Computes inverse(tensor + damping * identity) with Cholesky."""
  chol = linalg_ops.cholesky(tensor + damping * identity)
  return linalg_ops.cholesky_solve(chol, identity)
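# Illustrative sketch (NumPy, not part of the snippets above): the damped
# inverse trick used by the posdef_inv_cholesky variants. Factor
# (T + damping * I) once with Cholesky, then solve against the identity to
# obtain the full inverse.
import numpy as np

rng = np.random.RandomState(0)
n, damping = 4, 0.1
t = rng.randn(n, n)
t = t @ t.T  # a symmetric positive semi-definite "tensor"

chol = np.linalg.cholesky(t + damping * np.eye(n))
inv = np.linalg.solve(chol.T, np.linalg.solve(chol, np.eye(n)))

np.testing.assert_allclose(
    inv, np.linalg.inv(t + damping * np.eye(n)), atol=1e-8)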
def _solve(self, rhs, adjoint=False):
  if self._is_spd:
    return linalg_ops.cholesky_solve(self._chol, rhs)
  return linalg_ops.matrix_solve(self._matrix, rhs, adjoint=adjoint)