def _preprocess_linear_system(A, b, x0=None):
    """
    Bring the right-hand side and the initial guess into column form.

    Parameters
    ----------
    A : array-like or LinearOperator, shape=(n,n)
        A square linear operator (or matrix). It is returned untouched.
    b : array_like or RandomVariable, shape=(n,) or (n, nrhs)
        Right-hand side vector or matrix in :math:`A x = b`. A
        ``probnum.RandomVariable`` is passed through as is; anything else is
        converted with ``utils.as_colvec``.
    x0 : array-like, or RandomVariable, shape=(n,) or (n, nrhs)
        Optional. Prior belief for the solution of the linear system. Converted
        with ``utils.as_colvec`` whenever it is not ``None``.

    Returns
    -------
    A : array-like or LinearOperator, shape=(n,n)
        The operator that was passed in.
    b : array-like or RandomVariable
        Right-hand side of the linear system after the conversion above.
    x0 : array-like, RandomVariable or None
        Prior belief for the solution after the conversion above, or ``None`` if
        none was given.
    """
    # Random-variable right-hand sides are left alone; others: (n,) -> (n, 1)
    rhs = b if isinstance(b, probnum.RandomVariable) else utils.as_colvec(b)

    # (n,) -> (n, 1), only when an initial guess was supplied
    guess = None if x0 is None else utils.as_colvec(x0)

    return A, rhs, guess
def test_kernel_matrix(input_dim, nu):
    """Check that the product Matérn kernel matrix equals the elementwise product
    of the 1D Matérn kernel matrices evaluated on each input dimension."""
    shared_lengthscale = 1.25
    num_points = 15

    kernel_1d = kernels.Matern(
        input_shape=(1,), lengthscale=shared_lengthscale, nu=nu
    )
    kernel_nd = kernels.ProductMatern(
        input_shape=(input_dim,), lengthscales=shared_lengthscale, nus=nu
    )

    # Fixed seed so the sampled inputs are reproducible
    points = np.random.default_rng(42).random(size=(num_points, input_dim))

    actual = kernel_nd.matrix(points)

    # Accumulate the elementwise product one input dimension at a time
    expected = np.ones(shape=(num_points, num_points))
    for coordinate in points.T:
        expected *= kernel_1d.matrix(_utils.as_colvec(coordinate))

    np.testing.assert_allclose(actual, expected)
def problinsolve(
    A: SquareLinOp,
    b: RandomVecMat,
    A0: Optional[SquareLinOp] = None,
    Ainv0: Optional[SquareLinOp] = None,
    x0: Optional[RandomVecMat] = None,
    assume_A: str = "sympos",
    maxiter: Optional[int] = None,
    atol: float = 10**-6,
    rtol: float = 10**-6,
    callback: Optional[Callable] = None,
    **kwargs
) -> Tuple["probnum.RandomVariable", "probnum.RandomVariable", "probnum.RandomVariable", Dict]:
    """
    Infer a solution to the linear system :math:`A x = b` in a Bayesian framework.

    Probabilistic linear solvers infer solutions to problems of the form

    .. math:: Ax=b,

    where :math:`A \\in \\mathbb{R}^{n \\times n}` and :math:`b \\in \\mathbb{R}^{n}`.
    They return a probability measure which quantifies uncertainty in the output
    arising from finite computational resources. This solver can take prior
    information either on the linear operator :math:`A` or its inverse
    :math:`H=A^{-1}` in the form of a random variable ``A0`` or ``Ainv0`` and outputs
    a posterior belief over :math:`A` or :math:`H`. This code implements the method
    described in Wenger et al. [1]_ based on the work in Hennig et al. [2]_.

    Parameters
    ----------
    A :
        *shape=(n, n)* -- A square linear operator (or matrix). Only matrix-vector
        products :math:`v \\mapsto Av` are used internally.
    b :
        *shape=(n, ) or (n, nrhs)* -- Right-hand side vector, matrix or random
        variable in :math:`A x = b`. For multiple right hand sides, ``nrhs`` problems
        are solved sequentially with the posteriors over the matrices acting as
        priors for subsequent solves. If the right-hand-side is assumed to be noisy,
        every iteration of the solver samples a realization from ``b``.
    A0 :
        *shape=(n, n)* -- A square matrix, linear operator or random variable
        representing the prior belief over the linear operator :math:`A`. If an array
        or linear operator is given, a prior distribution is chosen automatically.
    Ainv0 :
        *shape=(n, n)* -- A square matrix, linear operator or random variable
        representing the prior belief over the inverse :math:`H=A^{-1}`. This can be
        viewed as taking the form of a pre-conditioner. If an array or linear
        operator is given, a prior distribution is chosen automatically.
    x0 :
        *shape=(n, ) or (n, nrhs)* -- Prior belief for the solution of the linear
        system. Will be ignored if ``Ainv0`` is given. With several right-hand sides
        it is only handed to the solve of the first one.
    assume_A :
        Assumptions on the linear operator which can influence solver choice and
        behavior. The value is lower-cased before it is checked. The available
        options are (combinations of)

        ==================== =========
         generic matrix       ``gen``
         symmetric            ``sym``
         positive definite    ``pos``
         (additive) noise     ``noise``
        ==================== =========

    maxiter :
        Maximum number of iterations. Defaults to :math:`10n`, where :math:`n` is the
        dimension of :math:`A`.
    atol :
        Absolute convergence tolerance.
    rtol :
        Relative convergence tolerance.
    callback :
        User-supplied function called after each iteration of the linear solver. It
        is called as ``callback(xk, Ak, Ainvk, sk, yk, alphak, resid, **kwargs)`` and
        can be used to return quantities from the iteration. Note that depending on
        the function supplied, this can slow down the solver considerably.
    kwargs : optional
        Optional keyword arguments passed onto the solver iteration.

    Returns
    -------
    x :
        Approximate solution :math:`x` to the linear system. Shape of the return
        matches the shape of ``b``.
    A :
        Posterior belief over the linear operator.
    Ainv :
        Posterior belief over the linear operator inverse :math:`H=A^{-1}`.
    info :
        Information on convergence of the solver.

    Raises
    ------
    ValueError
        If size mismatches detected or input matrices are not square, or if
        ``assume_A`` contains unrecognized linear operator properties.
    LinAlgError
        If the matrix ``A`` is singular.
    LinAlgWarning
        If an ill-conditioned input ``A`` is detected.

    Notes
    -----
    For a specific class of priors the posterior mean of :math:`x_k=Hb` coincides
    with the iterates of the conjugate gradient method. The matrix-based view taken
    here recovers the solution-based inference of :func:`bayescg` [3]_.

    References
    ----------
    .. [1] Wenger, J. and Hennig, P., Probabilistic Linear Solvers for Machine
       Learning, 2020
    .. [2] Hennig, P., Probabilistic Interpretation of Linear Solvers, *SIAM Journal
       on Optimization*, 2015, 25, 234-260
    .. [3] Bartels, S. et al., Probabilistic Linear Solvers: A Unifying View,
       *Statistics and Computing*, 2019

    See Also
    --------
    bayescg : Solve linear systems with prior information on the solution.

    Examples
    --------
    >>> import numpy as np
    >>> np.random.seed(1)
    >>> n = 20
    >>> A = np.random.rand(n, n)
    >>> A = 0.5 * (A + A.T) + 5 * np.eye(n)
    >>> b = np.random.rand(n)
    >>> x, A, Ainv, info = problinsolve(A=A, b=b)
    >>> print(info["iter"])
    9
    """
    # Check linear system for type and dimension mismatch
    _check_linear_system(A=A, b=b, A0=A0, Ainv0=Ainv0, x0=x0)

    # Strip every known property token; whatever text is left is unrecognized
    assume_A = assume_A.lower()
    leftover = assume_A
    for token in ("gen", "sym", "pos", "noise"):
        leftover = leftover.replace(token, "")
    if leftover:
        raise ValueError(
            "Assumption '{}' contains unrecognized linear operator properties."
            .format(assume_A))

    # Transform the linear system to an appropriate form
    A, b, x0 = _preprocess_linear_system(A=A, b=b, x0=x0)

    # Problem dimensions
    n = A.shape[0]
    nrhs = b.shape[1]
    info = {}

    # Default iteration budget: ten times the dimension of A
    if maxiter is None:
        maxiter = n * 10

    # Options shared by every call to the solver iteration
    solve_options = dict(
        maxiter=maxiter, atol=atol, rtol=rtol, callback=callback, **kwargs
    )

    if nrhs > 1:
        # Solve column by column; the matrix posteriors of one solve become the
        # priors of the next one
        for column in range(nrhs):
            # Only use prior information on Ainv after the first right-hand side
            x_prior = x0 if column == 0 else None
            solver = _init_solver(
                A=A,
                b=utils.as_colvec(b[:, column]),
                A0=A0,
                Ainv0=Ainv0,
                x0=x_prior,
                assume_A=assume_A,
            )
            _, A0, Ainv0, info = solver.solve(**solve_options)

        # Return Ainv @ b for multiple rhs
        x = Ainv0 @ b
    else:
        # Single right hand side
        solver = _init_solver(
            A=A, b=b, A0=A0, Ainv0=Ainv0, x0=x0, assume_A=assume_A
        )
        x, A0, Ainv0, info = solver.solve(**solve_options)

    # Check result and issue warnings (e.g. singular or ill-conditioned matrix)
    _postprocess(info=info, A=A)

    return x, A0, Ainv0, info
def _preprocess_linear_system(A, b, assume_A, A0=None, Ainv0=None, x0=None): """ Transform the linear system to linear operator and random variable form. Parameters ---------- A : array-like or LinearOperator or RandomVariable A square matrix, linear operator or random variable representing the prior belief over :math:`A`. b : array_like, shape=(n,) or (n, nrhs) Right-hand side vector or matrix in :math:`A x = b`. assume_A : str, default="sympos" Assumptions on the matrix, which can influence solver choice or behavior. The available options are ==================== ========= generic matrix ``gen`` symmetric ``sym`` positive definite ``pos`` symmetric pos. def. ``sympos`` ==================== ========= If ``A`` or ``Ainv`` are random variables, then the encoded assumptions in the distribution are used automatically. A0 : RandomVariable, shape=(n,n) Random variable representing the prior belief over the linear operator :math:`A`. Ainv0 : array-like or LinearOperator or RandomVariable, shape=(n,n) Optional. A square matrix, linear operator or random variable representing the prior belief over the inverse :math:`H=A^{-1}`. x0 : array-like, or RandomVariable, shape=(n,) or (n, nrhs) Optional. Prior belief for the solution of the linear system. Will be ignored if ``Ainv`` is given. Returns ------- A : RandomVariable, shape=(n,n) Prior belief over the linear operator :math:`A`. b : array-like, shape=(n,) or (n, nrhs) Right-hand-side of the linear system. A0 : RandomVariable, shape=(n,n) Prior belief over the linear operator :math:`A`. Ainv0 : RandomVariable, shape=(n,n) Prior belief over the linear operator inverse :math:`H=A^{-1}`. x : array-like or RandomVariable, shape=(n,) or (n, nrhs) Prior belief over the solution :math:`x` to the linear system. """ # Choose matrix based view if not clear from arguments if (Ainv0 is not None or A0 is not None) and x0 is not None: warnings.warn( "Cannot use prior information on both the matrix (inverse) and the solution. 
The latter will be ignored." ) x = None else: x = x0 # Check matrix assumptions if assume_A not in ["gen", "sym", "pos", "sympos"]: raise ValueError( '\'{}\' is not a recognized linear operator assumption.'.format( assume_A)) # Choose priors for A and Ainv if not specified, based on matrix assumptions in "assume_A" if assume_A == "sympos": # No priors specified if A0 is None and Ainv0 is None: dist = probability.Normal( mean=linear_operators.Identity(shape=A.shape[0]), cov=linear_operators.SymmetricKronecker( linear_operators.Identity(shape=A.shape[0]))) Ainv0 = probability.RandomVariable(distribution=dist) dist = probability.Normal( mean=linear_operators.Identity(shape=A.shape[0]), cov=linear_operators.SymmetricKronecker( linear_operators.Identity(shape=A.shape[0]))) A0 = probability.RandomVariable(distribution=dist) # Only prior on Ainv specified elif A0 is None and Ainv0 is not None: try: if isinstance(Ainv0, probability.RandomVariable): A0_mean = Ainv0.mean().inv() else: A0_mean = Ainv0.inv() except AttributeError: warnings.warn( message= "Prior specified only for Ainv. Inverting prior mean naively. " + "This operation is computationally costly! Specify an inverse prior (mean) instead." ) A0_mean = np.linalg.inv(Ainv0.mean()) except NotImplementedError: A0_mean = linear_operators.Identity(A.shape[0]) warnings.warn( message= "Prior specified only for Ainv. Automatic prior mean inversion not implemented, " + "falling back to standard normal prior.") # hereditary positive definiteness A0_covfactor = A dist = probability.Normal( mean=A0_mean, cov=linear_operators.SymmetricKronecker(A=A0_covfactor)) A0 = probability.RandomVariable(distribution=dist) # Only prior on A specified if A0 is not None and Ainv0 is None: try: if isinstance(A0, probability.RandomVariable): Ainv0_mean = A0.mean().inv() else: Ainv0_mean = A0.inv() except AttributeError: warnings.warn( message= "Prior specified only for Ainv. Inverting prior mean naively. 
" + "This operation is computationally costly! Specify an inverse prior (mean) instead." ) Ainv0_mean = np.linalg.inv(A0.mean()) except NotImplementedError: Ainv0_mean = linear_operators.Identity(A.shape[0]) warnings.warn( message="Prior specified only for Ainv. " + "Automatic prior mean inversion failed, falling back to standard normal prior." ) # (non-symmetric) posterior correspondence Ainv0_covfactor = Ainv0_mean dist = probability.Normal( mean=Ainv0_mean, cov=linear_operators.SymmetricKronecker(A=Ainv0_covfactor)) Ainv0 = probability.RandomVariable(distribution=dist) elif assume_A == "sym": raise NotImplementedError elif assume_A == "pos": raise NotImplementedError elif assume_A == "gen": # TODO: Implement case where only a pre-conditioner is given as Ainv0 # TODO: Automatic prior selection based on data scale, matrix trace, etc. raise NotImplementedError # Transform linear system to correct dimensions b = utils.as_colvec(b) # (n,) -> (n, 1) if x0 is not None: x = utils.as_colvec(x0) # (n,) -> (n, 1) assert (not (Ainv0 is None and x is None)), "Neither Ainv nor x are specified." return A, b, A0, Ainv0, x
def problinsolve(A,
                 b,
                 A0=None,
                 Ainv0=None,
                 x0=None,
                 assume_A="sympos",
                 maxiter=None,
                 atol=10**-6,
                 rtol=10**-6,
                 callback=None,
                 **kwargs):
    """
    Infer a solution to the linear system :math:`A x = b` in a Bayesian framework.

    Probabilistic linear solvers infer solutions to problems of the form

    .. math:: Ax=b,

    where :math:`A \\in \\mathbb{R}^{n \\times n}` and :math:`b \\in \\mathbb{R}^{n}`.
    They return a probability measure which quantifies uncertainty in the output
    arising from finite computational resources. This solver can take prior
    information either on the linear operator :math:`A` or its inverse
    :math:`H=A^{-1}` in the form of a random variable ``A0`` or ``Ainv0`` and outputs
    a posterior belief over :math:`A` or :math:`H`. This code implements the method
    described in [1]_ based on the work in [2]_.

    Parameters
    ----------
    A : array-like or LinearOperator, shape=(n,n)
        A square matrix or linear operator.
    b : array_like, shape=(n,) or (n, nrhs)
        Right-hand side vector or matrix in :math:`A x = b`. For multiple right hand
        sides, ``nrhs`` problems are solved sequentially with the posteriors over
        the matrices acting as priors for subsequent solves.
    A0 : RandomVariable, shape=(n, n), optional
        Prior belief over the linear operator :math:`A` provided as a
        :class:`~probnum.probability.RandomVariable`.
    Ainv0 : array-like or LinearOperator or RandomVariable, shape=(n,n), optional
        A square matrix, linear operator or random variable representing the prior
        belief over the inverse :math:`H=A^{-1}`. This can be viewed as taking the
        form of a pre-conditioner. If an array or linear operator is given, a prior
        distribution is chosen automatically.
    x0 : array-like, shape=(n,) or (n, nrhs), optional
        Initial guess for the solution of the linear system. Will be ignored if
        ``Ainv`` is given.
    assume_A : str, default="sympos"
        Assumptions on the matrix, which can influence solver choice or behavior.
        The available options are

        ==================== =========
         generic matrix       ``gen``
         symmetric            ``sym``
         positive definite    ``pos``
         symmetric pos. def.  ``sympos``
        ==================== =========

        If ``A`` or ``Ainv`` are random variables, then the encoded assumptions in
        the distribution are used automatically.
    maxiter : int, optional
        Maximum number of iterations. Defaults to :math:`10n`, where :math:`n` is the
        dimension of :math:`A`.
    atol : float, optional
        Absolute residual tolerance. If
        :math:`\\lVert r_i \\rVert = \\lVert Ax_i - b \\rVert < \\text{atol}`, the
        iteration terminates.
    rtol : float, optional
        Relative residual tolerance. If
        :math:`\\lVert r_i \\rVert < \\text{rtol} \\lVert b \\rVert`, the iteration
        terminates.
    callback : function, optional
        User-supplied function called after each iteration of the linear solver. It
        is called as ``callback(xk, Ak, Ainvk, sk, yk, alphak, resid)`` and can be
        used to return quantities from the iteration. Note that depending on the
        function supplied, this can slow down the solver.
    kwargs : optional
        Keyword arguments passed onto the solver iteration.

    Returns
    -------
    x : RandomVariable, shape=(n,) or (n, nrhs)
        Approximate solution :math:`x` to the linear system. Shape of the return
        matches the shape of ``b``.
    A : RandomVariable, shape=(n,n)
        Posterior belief over the linear operator.
    Ainv : RandomVariable, shape=(n,n)
        Posterior belief over the linear operator inverse :math:`H=A^{-1}`.
    info : dict
        Information on convergence of the solver.

    Raises
    ------
    ValueError
        If size mismatches detected or input matrices are not square.
    LinAlgError
        If the matrix ``A`` is singular.
    LinAlgWarning
        If an ill-conditioned input ``A`` is detected.

    Notes
    -----
    For a specific class of priors the probabilistic linear solver recovers the
    iterates of the conjugate gradient method as the posterior mean of the induced
    distribution on :math:`x=Hb`. The matrix-based view taken here recovers the
    solution-based inference of :func:`bayescg` [3]_.

    References
    ----------
    .. [1] Wenger, J. and Hennig, P., Probabilistic Linear Solvers for Machine
       Learning, 2020
    .. [2] Hennig, P., Probabilistic Interpretation of Linear Solvers, *SIAM Journal
       on Optimization*, 2015, 25, 234-260
    .. [3] Bartels, S. et al., Probabilistic Linear Solvers: A Unifying View,
       *Statistics and Computing*, 2019

    See Also
    --------
    bayescg : Solve linear systems with prior information on the solution.

    Examples
    --------
    >>> import numpy as np
    >>> np.random.seed(1)
    >>> n = 20
    >>> A = np.random.rand(n, n)
    >>> A = 0.5 * (A + A.T) + 5 * np.eye(n)
    >>> b = np.random.rand(n)
    >>> x, A, Ainv, info = problinsolve(A=A, b=b)
    >>> print(info["iter"])
    10
    """
    # Check linear system for type and dimension mismatch
    _check_linear_system(A=A, b=b, A0=A0, Ainv0=Ainv0, x0=x0)

    # Transform linear system components to random variables and linear operators
    A, b, A0, Ainv0, x0 = _preprocess_linear_system(
        A=A, b=b, A0=A0, Ainv0=Ainv0, x0=x0, assume_A=assume_A
    )

    # Problem dimensions and running state
    n = A.shape[0]
    nrhs = b.shape[1]
    x = x0
    info = {}

    # Default iteration budget: ten times the dimension of A
    if maxiter is None:
        maxiter = n * 10

    # Options shared by every call to the solver iteration
    solve_options = dict(
        maxiter=maxiter, atol=atol, rtol=rtol, callback=callback, **kwargs
    )

    # Solve one right-hand side after the other; the results of one solve are
    # handed to the next one as priors
    for column in range(nrhs):
        solver = _init_solver(
            A=A, b=utils.as_colvec(b[:, column]), A0=A0, Ainv0=Ainv0, x0=x
        )
        x, A0, Ainv0, info = solver.solve(**solve_options)

    # Return Ainv @ b for multiple rhs
    if nrhs > 1:
        x = Ainv0 @ b

    # Check solution and issue warnings (e.g. singular or ill-conditioned matrix)
    _check_solution(info=info)

    return x, A0, Ainv0, info