def test_multiply_kron(kron1, kron2):
    if B.shape(kron1.left) == B.shape(kron2.left) and B.shape(
        kron1.right
    ) == B.shape(kron2.right):
        check_bin_op(B.multiply, kron1, kron2, asserted_type=Kronecker)
    else:
        with pytest.raises(AssertionError):
            B.multiply(kron1, kron2)
def test_matmul_lr(lr1, lr2):
    _check_matmul(lr1, lr2, asserted_type=LowRank)
    assert B.matmul(lr1, lr2).rank == min(lr1.rank, lr2.rank)
    # Check that the middle is `Diagonal` if both are rank 1.
    if lr1.rank == 1 and lr2.rank == 1:
        assert isinstance(B.matmul(lr1, lr2).middle, Diagonal)
def test_matmul_kron(kron1, kron2):
    if (
        B.shape(kron1.left)[1] == B.shape(kron2.left)[0]
        and B.shape(kron1.right)[1] == B.shape(kron2.right)[0]
    ):
        _check_matmul(kron1, kron2, asserted_type=Kronecker)
    else:
        with pytest.raises(AssertionError):
            B.matmul(kron1, kron2)
def matmul(a: Kronecker, b: Kronecker, tr_a=False, tr_b=False):
    _assert_composable(a.left, b.left, tr_a=tr_a, tr_b=tr_b)
    _assert_composable(a.right, b.right, tr_a=tr_a, tr_b=tr_b)
    return Kronecker(
        B.matmul(a.left, b.left, tr_a=tr_a, tr_b=tr_b),
        B.matmul(a.right, b.right, tr_a=tr_a, tr_b=tr_b),
    )
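The method above is an instance of the mixed-product property of the Kronecker product, which holds whenever the factor products are composable:

$$(A \otimes B)(C \otimes D) = (A C) \otimes (B D).$$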
def predict(self, x, latent=False, return_variances=False):
    """Predict.

    Args:
        x (matrix): Input locations to predict at.
        latent (bool, optional): Predict noiseless processes. Defaults to
            `False`.
        return_variances (bool, optional): Return means and variances
            instead. Defaults to `False`.

    Returns:
        tuple[matrix]: Tuple containing means, lower 95% central credible
            bound, and upper 95% central credible bound if
            `return_variances` is `False`, and means and variances
            otherwise.
    """
    mean, var = self.model.predict(x, latent=latent, return_variances=True)

    # Pull means and variances through the mixing matrix.
    mean = B.dense(B.matmul(mean, self.h, tr_b=True))
    var = B.dense(B.matmul(var, self.h ** 2, tr_b=True))

    if not latent:
        var = var + self.noise_obs

    if return_variances:
        return mean, var
    else:
        error = 1.96 * B.sqrt(var)
        return mean, mean - error, mean + error
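A note on the variance computation above: pulling the variances through `self.h ** 2` is the elementwise variance of a linear mixing of latent processes, which, assuming the latent processes are independent (a property of this model class rather than a general fact), reads

$$\operatorname{var}\!\left[(f H^\top)_{ti}\right] = \sum_j H_{ij}^2 \operatorname{var}[f_{tj}].$$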
def test_matmul_zero_dense(zero_r, dense_r):
    check_bin_op(
        lambda a, b: B.matmul(a, b, tr_b=True), zero_r, dense_r, asserted_type=Zero
    )
    check_bin_op(
        lambda a, b: B.matmul(a, b, tr_b=True), dense_r, zero_r, asserted_type=Zero
    )
def inv(a: Woodbury):
    diag_inv = B.inv(a.diag)
    # Explicitly computing the inverse is not great numerically, but solving
    # against the left or right factor destroys symmetry, which hinders
    # further algebraic simplifications.
    return B.subtract(
        diag_inv,
        LowRank(
            B.matmul(diag_inv, a.lr.left),
            B.matmul(diag_inv, a.lr.right),
            B.inv(B.schur(a)),
        ),
    )
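A minimal NumPy sketch of the Woodbury identity that the method above exploits, assuming a Woodbury matrix of the form `D + L M R^T` with Schur complement `S = M^{-1} + R^T D^{-1} L` (all names here are illustrative, not the library's API):

import numpy as np

rng = np.random.default_rng(0)
n, r = 5, 2
D = np.diag(rng.uniform(1, 2, size=n))  # Diagonal part.
L = rng.normal(size=(n, r))             # Left factor of the low-rank part.
R = rng.normal(size=(n, r))             # Right factor of the low-rank part.
M = np.eye(r)                           # Middle of the low-rank part.

D_inv = np.linalg.inv(D)
S = np.linalg.inv(M) + R.T @ D_inv @ L  # Schur complement.
woodbury_inv = D_inv - D_inv @ L @ np.linalg.solve(S, R.T @ D_inv)

assert np.allclose(woodbury_inv, np.linalg.inv(D + L @ M @ R.T))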
def matmul(a: LowRank, b: LowRank, tr_a=False, tr_b=False):
    _assert_composable(a, b, tr_a=tr_a, tr_b=tr_b)
    a = _tr(a, tr_a)
    b = _tr(b, tr_b)
    middle = B.matmul(a.right, b.left, tr_a=True)
    middle = B.matmul(a.middle, middle, b.middle)
    # Let `middle` be of type `Diagonal` if possible.
    if B.shape(middle) == (1, 1):
        return LowRank(a.left, b.right, Diagonal(middle[0]))
    else:
        return LowRank(a.left, b.right, middle)
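The factorisation used above follows directly from associativity: the product of two low-rank matrices is again low rank, with

$$(L_a M_a R_a^\top)(L_b M_b R_b^\top) = L_a \, (M_a R_a^\top L_b M_b) \, R_b^\top,$$

where the bracketed factor is the new middle.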
def pd_inv(a: Woodbury):
    diag_inv = B.inv(a.diag)
    # See comment in `inv`.
    return B.subtract(
        diag_inv,
        LowRank(
            B.matmul(diag_inv, a.lr.left),
            B.matmul(diag_inv, a.lr.right),
            B.pd_inv(B.pd_schur(a)),
        ),
    )
def matmul(a, b, c, tr_a=False, tr_b=False, tr_c=False):
    ar, ac = _shape_tr(a, tr_a)
    br, bc = _shape_tr(b, tr_b)
    cr, cc = _shape_tr(c, tr_c)

    # Compare the FLOP counts of the two possible bracketings and pick the
    # cheaper one.
    cost_ab_first = ar * ac * bc + ar * bc * cc
    cost_bc_first = br * bc * cc + ar * br * cc

    if cost_ab_first <= cost_bc_first:
        return B.matmul(B.matmul(a, b, tr_a=tr_a, tr_b=tr_b), c, tr_b=tr_c)
    else:
        return B.matmul(a, B.matmul(b, c, tr_a=tr_b, tr_b=tr_c), tr_a=tr_a)
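A worked instance of the cost comparison above, with hypothetical shapes chosen to make the difference stark:

# Hypothetical shapes: a is 10 x 1000, b is 1000 x 5, c is 5 x 100.
ar, ac = 10, 1000
br, bc = 1000, 5
cr, cc = 5, 100

cost_ab_first = ar * ac * bc + ar * bc * cc  # 50_000 + 5_000 = 55_000
cost_bc_first = br * bc * cc + ar * br * cc  # 500_000 + 1_000_000 = 1_500_000

# Computing `(a @ b) @ c` is roughly 27x cheaper than `a @ (b @ c)` here.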
def sample(model, t, noise_f):
    """Sample from a model.

    Args:
        model (:class:`gpcm.model.AbstractGPCM`): Model to sample from.
        t (vector): Time points to sample at.
        noise_f (vector): Noise for the sample of the function. Should have
            the same size as `t`.

    Returns:
        tuple[vector]: Tuple containing kernel samples and function samples.
    """
    ks, fs = [], []

    with wbml.out.Progress(name="Sampling", total=5) as progress:
        for i in range(5):
            # Sample kernel.
            u = B.sample(model.compute_K_u())[:, 0]
            K = model.kernel_approx(t, t, u)
            wbml.out.kv("Sampled variance", K[0, 0])
            K = K / K[0, 0]
            ks.append(K[0, :])

            # Sample function.
            f = B.matmul(B.chol(closest_psd(K)), noise_f)
            fs.append(f)

            progress()

    return ks, fs
def assert_orthogonal(x):
    """Assert that a matrix is orthogonal."""
    # Check that the matrix is square.
    assert B.shape(x)[0] == B.shape(x)[1]
    # Check that its transpose is its inverse.
    approx(B.matmul(x, x, tr_a=True), B.eye(x))
def test_ess():
    # Construct a prior and a likelihood.
    prior = Normal(np.array([[0.6, 0.3], [0.3, 0.6]]))
    lik = Normal(
        np.array([[0.2], [0.3]]),
        np.array([[1, 0.2], [0.2, 1]]),
    )

    # Perform sampling.
    sampler = ESS(lik.logpdf, prior.sample)
    num_samples = 30_000
    samples = B.concat(*sampler.sample(num=num_samples), axis=1)

    samples_mean = B.mean(samples, axis=1)[:, None]
    samples_cov = (
        B.matmul(samples - samples_mean, samples - samples_mean, tr_b=True)
        / num_samples
    )

    # Compute posterior statistics.
    prec_prior = B.inv(prior.var)
    prec_lik = B.inv(lik.var)
    cov = B.inv(prec_prior + prec_lik)
    mean = cov @ (prec_prior @ prior.mean + prec_lik @ lik.mean)

    approx(samples_cov, cov, atol=5e-2)
    approx(samples_mean, mean, atol=5e-2)
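The closed-form posterior checked above is the standard product of two Gaussians over the same variable: with prior $\mathcal{N}(\mu_0, \Sigma_0)$ and Gaussian likelihood $\mathcal{N}(\mu_1, \Sigma_1)$, the posterior is $\mathcal{N}(\mu, \Sigma)$ with

$$\Sigma = (\Sigma_0^{-1} + \Sigma_1^{-1})^{-1}, \qquad \mu = \Sigma (\Sigma_0^{-1} \mu_0 + \Sigma_1^{-1} \mu_1),$$

which is exactly what the precision sums in the test compute.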
def _check_matmul_diag(a, b):
    for tr_a in [False, True]:
        for tr_b in [False, True]:
            approx(
                B.matmul_diag(a, b, tr_a=tr_a, tr_b=tr_b),
                B.diag(B.matmul(B.dense(a), B.dense(b), tr_a=tr_a, tr_b=tr_b)),
            )
def matmul(a: Diagonal, b: Kronecker, tr_a=False, tr_b=False):
    warn_upmodule(
        f"Cannot efficiently matrix-multiply {a} by {b}: "
        f"converting the Kronecker product to dense.",
        category=ToDenseWarning,
    )
    return B.matmul(a, B.dense(b), tr_a=tr_a, tr_b=tr_b)
def matmul(a: AbstractMatrix, b: LowerTriangular, tr_a=False, tr_b=False):
    if structured(a):
        warn_upmodule(
            f"Matrix-multiplying {a} and {b}: converting to dense.",
            category=ToDenseWarning,
        )
    return B.matmul(a, B.dense(b), tr_a=tr_a, tr_b=tr_b)
def _check_root(a, asserted_type=object):
    root = B.root(a)

    # Check correctness.
    approx(B.matmul(B.dense(root), B.dense(root)), B.dense(a))

    # Check type.
    assert isinstance(root, asserted_type)
def _project_pattern(self, x, y, pattern):
    # Check whether all data is available.
    no_missing = all(pattern)

    if no_missing:
        # All data is available. Nothing to be done.
        u = self.u
    else:
        # Data is missing. Pick the available entries.
        y = B.take(y, pattern, axis=1)
        # Ensure that `u` remains a structured matrix.
        u = Dense(B.take(self.u, pattern))

    # Get the number of data points and outputs in this part of the data.
    n = B.shape(x)[0]
    p = sum(pattern)

    # Perform the projection.
    proj_y_partial = B.matmul(y, B.pinv(u), tr_b=True)
    proj_y = B.matmul(proj_y_partial, B.inv(self.s_sqrt), tr_b=True)

    # Compute the projected noise.
    u_square = B.matmul(u, u, tr_a=True)
    proj_noise = (
        self.noise_obs / B.diag(self.s_sqrt) ** 2 * B.diag(B.pd_inv(u_square))
    )

    # Convert the projected noise to weights.
    noises = self.model.noises
    weights = noises / (noises + proj_noise)
    proj_w = B.ones(B.dtype(weights), n, self.m) * weights[None, :]

    # Compute the Frobenius norm.
    frob = B.sum(y ** 2)
    frob = frob - B.sum(proj_y_partial * B.matmul(proj_y_partial, u_square))

    # Compute the regularising term.
    reg = 0.5 * (
        n * (p - self.m) * B.log(2 * B.pi * self.noise_obs)
        + frob / self.noise_obs
        + n * B.logdet(B.matmul(u, u, tr_a=True))
        + n * 2 * B.logdet(self.s_sqrt)
    )

    return x, proj_y, proj_w, reg
def _check_matmul(a, b, asserted_type=object, tr_both=False):
    for tr_a in [False, True]:
        for tr_b in [False, True]:
            check_bin_op(
                lambda a_, b_: B.matmul(a_, b_, tr_a=tr_a, tr_b=tr_b),
                a,
                b,
                asserted_type=asserted_type,
            )
def pinv(a: AbstractMatrix):
    """Compute the left pseudo-inverse.

    Args:
        a (matrix): Matrix to compute the left pseudo-inverse of.

    Returns:
        matrix: Left pseudo-inverse of `a`.
    """
    return B.cholsolve(B.chol(B.matmul(a, a, tr_a=True)), B.transpose(a))
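A minimal NumPy sketch of the identity behind `pinv`: for a tall matrix with full column rank, `(a^T a)^{-1} a^T` is a left inverse (shapes here are illustrative only):

import numpy as np

rng = np.random.default_rng(0)
a = rng.normal(size=(10, 3))               # Tall, full column rank.
left_pinv = np.linalg.solve(a.T @ a, a.T)  # (a^T a)^{-1} a^T.
assert np.allclose(left_pinv @ a, np.eye(3))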
def sample(model, t, noise_f):
    """Sample from a model.

    Args:
        model (:class:`gpcm.model.AbstractGPCM`): Model to sample from.
        t (vector): Time points to sample at.
        noise_f (vector): Noise for the sample of the function. Should have
            the same size as `t`.

    Returns:
        tuple[vector, ...]: Tuple containing kernel samples, filter samples,
            and function samples.
    """
    ks, us, fs = [], [], []

    # In the below, we look at the third inducing point, because that is the
    # one determining the value of the filter at zero: the CGPCM adds two
    # extra inducing points to the left.

    # Get a smooth sample.
    u1 = B.ones(model.n_u)
    while B.abs(u1[2]) > 1e-2:
        u1 = B.sample(model.compute_K_u())[:, 0]
    u = GP(model.k_h())
    u = u | (u(model.t_u), u1)
    u1_full = u(t).mean.flatten()

    # Get a rough sample.
    u2 = B.zeros(model.n_u)
    while u2[2] < 0.5:
        u2 = B.sample(model.compute_K_u())[:, 0]
    u = GP(model.k_h())
    u = u | (u(model.t_u), u2)
    u2_full = u(t).mean.flatten()

    with wbml.out.Progress(name="Sampling", total=5) as progress:
        for c in [0, 0.1, 0.23, 0.33, 0.5]:
            # Sample kernel.
            K = model.kernel_approx(t, t, c * u2 + (1 - c) * u1)
            wbml.out.kv("Sampled variance", K[0, 0])
            K = K / K[0, 0]
            ks.append(K[0, :])

            # Store filter.
            us.append(c * u2_full + (1 - c) * u1_full)

            # Sample function.
            f = B.matmul(B.chol(closest_psd(K)), noise_f)
            fs.append(f)

            progress()

    return ks, us, fs
def sample(a, num=1):  # pragma: no cover
    """Sample from covariance matrices.

    Args:
        a (tensor): Covariance matrix to sample from.
        num (int): Number of samples.

    Returns:
        tensor: Samples as rank-2 column vectors.
    """
    chol = B.cholesky(a)
    return B.matmul(chol, B.randn(B.dtype_float(a), B.shape(chol)[1], num))
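A minimal NumPy sketch of why multiplying the Cholesky factor by standard normal noise works: if `L @ L.T == cov` and `z ~ N(0, I)`, then `L @ z ~ N(0, cov)` (the numbers here are illustrative only):

import numpy as np

rng = np.random.default_rng(0)
cov = np.array([[2.0, 0.5], [0.5, 1.0]])
L = np.linalg.cholesky(cov)
z = rng.normal(size=(2, 100_000))  # Standard normal noise.
samples = L @ z                    # Samples with covariance `cov`.
assert np.allclose(np.cov(samples), cov, atol=5e-2)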
def test_matmul_multiple(code_a, code_b, code_c):
    for tr_a in [True, False]:
        for tr_b in [True, False]:
            for tr_c in [True, False]:
                a = generate(code_a)
                b = generate(code_b)
                c = generate(code_c)

                if tr_a:
                    a = B.transpose(a)
                if tr_b:
                    b = B.transpose(b)
                if tr_c:
                    c = B.transpose(c)

                approx(
                    B.matmul(a, b, c, tr_a=tr_a, tr_b=tr_b, tr_c=tr_c),
                    B.matmul(B.matmul(a, b, tr_a=tr_a, tr_b=tr_b), c, tr_b=tr_c),
                )
def test_matmul_assertion(zero_r, dense2):
    with pytest.raises(AssertionError):
        B.matmul(zero_r, dense2)
    with pytest.raises(AssertionError):
        B.matmul(zero_r, dense2, tr_b=True)
    with pytest.raises(AssertionError):
        B.matmul(zero_r, zero_r, tr_a=True, tr_b=True)
def test_cholesky_solve_ut(dense_pd):
    chol = B.cholesky(dense_pd)
    with AssertDenseWarning(
        [
            "solving <upper-triangular> x = <diagonal>",
            "matrix-multiplying <upper-triangular> and <lower-triangular>",
        ]
    ):
        approx(
            B.cholesky_solve(B.transpose(chol), B.eye(chol)),
            B.inv(B.matmul(chol, chol, tr_a=True)),
        )
def sum(a: LowRank, axis=None):
    if axis is None:
        return B.sum(
            B.sum(B.matmul(a.left, a.middle), axis=0) * B.sum(a.right, axis=0)
        )
    elif axis == 0:
        return B.sum(
            B.multiply(
                B.expand_dims(B.sum(a.left, axis=0), axis=0),
                B.matmul(a.right, a.middle, tr_b=True),
            ),
            axis=1,
        )
    elif axis == 1:
        return B.sum(
            B.multiply(
                B.matmul(a.left, a.middle),
                B.expand_dims(B.sum(a.right, axis=0), axis=0),
            ),
            axis=1,
        )
    else:
        _raise(axis)
def diag(a: LowRank):
    if structured(a.left, a.right):
        warn_upmodule(
            f"Getting the diagonal of {a}: converting the factors to dense.",
            category=ToDenseWarning,
        )
    diag_len = _diag_len(a)
    left_mul = B.matmul(a.left, a.middle)
    return B.sum(
        B.multiply(
            B.dense(left_mul)[:diag_len, :],
            B.dense(a.right)[:diag_len, :],
        ),
        axis=1,
    )
def sample(self, x, latent=False):
    """Sample from the model.

    Args:
        x (matrix): Locations to sample at.
        latent (bool, optional): Sample noiseless processes. Defaults to
            `False`.

    Returns:
        matrix: Sample.
    """
    sample = B.dense(
        B.matmul(self.model.sample(x, latent=latent), self.h, tr_b=True)
    )
    if not latent:
        sample = sample + B.sqrt(self.noise_obs) * B.randn(sample)
    return sample
def closest_psd(a, inv=False):
    """Map a matrix to the closest PSD matrix.

    Args:
        a (tensor): Matrix.
        inv (bool, optional): Also invert the result. Defaults to `False`.

    Returns:
        tensor: PSD matrix closest to `a`, or the (pseudo-)inverse of that
            matrix if `inv` is `True`.
    """
    a = B.dense(a)
    a = (a + B.transpose(a)) / 2
    u, s, v = B.svd(a)
    signs = B.matmul(u, v, tr_a=True)
    s = B.maximum(B.diag(signs) * s, 0)
    if inv:
        s = B.where(s == 0, 0, 1 / s)
    return B.mm(u * B.expand_dims(s, axis=-2), v, tr_b=True)
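A minimal NumPy sketch of the same idea via a plain eigendecomposition: symmetrise, then clip negative eigenvalues to zero. For a symmetric input this agrees with the SVD-with-signs construction above, but it is an illustration rather than the exact routine:

import numpy as np

rng = np.random.default_rng(0)
a = rng.normal(size=(4, 4))  # Neither symmetric nor PSD.
sym = (a + a.T) / 2
vals, vecs = np.linalg.eigh(sym)
psd = vecs @ np.diag(np.maximum(vals, 0)) @ vecs.T
assert np.all(np.linalg.eigvalsh(psd) >= -1e-12)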
def test_linear():
    layer = Linear(20)
    x = B.randn(10, 5, 3)

    # Check the number of weights and the width.
    assert layer.num_weights(3) == 3 * 20 + 20
    assert layer.width == 20

    # Check initialisation and width.
    vs = Vars(np.float64)
    layer.initialise(3, vs)
    assert layer.width == 20

    # Check batch consistency.
    check_batch_consistency(layer, x)

    # Check correctness.
    approx(layer(x), B.matmul(x, layer.A[None, :, :]) + layer.b[None, :, :])