def test_dtype_wb_promotion():
    wb = LowRank(B.ones(int, 5, 5)) + Diagonal(B.ones(int, 5))
    assert B.dtype(wb) == np.int64
    wb = LowRank(B.ones(float, 5, 5)) + Diagonal(B.ones(int, 5))
    assert B.dtype(wb) == np.float64
    wb = LowRank(B.ones(int, 5, 5)) + Diagonal(B.ones(float, 5))
    assert B.dtype(wb) == np.float64
def test_kronecker_formatting():
    left = Diagonal(B.ones(2))
    right = Diagonal(B.ones(3))
    assert str(Kronecker(left, right)) == "<Kronecker product: shape=6x6, dtype=float64>"
    assert repr(Kronecker(left, right)) == (
        "<Kronecker product: shape=6x6, dtype=float64\n"
        " left=<diagonal matrix: shape=2x2, dtype=float64\n"
        " diag=[1. 1.]>\n"
        " right=<diagonal matrix: shape=3x3, dtype=float64\n"
        " diag=[1. 1. 1.]>>"
    )
def _compute(self, measure):
    # Extract processes and inputs.
    p_x, x = self.fdd.p, self.fdd.x
    p_z, z = self.u.p, self.u.x

    # Construct the necessary kernel matrices.
    K_zx = measure.kernels[p_z, p_x](z, x)
    K_z = convert(measure.kernels[p_z](z), AbstractMatrix)
    self._K_z_store[id(measure)] = K_z

    # Evaluating `e.kernel(x)` will yield incorrect results if `x` is a
    # `MultiInput`, because `x` then still designates the particular components
    # of `f`. Fix that by instead designating the elements of `e`.
    if isinstance(x, MultiInput):
        x_n = MultiInput(*(e(fdd.x) for e, fdd in zip(self.e.kernel.ps, x.get())))
    else:
        x_n = x

    # Construct the noise kernel matrix.
    K_n = self.e.kernel(x_n)

    # The approximation can only handle diagonal noise matrices.
    if not isinstance(K_n, Diagonal):
        raise RuntimeError("Kernel matrix of noise must be diagonal.")

    # And construct the components for the inducing point approximation.
    L_z = B.cholesky(K_z)
    A = B.add(B.eye(K_z), B.iqf(K_n, B.transpose(B.solve(L_z, K_zx))))
    self._A_store[id(measure)] = A
    y_bar = uprank(self.y) - self.e.mean(x_n) - measure.means[p_x](x)
    prod_y_bar = B.solve(L_z, B.iqf(K_n, B.transpose(K_zx), y_bar))

    # Compute the optimal mean.
    mu = B.add(
        measure.means[p_z](z),
        B.iqf(A, B.solve(L_z, K_z), prod_y_bar),
    )
    self._mu_store[id(measure)] = mu

    # Compute the ELBO.
    # NOTE: The calculation of `trace_part` asserts that `K_n` is diagonal.
    #   The rest, however, is completely generic.
    trace_part = B.ratio(
        Diagonal(measure.kernels[p_x].elwise(x)[:, 0])
        - Diagonal(B.iqf_diag(K_z, K_zx)),
        K_n,
    )
    det_part = B.logdet(2 * B.pi * K_n) + B.logdet(A)
    iqf_part = B.iqf(K_n, y_bar)[0, 0] - B.iqf(A, prod_y_bar)[0, 0]
    self._elbo_store[id(measure)] = -0.5 * (trace_part + det_part + iqf_part)
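# The block below is a minimal dense-NumPy sketch (not part of the source) of
# the collapsed variational bound assembled in `_compute` above, under the
# simplifying assumption of scalar i.i.d. noise, `K_n = noise * I`. All names
# here (`titsias_elbo`, `k_z`, `k_zx`, `k_x_diag`, `y_bar`) are illustrative.
import numpy as np


def titsias_elbo(k_z, k_zx, k_x_diag, noise, y_bar):
    """Sketch of the trace, determinant, and quadratic-form parts of the bound."""
    n = k_zx.shape[1]
    l_z = np.linalg.cholesky(k_z)  # L_z with L_z @ L_z.T == K_z.
    v = np.linalg.solve(l_z, k_zx)  # V = L_z^{-1} K_zx, so Q = V.T @ V.
    a = np.eye(k_z.shape[0]) + v @ v.T / noise  # A = I + V K_n^{-1} V.T.
    prod_y_bar = v @ y_bar / noise
    # tr(K_x - Q) / noise, needing only the diagonal of Q.
    trace_part = np.sum(k_x_diag - np.sum(v * v, axis=0)) / noise
    # log det(2 pi K_n) + log det(A), via the matrix determinant lemma.
    det_part = n * np.log(2 * np.pi * noise) + np.linalg.slogdet(a)[1]
    # y.T K_n^{-1} y - prod.T A^{-1} prod, via the Woodbury identity.
    iqf_part = y_bar @ y_bar / noise - prod_y_bar @ np.linalg.solve(a, prod_y_bar)
    return -0.5 * (trace_part + det_part + iqf_part)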
def __call__(self, x, y):
    w_x, w_y = x.w, y.w
    x, y = x.get(), y.get()
    if x is y:
        return Diagonal(1 / w_x)
    else:
        return Zero(B.dtype(x), num_elements(x), num_elements(y))
def diagonalise(self):
    """Diagonalise the normal distribution by setting the correlations to zero.

    Returns:
        :class:`.Normal`: Diagonal version of the distribution.
    """
    return Normal(self.mean, Diagonal(self.var_diag))
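# Hypothetical usage sketch (not from the source), assuming the `Normal`
# constructor used elsewhere in this document and `numpy` imported as `np`:
# the correlated 2D Gaussian below keeps its marginal variances but loses the
# off-diagonal covariance after `diagonalise`.
dist = Normal(np.zeros((2, 1)), np.array([[2.0, 0.5], [0.5, 1.0]]))
diag_dist = dist.diagonalise()
# `diag_dist.var` is now Diagonal([2., 1.]): same marginals, no correlation.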
def _noise_as_matrix(noise: B.Numeric, dtype: B.DType, n: int):
    if B.isscalar(noise):
        return B.fill_diag(noise, n)
    elif B.rank(noise) == 1:
        return Diagonal(noise)
    else:
        return Dense(noise)
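# Illustrative calls (hypothetical, assuming NumPy inputs): each of the three
# supported noise formats is promoted to a structured matrix type.
# _noise_as_matrix(0.1, np.float64, 3)         -> 0.1 * I_3 (diagonal matrix)
# _noise_as_matrix(np.ones(3), np.float64, 3)  -> Diagonal(np.ones(3))
# _noise_as_matrix(np.eye(3), np.float64, 3)   -> Dense(np.eye(3))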
def test_logpdf_missing_data():
    # Setup model.
    m = 3
    noise = 1e-2
    latent_noises = 2e-2 * B.ones(m)
    kernels = [0.5 * EQ().stretch(0.75) for _ in range(m)]
    x = B.linspace(0, 10, 20)

    # Concatenate two orthogonal matrices, to make the missing data
    # approximation exact.
    u1 = B.svd(B.randn(m, m))[0]
    u2 = B.svd(B.randn(m, m))[0]
    u = Dense(B.concat(u1, u2, axis=0) / B.sqrt(2))
    s_sqrt = Diagonal(B.rand(m))

    # Construct a reference model.
    oilmm_pp = ILMMPP(kernels, u @ s_sqrt, noise, latent_noises)

    # Sample to generate test data.
    y = oilmm_pp.sample(x, latent=False)

    # Throw away data, but retain orthogonality.
    y[5:10, 3:] = np.nan
    y[10:, :3] = np.nan

    # Construct OILMM to test.
    oilmm = OILMM(kernels, u, s_sqrt, noise, latent_noises)

    # Check that evidence is still exact.
    approx(oilmm_pp.logpdf(x, y), oilmm.logpdf(x, y), atol=1e-7)
def __call__(self, x, y):
    w_x, w_y = x.w, y.w
    x, y = x.get(), y.get()
    if x is y:
        return Diagonal(1 / w_x)
    else:
        x, y = uprank(x), uprank(y)
        return Zero(B.dtype(x), B.shape(x)[0], B.shape(y)[0])
def _compute(self):
    # Extract processes.
    p_x, x = type_parameter(self.x), self.x.get()
    p_z, z = type_parameter(self.z), self.z.get()

    # Construct the necessary kernel matrices.
    K_zx = self.graph.kernels[p_z, p_x](z, x)
    self._K_z = convert(self.graph.kernels[p_z](z), AbstractMatrix)

    # Evaluating `e.kernel(x)` will yield incorrect results if `x` is a
    # `MultiInput`, because `x` then still designates the particular
    # components of `f`. Fix that by instead designating the elements of
    # `e`.
    if isinstance(x, MultiInput):
        x_n = MultiInput(*(p(xi.get()) for p, xi in zip(self.e.kernel.ps, x.get())))
    else:
        x_n = x

    # Construct the noise kernel matrix.
    K_n = self.e.kernel(x_n)

    # The approximation can only handle diagonal noise matrices.
    if not isinstance(K_n, Diagonal):
        raise RuntimeError('Kernel matrix of noise must be diagonal.')

    # And construct the components for the inducing point approximation.
    L_z = B.cholesky(self._K_z)
    self._A = B.add(B.eye(self._K_z), B.iqf(K_n, B.transpose(B.solve(L_z, K_zx))))
    y_bar = uprank(self.y) - self.e.mean(x_n) - self.graph.means[p_x](x)
    prod_y_bar = B.solve(L_z, B.iqf(K_n, B.transpose(K_zx), y_bar))

    # Compute the optimal mean.
    self._mu = B.add(
        self.graph.means[p_z](z),
        B.iqf(self._A, B.solve(L_z, self._K_z), prod_y_bar),
    )

    # Compute the ELBO.
    # NOTE: The calculation of `trace_part` asserts that `K_n` is diagonal.
    #   The rest, however, is completely generic.
    trace_part = B.ratio(
        Diagonal(self.graph.kernels[p_x].elwise(x)[:, 0])
        - Diagonal(B.iqf_diag(self._K_z, K_zx)),
        K_n,
    )
    det_part = B.logdet(2 * B.pi * K_n) + B.logdet(self._A)
    iqf_part = B.iqf(K_n, y_bar)[0, 0] - B.iqf(self._A, prod_y_bar)[0, 0]
    self._elbo = -0.5 * (trace_part + det_part + iqf_part)
def _compute(self, measure):
    # Extract processes and inputs.
    p_x, x, noise_x = self.fdd.p, self.fdd.x, self.fdd.noise
    p_z, z, noise_z = self.u.p, self.u.x, self.u.noise

    # Construct the necessary kernel matrices.
    K_zx = measure.kernels[p_z, p_x](z, x)
    K_z = B.add(measure.kernels[p_z](z), noise_z)
    self._K_z_store[id(measure)] = K_z

    # Construct the noise kernel matrix.
    K_n = noise_x

    # The approximation can only handle diagonal noise matrices.
    if not isinstance(K_n, Diagonal):
        raise RuntimeError(
            f"Kernel matrix of observation noise must be diagonal, "
            f'not "{type(K_n).__name__}".'
        )

    # And construct the components for the inducing point approximation.
    L_z = B.cholesky(K_z)
    A = B.add(B.eye(K_z), B.iqf(K_n, B.transpose(B.solve(L_z, K_zx))))
    self._A_store[id(measure)] = A
    y_bar = B.subtract(B.uprank(self.y), measure.means[p_x](x))
    prod_y_bar = B.solve(L_z, B.iqf(K_n, B.transpose(K_zx), y_bar))

    # Compute the optimal mean.
    mu = B.add(
        measure.means[p_z](z),
        B.iqf(A, B.solve(L_z, K_z), prod_y_bar),
    )
    self._mu_store[id(measure)] = mu

    # Compute the ELBO.
    # NOTE: The calculation of `trace_part` asserts that `K_n` is diagonal.
    #   The rest, however, is completely generic.
    trace_part = B.ratio(
        Diagonal(measure.kernels[p_x].elwise(x)[:, 0])
        - Diagonal(B.iqf_diag(K_z, K_zx)),
        K_n,
    )
    det_part = B.logdet(2 * B.pi * K_n) + B.logdet(A)
    iqf_part = B.iqf(K_n, y_bar)[0, 0] - B.iqf(A, prod_y_bar)[0, 0]
    self._elbo_store[id(measure)] = -0.5 * (trace_part + det_part + iqf_part)
def construct_model(vs):
    kernels = [
        vs.pos(1, name=f"{i}/var") * EQ().stretch(vs.pos(0.02, name=f"{i}/scale"))
        for i in range(m)
    ]
    noise = vs.pos(1e-2, name="noise")
    latent_noises = vs.pos(1e-2 * B.ones(m), name="latent_noises")
    u = Dense(vs.orth(shape=(p, m), name="u"))
    s_sqrt = Diagonal(vs.pos(shape=(m,), name="s_sqrt"))
    return OILMM(kernels, u, s_sqrt, noise, latent_noises)
def construct_oilmm():
    # Setup model.
    kernels = [EQ(), 2 * EQ().stretch(1.5)]
    u, s_sqrt = B.svd(B.randn(3, 2))[:2]
    u = Dense(u)
    s_sqrt = Diagonal(s_sqrt)

    def construct_oilmm(noise_amplification=1):
        noise_obs = noise_amplification
        noises_latent = np.array([0.1, 0.2]) * noise_amplification
        return OILMM(kernels, u, s_sqrt, noise_obs, noises_latent)

    return construct_oilmm
def construct_model(vs):
    if args.separable:
        # Copy the same kernel `m` times.
        kernel = [Mat52().stretch(vs.bnd(6 * 30, lower=60, name="k_scale"))]
        kernels = kernel * m
    else:
        # Parametrise different kernels.
        kernels = [
            Mat52().stretch(vs.bnd(6 * 30, lower=60, name=f"{i}/k_scale"))
            for i in range(m)
        ]
    noise = vs.bnd(1e-2, name="noise")
    latent_noises = vs.bnd(1e-2 * B.ones(m), name="latent_noises")

    # Construct the component of the mixing matrix over simulators.
    u = vs.orth(init=u_s_init, shape=(p_s, m_s), name="sims/u")
    s_sqrt = vs.bnd(init=s_sqrt_s_init, shape=(m_s,), name="sims/s_sqrt")
    u_s = Dense(u)
    s_sqrt_s = Diagonal(s_sqrt)

    # Construct the components of the mixing matrix over space from a
    # covariance.
    scales = vs.bnd(init=scales_init, name="space/scales")
    k = Mat52().stretch(scales)
    u, s, _ = B.svd(B.dense(k(loc)))
    u_r = Dense(u[:, :m_r])
    s_sqrt_r = Diagonal(B.sqrt(s[:m_r]))

    # Compose.
    s_sqrt = Kronecker(s_sqrt_s, s_sqrt_r)
    u = Kronecker(u_s, u_r)

    return OILMM(kernels, u, s_sqrt, noise, latent_noises)
def test_woodbury_formatting():
    diag = Diagonal(B.ones(3))
    lr = LowRank(B.ones(3, 1), 2 * B.ones(3, 1))
    assert str(Woodbury(diag, lr)) == "<Woodbury matrix: shape=3x3, dtype=float64>"
    assert repr(Woodbury(diag, lr)) == (
        "<Woodbury matrix: shape=3x3, dtype=float64\n"
        " diag=<diagonal matrix: shape=3x3, dtype=float64\n"
        " diag=[1. 1. 1.]>\n"
        " lr=<low-rank matrix: shape=3x3, dtype=float64, rank=1\n"
        " left=[[1.]\n"
        " [1.]\n"
        " [1.]]\n"
        " right=[[2.]\n"
        " [2.]\n"
        " [2.]]>>"
    )
def construct_model(vs, m):
    kernels = [
        vs.pos(0.5, name=f"{i}/k_var")
        * Matern52().stretch(vs.bnd(2 * 30, name=f"{i}/k_scale"))
        + vs.pos(0.5, name=f"{i}/k_per_var")
        * Matern52().stretch(vs.bnd(1.0, name=f"{i}/k_per_scale")).periodic(365)
        for i in range(m)
    ]
    noise = vs.pos(1e-2, name="noise")
    latent_noises = vs.pos(1e-2 * B.ones(m), name="latent_noises")

    # Construct orthogonal matrix and time it.
    time_h_start = time.time()
    u = Dense(vs.orth(shape=(p, m), name="u"))
    s_sqrt = Diagonal(vs.pos(shape=(m,), name="s_sqrt"))
    dur_h = time.time() - time_h_start

    return OILMM(kernels, u, s_sqrt, noise, latent_noises), dur_h
def test_normaliser():
    # Create test data.
    mat = B.randn(3, 3)
    dist = Normal(B.randn(3, 1), mat @ mat.T)
    y = dist.sample(num=10).T

    # Create normaliser.
    norm = Normaliser(y)
    y_norm = norm.normalise(y)

    # Create distribution of normalised data.
    scale = Diagonal(norm.scale[0])
    dist_norm = Normal(
        B.inv(scale) @ (dist.mean - norm.mean.T),
        B.inv(scale) @ dist.var @ B.inv(scale),
    )

    approx(
        B.sum(dist.logpdf(y.T)),
        B.sum(dist_norm.logpdf(y_norm.T)) + norm.normalise_logdet(y),
    )
def compute_K_z(model):
    """Covariance matrix :math:`K_z` of :math:`z_m` for :math:`m=0,\\ldots,2M`.

    Args:
        model (:class:`.gprv.GPRV`): Model.

    Returns:
        matrix: :math:`K_z`.
    """
    # Compute harmonic frequencies.
    m = model.ms - B.cast(model.dtype, model.ms > model.m_max) * model.m_max
    omega = 2 * B.pi * m / (model.b - model.a)

    # Compute the parameters of the kernel matrix.
    lam_t = 1
    alpha = 0.5 * (model.b - model.a) / psd_matern_12(omega, model.lam, lam_t)
    alpha = alpha + alpha * B.cast(model.dtype, model.ms == 0)
    beta = 1 / (lam_t**0.5) * B.cast(model.dtype, model.ms <= model.m_max)

    return Diagonal(alpha) + LowRank(left=beta[:, None])
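# A small NumPy check (not from the source, with assumed example values) of
# why the structured `Diagonal(alpha) + LowRank(left=beta[:, None])` return
# value is useful: for K_z = diag(alpha) + beta beta^T, the log-determinant
# follows from the matrix determinant lemma without forming the dense matrix.
import numpy as np

alpha = np.array([2.0, 3.0, 4.0])
beta = np.array([1.0, 1.0, 0.0])
k_z = np.diag(alpha) + np.outer(beta, beta)
# log det(diag(alpha) + beta beta^T)
#     = sum(log(alpha)) + log(1 + beta^T diag(alpha)^{-1} beta).
lemma = np.sum(np.log(alpha)) + np.log1p(np.sum(beta**2 / alpha))
assert np.isclose(np.linalg.slogdet(k_z)[1], lemma)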
def construct_model(vs):
    kernels = [
        vs.pos(0.5, name=f"{i}/k_var")
        * Matern52().stretch(vs.bnd(2 * 30, name=f"{i}/k_scale"))
        + vs.pos(0.5, name=f"{i}/k_per_var")
        * Matern52().stretch(vs.bnd(1.0, name=f"{i}/k_per_scale")).periodic(365)
        for i in range(m)
    ]
    latent_noises = vs.pos(1e-2 * B.ones(m), name="latent_noises")
    noise = vs.pos(1e-2, name="noise")

    # Construct components of mixing matrix from a covariance over outputs.
    variance = vs.pos(1, name="h/variance")
    scales = vs.pos(init=scales_init, name="h/scales")
    k = variance * Matern52().stretch(scales)
    u, s, _ = B.svd(B.dense(B.reg(k(loc))))
    u = Dense(u[:, :m])
    s_sqrt = Diagonal(B.sqrt(s[:m]))

    return OILMM(kernels, u, s_sqrt, noise, latent_noises)
def test_compare_ilmm():
    # Setup models.
    kernels = [EQ(), 2 * EQ().stretch(1.5)]
    noise_obs = 0.1
    noises_latent = np.array([0.1, 0.2])
    u, s_sqrt = B.svd(B.randn(3, 2))[:2]
    u = Dense(u)
    s_sqrt = Diagonal(s_sqrt)

    # Construct models.
    ilmm = ILMMPP(kernels, u @ s_sqrt, noise_obs, noises_latent)
    oilmm = OILMM(kernels, u, s_sqrt, noise_obs, noises_latent)

    # Construct data.
    x = B.linspace(0, 3, 5)
    y = ilmm.sample(x, latent=False)
    x2 = B.linspace(4, 7, 5)
    y2 = ilmm.sample(x2, latent=False)

    # Check LML before conditioning.
    approx(ilmm.logpdf(x, y), oilmm.logpdf(x, y))
    approx(ilmm.logpdf(x2, y2), oilmm.logpdf(x2, y2))

    ilmm = ilmm.condition(x, y)
    oilmm = oilmm.condition(x, y)

    # Check LML after conditioning.
    approx(ilmm.logpdf(x, y), oilmm.logpdf(x, y))
    approx(ilmm.logpdf(x2, y2), oilmm.logpdf(x2, y2))

    # Predict.
    means_pp, lowers_pp, uppers_pp = ilmm.predict(x2)
    means, lowers, uppers = oilmm.predict(x2)

    # Check predictions.
    approx(means_pp, means)
    approx(lowers_pp, lowers)
    approx(uppers_pp, uppers)
def test_woodbury_attributes():
    diag = Diagonal(B.ones(3))
    lr = LowRank(B.ones(3, 1), 2 * B.ones(3, 1))
    wb = Woodbury(diag, lr)
    assert wb.diag is diag
    assert wb.lr is lr
def construct(diag, left, right, middle):
    return Diagonal(diag) + LowRank(left, right, middle)
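# Hypothetical usage sketch: summing a Diagonal and a LowRank yields a
# structured Woodbury matrix, so downstream solves and log-determinants can
# exploit the structure instead of densifying. The arguments below are
# illustrative placeholders.
wb = construct(B.ones(3), B.ones(3, 1), B.ones(3, 1), B.ones(1, 1))
# `B.dense(wb)` recovers the equivalent dense array when needed.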
    # Test giving a name to the constructor.
    p3 = GP(EQ(), name="yet_another_name", measure=m)
    assert m["yet_another_name"] is p3
    assert p3.name == "yet_another_name"
    assert m[p3] == "yet_another_name"


@pytest.mark.parametrize(
    "generate_noise_tuple",
    [
        lambda x: (),
        lambda x: (B.rand(),),
        lambda x: (B.rand(x),),
        lambda x: (B.diag(B.rand(x)),),
        lambda x: (Diagonal(B.rand(x)),),
    ],
)
def test_conditioning(generate_noise_tuple):
    m = Measure()
    p1 = GP(EQ(), measure=m)
    p2 = GP(Exp(), measure=m)
    p_sum = p1 + p2

    # Sample some data to condition on.
    x1 = B.linspace(0, 2, 3)
    n1 = generate_noise_tuple(x1)
    y1 = p1(x1, *n1).sample()
    tup1 = (p1(x1, *n1), y1)
    x_sum = B.linspace(3, 5, 3)
    n_sum = generate_noise_tuple(x_sum)
def test_structured():
    assert structured(Diagonal(B.ones(3)))
    assert not structured(Dense(B.ones(3, 3)))
    assert not structured(B.ones(3, 3))
def objective(vs):
    x_ind = vs.unbounded(x_ind_init, name="x_ind")
    return -construct_model(vs).logpdf(x_data, y_data, x_ind=x_ind)


minimise_l_bfgs_b(objective, vs, trace=True, iters=args.i)

# Print variables.
vs.print()


def cov_to_corr(k):
    std = B.sqrt(B.diag(k))
    return k / std[:, None] / std[None, :]


# Compute correlations between simulators.
u = Dense(vs["sims/u"])
s_sqrt = Diagonal(vs["sims/s_sqrt"])
k = u @ s_sqrt @ s_sqrt @ u.T
std = B.sqrt(B.diag(k))
corr_learned = cov_to_corr(k)

# Compute empirical correlations.
all_obs = np.concatenate(
    [sim.to_numpy()[:args.n].reshape(-1, 1) for sim in sims.values()], axis=1
)
corr_empirical = cov_to_corr(np.cov(all_obs.T))

# Compute predictions for latent processes.
model = construct_model(vs)
model = model.condition(x_data, y_data, x_ind=vs["x_ind"])
x_proj, y_proj, _, _ = model.project(x_data, y_data)
means, lowers, uppers = model.model.predict(x_proj)
def test_diagonal_formatting():
    assert str(Diagonal(B.ones(3))) == "<diagonal matrix: shape=3x3, dtype=float64>"
    assert repr(Diagonal(B.ones(3))) == (
        "<diagonal matrix: shape=3x3, dtype=float64\n"
        " diag=[1. 1. 1.]>"
    )
def test_conversion_to_diagonal(dense1):
    approx(Diagonal(dense1), B.diag(B.diag(dense1)))
def generate(code):
    """Generate a random tensor of a particular type, specified with a code.

    Args:
        code (str): Code of the matrix.

    Returns:
        tensor: Random tensor.
    """
    mat_code, shape_code = code.split(":")

    # Parse shape.
    if shape_code == "":
        shape = ()
    else:
        shape = tuple(int(d) for d in shape_code.split(","))

    if mat_code == "randn":
        return B.randn(*shape)
    elif mat_code == "randn_pd":
        mat = B.randn(*shape)
        # If it is a scalar or vector, just pointwise square it.
        if len(shape) in {0, 1}:
            return mat**2 + 1
        else:
            return B.matmul(mat, mat, tr_b=True) + B.eye(shape[0])
    elif mat_code == "zero":
        return Zero(B.default_dtype, *shape)
    elif mat_code == "const":
        return Constant(B.randn(), *shape)
    elif mat_code == "const_pd":
        return Constant(B.randn() ** 2 + 1, *shape)
    elif mat_code == "lt":
        mat = B.vec_to_tril(B.randn(int(0.5 * shape[0] * (shape[0] + 1))))
        return LowerTriangular(mat)
    elif mat_code == "lt_pd":
        mat = generate(f"randn_pd:{shape[0]},{shape[0]}")
        return LowerTriangular(B.cholesky(B.reg(mat)))
    elif mat_code == "ut":
        mat = B.vec_to_tril(B.randn(int(0.5 * shape[0] * (shape[0] + 1))))
        return UpperTriangular(B.transpose(mat))
    elif mat_code == "ut_pd":
        mat = generate(f"randn_pd:{shape[0]},{shape[0]}")
        return UpperTriangular(B.transpose(B.cholesky(B.reg(mat))))
    elif mat_code == "dense":
        return Dense(generate(f"randn:{shape_code}"))
    elif mat_code == "dense_pd":
        return Dense(generate(f"randn_pd:{shape_code}"))
    elif mat_code == "diag":
        return Diagonal(generate(f"randn:{shape_code}"))
    elif mat_code == "diag_pd":
        return Diagonal(generate(f"randn_pd:{shape_code}"))
    else:
        raise RuntimeError(f'Cannot parse generation code "{code}".')
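# Example codes (hypothetical calls): the part before the colon selects the
# matrix type and the part after it the shape.
# generate("randn:5,5")    -> a dense 5x5 tensor of standard normals
# generate("randn_pd:5,5") -> a positive-definite 5x5 tensor
# generate("diag_pd:3")    -> a Diagonal with positive entries
# generate("zero:4,2")     -> a structured 4x2 Zero matrix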
def test_kronecker_attributes():
    left = Diagonal(B.ones(2))
    right = Diagonal(B.ones(3))
    kron = Kronecker(left, right)
    assert kron.left is left
    assert kron.right is right
def test_diagonal_attributes():
    diag = Diagonal(B.ones(3))
    approx(diag.diag, B.ones(3))
def __call__(self, x, y):
    if x is y and B.shape(uprank(x))[0] == B.shape(self.noises)[0]:
        return Diagonal(self.noises)
    else:
        x, y = uprank(x), uprank(y)
        return Zero(B.dtype(x), B.shape(x)[0], B.shape(y)[0])