def test_conditioning():
    graph = Graph()
    f1, e1 = GP(EQ(), graph=graph), GP(1e-8 * Delta(), graph=graph)
    f2, e2 = GP(EQ(), graph=graph), GP(2e-8 * Delta(), graph=graph)
    gpar = GPAR().add_layer(lambda: (f1, e1)).add_layer(lambda: (f2, e2))

    x = array([[1], [2], [3]])
    y = array([[4, 5], [6, 7], [8, 9]])
    gpar = gpar | (x, y)

    # Extract posterior processes.
    f1_post, e1_post = gpar.layers[0]()
    f2_post, e2_post = gpar.layers[1]()

    # Test independence of noises.
    yield eq, graph.kernels[f1_post, e1_post], ZeroKernel()
    yield eq, graph.kernels[f2_post, e2_post], ZeroKernel()

    # Test form of noises.
    yield eq, e1.mean, e1_post.mean
    yield eq, e1.kernel, e1_post.kernel
    yield eq, e2.mean, e2_post.mean
    yield eq, e2.kernel, e2_post.kernel

    # Test posteriors.
    yield approx, f1_post.mean(x), y[:, 0:1]
    yield approx, f2_post.mean(B.concat([x, y[:, 0:1]], axis=1)), y[:, 1:2]
def objective(vs, m, x_data, y_data, locs):
    """NLML objective.

    Args:
        vs (:class:`varz.Vars`): Variable container.
        m (int): Number of latent processes.
        x_data (tensor): Time stamps of the observations.
        y_data (tensor): Observations.
        locs (tensor): Spatial locations of observations.

    Returns:
        scalar: Negative log-marginal likelihood.
    """
    y_proj, _, S, noises_obs = project(vs, m, y_data, locs)
    xs, noise_obs, noises_latent = model(vs, m)

    # Add contribution of latent processes.
    lml = 0
    for i, (x, y) in enumerate(zip(xs, y_proj)):
        e_signal = GP((noise_obs / S[i] + noises_latent[i]) * Delta(), graph=x.graph)
        lml += (x + e_signal)(x_data).logpdf(y)

        e_noise = GP(noise_obs / S[i] * Delta(), graph=x.graph)
        lml -= e_noise(x_data).logpdf(y)

    # Add regularisation contribution.
    lml += B.sum(Normal(Diagonal(noises_obs)).logpdf(B.transpose(y_data)))

    # Return the negative evidence, normalised by the number of data points.
    n, p = B.shape(y_data)
    return -lml / (n * p)
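# A minimal usage sketch for the objective above. This is an illustrative assumption,
# not part of the original script: the shapes of `x_data`, `y_data`, and `locs`, the
# value of `m`, and the use of Varz's `minimise_l_bfgs_b` optimiser are all assumed.
import torch
from varz import Vars
from varz.torch import minimise_l_bfgs_b

vs = Vars(torch.float64)  # Container for the model parameters.
m = 3                     # Number of latent processes (assumed).
# `x_data` (shape `(n,)`), `y_data` (shape `(n, p)`), and `locs` (shape `(p, 2)`)
# are assumed to be given as tensors.
minimise_l_bfgs_b(lambda vs_: objective(vs_, m, x_data, y_data, locs), vs)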
def test_conditioning():
    graph = Graph()
    f1, e1 = GP(EQ(), graph=graph), GP(1e-8 * Delta(), graph=graph)
    f2, e2 = GP(EQ(), graph=graph), GP(2e-8 * Delta(), graph=graph)
    gpar = GPAR().add_layer(lambda: (f1, e1)).add_layer(lambda: (f2, e2))

    x = tensor([[1], [2], [3]])
    y = tensor([[4, 5], [6, 7], [8, 9]])
    gpar = gpar | (x, y)

    # Extract posterior processes.
    f1_post, e1_post = gpar.layers[0]()
    f2_post, e2_post = gpar.layers[1]()

    # Test independence of noises.
    assert graph.kernels[f1_post, e1_post] == ZeroKernel()
    assert graph.kernels[f2_post, e2_post] == ZeroKernel()

    # Test form of noises.
    assert e1.mean == e1_post.mean
    assert e1.kernel == e1_post.kernel
    assert e2.mean == e2_post.mean
    assert e2.kernel == e2_post.kernel

    # Test posteriors.
    approx(f1_post.mean(x), y[:, 0:1])
    approx(f2_post.mean(B.concat(x, y[:, 0:1], axis=1)), y[:, 1:2])
def test_logpdf():
    graph = Graph()
    f1, e1 = GP(EQ(), graph=graph), GP(2e-1 * Delta(), graph=graph)
    f2, e2 = GP(Linear(), graph=graph), GP(1e-1 * Delta(), graph=graph)
    gpar = GPAR().add_layer(lambda: (f1, e1)).add_layer(lambda: (f2, e2))

    # Sample some data from GPAR.
    x = B.linspace(0, 2, 10, dtype=torch.float64)[:, None]
    y = gpar.sample(x, latent=True)

    # Compute logpdf.
    logpdf1 = (f1 + e1)(x).logpdf(y[:, 0])
    logpdf2 = (f2 + e2)(B.concat([x, y[:, 0:1]], axis=1)).logpdf(y[:, 1])

    # Test computation of GPAR.
    yield eq, gpar.logpdf(x, y), logpdf1 + logpdf2
    yield eq, gpar.logpdf(x, y, only_last_layer=True), logpdf2

    # Test resuming computation.
    x_int, x_ind_int = gpar.logpdf(x, y, return_inputs=True, outputs=[0])
    yield eq, gpar.logpdf(x_int, y, x_ind=x_ind_int, outputs=[1]), logpdf2

    # Test that sampling missing gives a stochastic estimate.
    y[1, 0] = np.nan
    yield ge, \
        B.abs(gpar.logpdf(x, y, sample_missing=True) -
              gpar.logpdf(x, y, sample_missing=True)).numpy(), \
        1e-3
def __init__(
    self,
    measure: Measure,
    xs: List[GP],
    h: AbstractMatrix,
    noise_obs: B.Numeric,
    noises_latent: B.Numeric,
):
    self.measure = measure
    self.xs = xs
    self.h = h
    self.noise_obs = noise_obs
    self.noises_latent = noises_latent

    # Create noisy latent processes.
    xs_noisy = [
        x + GP(self.noises_latent[i] * Delta(), measure=self.measure)
        for i, x in enumerate(xs)
    ]

    # Create noiseless observed processes.
    self.fs = _matmul(self.h, self.xs)

    # Create observed processes.
    fs_noisy = _matmul(self.h, xs_noisy)
    self.ys = [
        f + GP(self.noise_obs * Delta(), measure=self.measure) for f in fs_noisy
    ]
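# For reference, a hypothetical sketch of what a helper like `_matmul` above might do:
# mix a list of latent GPs through the rows of a mixing matrix `h`, giving one observed
# process per row. This is an illustrative assumption, not the actual implementation.
from functools import reduce
import operator


def _matmul_sketch(h, xs):
    h = B.dense(h)  # Convert a structured matrix to a dense one.
    return [
        reduce(operator.add, [h[i, j] * xs[j] for j in range(len(xs))])
        for i in range(B.shape(h)[0])
    ]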
def test_sample():
    graph = Graph()
    x = array([1, 2, 3])[:, None]

    # Test that it produces random samples. Not sure how to test for
    # correctness.
    f1, e1 = GP(EQ(), graph=graph), GP(1e-1 * Delta(), graph=graph)
    f2, e2 = GP(EQ(), graph=graph), GP(1e-1 * Delta(), graph=graph)
    gpar = GPAR().add_layer(lambda: (f1, e1)).add_layer(lambda: (f2, e2))
    yield ge, B.sum(B.abs(gpar.sample(x) - gpar.sample(x))), 1e-3
    yield ge, \
        B.sum(B.abs(gpar.sample(x, latent=True) - gpar.sample(x, latent=True))), \
        1e-3

    # Test that posterior latent samples are around the data that is
    # conditioned on.
    graph = Graph()
    f1, e1 = GP(EQ(), graph=graph), GP(1e-8 * Delta(), graph=graph)
    f2, e2 = GP(EQ(), graph=graph), GP(1e-8 * Delta(), graph=graph)
    gpar = GPAR().add_layer(lambda: (f1, e1)).add_layer(lambda: (f2, e2))
    y = gpar.sample(x, latent=True)
    gpar = gpar | (x, y)
    yield approx, gpar.sample(x), y, 3
    yield approx, gpar.sample(x, latent=True), y, 3
def test_conditioning(x, w):
    prior = Measure()
    f1, e1 = GP(EQ(), measure=prior), GP(1e-10 * Delta(), measure=prior)
    f2, e2 = GP(EQ(), measure=prior), GP(2e-10 * Delta(), measure=prior)
    gpar = GPAR().add_layer(lambda: (f1, e1)).add_layer(lambda: (f2, e2))

    # Generate some data.
    y = B.concat((f1 + e1)(x).sample(), (f2 + e2)(x).sample(), axis=1)

    # Extract posterior processes.
    gpar = gpar | (x, y, w)
    f1_post, e1_post = gpar.layers[0]()
    f2_post, e2_post = gpar.layers[1]()

    # Test independence of noises.
    assert f1_post.measure.kernels[f1_post, e1_post] == ZeroKernel()
    assert f2_post.measure.kernels[f2_post, e2_post] == ZeroKernel()

    # Test form of noises.
    assert e1.mean == e1_post.mean
    assert e1.kernel == e1_post.kernel
    assert e2.mean == e2_post.mean
    assert e2.kernel == e2_post.kernel

    # Test posteriors.
    approx(f1_post.mean(x), y[:, 0:1], atol=1e-3)
    approx(f2_post.mean(B.concat(x, y[:, 0:1], axis=1)), y[:, 1:2], atol=1e-3)
def test_logpdf(x, w):
    prior = Measure()
    f1, e1 = GP(EQ(), measure=prior), GP(2e-1 * Delta(), measure=prior)
    f2, e2 = GP(Linear(), measure=prior), GP(1e-1 * Delta(), measure=prior)
    gpar = GPAR().add_layer(lambda: (f1, e1)).add_layer(lambda: (f2, e2))

    # Generate some data.
    y = gpar.sample(x, w, latent=True)

    # Compute logpdf.
    x1 = WeightedUnique(x, w[:, 0])
    x2 = WeightedUnique(B.concat(x, y[:, 0:1], axis=1), w[:, 1])
    logpdf1 = (f1 + e1)(x1).logpdf(y[:, 0])
    logpdf2 = (f2 + e2)(x2).logpdf(y[:, 1])

    # Test computation of GPAR.
    assert gpar.logpdf(x, y, w) == logpdf1 + logpdf2
    assert gpar.logpdf(x, y, w, only_last_layer=True) == logpdf2

    # Test resuming computation.
    x_partial, x_ind_partial = gpar.logpdf(x, y, w, return_inputs=True, outputs=[0])
    assert gpar.logpdf(x_partial, y, w, x_ind=x_ind_partial, outputs=[1]) == logpdf2

    # Test that sampling missing gives a stochastic estimate.
    y[1, 0] = np.nan
    all_different(
        gpar.logpdf(x, y, w, sample_missing=True),
        gpar.logpdf(x, y, w, sample_missing=True),
    )
def test_obs(x):
    prior = Measure()
    f = GP(EQ(), measure=prior)
    e = GP(1e-1 * Delta(), measure=prior)

    # Generate some data.
    w = B.rand(B.shape(x)[0]) + 1e-2
    y = f(x).sample()

    # Set some observations to be missing.
    y_missing = y.copy()
    y_missing[::2] = np.nan

    # Check dense case.
    gpar = GPAR()
    obs = gpar._obs(x, None, y_missing, w, f, e)
    assert isinstance(obs, Obs)
    approx(
        prior.logpdf(obs),
        (f + e)(WeightedUnique(x[1::2], w[1::2])).logpdf(y[1::2]),
        atol=1e-6,
    )

    # Check sparse case.
    gpar = GPAR(x_ind=x)
    obs = gpar._obs(x, x, y_missing, w, f, e)
    assert isinstance(obs, SparseObs)
    approx(
        prior.logpdf(obs),
        (f + e)(WeightedUnique(x[1::2], w[1::2])).logpdf(y[1::2]),
        atol=1e-6,
    )
def test_sample(x, w):
    prior = Measure()

    # Test that it produces random samples.
    f1, e1 = GP(EQ(), measure=prior), GP(1e-1 * Delta(), measure=prior)
    f2, e2 = GP(EQ(), measure=prior), GP(2e-1 * Delta(), measure=prior)
    gpar = GPAR().add_layer(lambda: (f1, e1)).add_layer(lambda: (f2, e2))
    all_different(gpar.sample(x, w), gpar.sample(x, w))
    all_different(gpar.sample(x, w, latent=True), gpar.sample(x, w, latent=True))

    # Test that posterior latent samples are around the data that is conditioned on.
    prior = Measure()
    f1, e1 = GP(EQ(), measure=prior), GP(1e-10 * Delta(), measure=prior)
    f2, e2 = GP(EQ(), measure=prior), GP(2e-10 * Delta(), measure=prior)
    gpar = GPAR().add_layer(lambda: (f1, e1)).add_layer(lambda: (f2, e2))
    y = gpar.sample(x, w, latent=True)
    gpar = gpar | (x, y, w)
    approx(gpar.sample(x, w), y, atol=1e-3)
    approx(gpar.sample(x, w, latent=True), y, atol=1e-3)
def predict(vs, m, x_data, y_data, locs, x_pred):
    """Make predictions.

    Args:
        vs (:class:`varz.Vars`): Variable container.
        m (int): Number of latent processes.
        x_data (tensor): Time stamps of the observations.
        y_data (tensor): Observations.
        locs (tensor): Spatial locations of observations.
        x_pred (tensor): Time stamps to predict at.

    Returns:
        tuple: Tuple containing the predictions for the latent processes and
            predictions for the observations.
    """
    # Construct model and project data for prediction.
    xs, noise_obs, noises_latent = model(vs, m)
    y_proj, H, S, noises_obs = project(vs, m, y_data, locs)
    L = noise_obs / S + noises_latent

    # Condition latent processes.
    xs_posterior = []
    for x, noise, y in zip(xs, L, y_proj):
        e = GP(noise * Delta(), graph=x.graph)
        xs_posterior.append(x | ((x + e)(x_data), y))
    xs = xs_posterior

    # Extract posterior means and variances of the latent processes.
    x_means, x_vars = zip(
        *[(x.mean(x_pred)[:, 0], x.kernel.elwise(x_pred)[:, 0]) for x in xs]
    )

    # Construct predictions for latent processes.
    lat_preds = [
        B.to_numpy(mean, mean - 2 * (var + L[i]) ** 0.5, mean + 2 * (var + L[i]) ** 0.5)
        for i, (mean, var) in enumerate(zip(x_means, x_vars))
    ]

    # Pull means through mixing matrix.
    x_means = B.stack(*x_means, axis=0)
    y_means = B.matmul(H, x_means)

    # Pull variances through mixing matrix and add noise.
    x_vars = B.stack(*x_vars, axis=0)
    y_vars = B.matmul(H ** 2, x_vars + noises_latent[:, None]) + noise_obs

    # Construct predictions for observations.
    obs_preds = [
        (mean, mean - 2 * var ** 0.5, mean + 2 * var ** 0.5)
        for mean, var in zip(y_means, y_vars)
    ]

    return lat_preds, obs_preds
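# A minimal usage sketch of `predict` (hedged: `vs`, `m`, `x_data`, `y_data`, `locs`,
# and `x_pred` are assumed to be defined as in the surrounding script). Each entry of
# the returned lists is a `(mean, lower, upper)` triple, where the bounds form a
# two-standard-deviation credible band.
lat_preds, obs_preds = predict(vs, m, x_data, y_data, locs, x_pred)
mean, lower, upper = obs_preds[0]  # Prediction for the first observed output.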
def test_obs():
    graph = Graph()
    f = GP(EQ(), graph=graph)
    e = GP(1e-8 * Delta(), graph=graph)

    # Check that it produces the correct observations.
    x = B.linspace(0, 0.1, 10, dtype=torch.float64)
    y = f(x).sample()

    # Set some observations to be missing.
    y_missing = y.clone()
    y_missing[::2] = np.nan

    # Check dense case.
    gpar = GPAR()
    obs = gpar._obs(x, None, y_missing, f, e)
    yield eq, type(obs), Obs
    yield approx, y, (f | obs).mean(x)

    # Check sparse case.
    gpar = GPAR(x_ind=x)
    obs = gpar._obs(x, x, y_missing, f, e)
    yield eq, type(obs), SparseObs
    yield approx, y, (f | obs).mean(x)
import matplotlib.pyplot as plt
from wbml.plot import tweak

from stheno import B, Measure, GP, EQ, Delta

# Define points to predict at.
x = B.linspace(0, 10, 100)
x_obs = B.linspace(0, 10, 20)

# Construct a prior.
prior = Measure()
w = lambda x: B.exp(-(x**2) / 0.5)  # Window
b = [(w * GP(EQ(), measure=prior)).shift(xi) for xi in x_obs]  # Weighted basis functions
f = sum(b)  # Latent function
e = GP(Delta(), measure=prior)  # Noise
y = f + 0.2 * e  # Observation model

# Sample a true, underlying function and observations.
f_true, y_obs = prior.sample(f(x), y(x_obs))

# Condition on the observations to make predictions.
post = prior | (y(x_obs), y_obs)

# Plot result.
for i, bi in enumerate(b):
    mean, lower, upper = post(bi(x)).marginals()
    kw_args = {"label": "Basis functions"} if i == 0 else {}
    plt.plot(x, mean, style="pred2", **kw_args)
plt.plot(x, f_true, label="True", style="test")
plt.scatter(x_obs, y_obs, label="Observations", style="train", s=20)
import matplotlib.pyplot as plt
import numpy as np
import wbml.plot

from stheno import GP, EQ, Delta, model

# Define points to predict at.
x = np.linspace(0, 10, 100)
x_obs = np.linspace(0, 7, 20)

# Construct a prior.
f = GP(EQ().periodic(5.))  # Latent function.
e = GP(Delta())  # Noise.
y = f + .5 * e

# Sample a true, underlying function and observations.
f_true, y_obs = model.sample(f(x), y(x_obs))

# Now condition on the observations to make predictions.
mean, lower, upper = (f | (y(x_obs), y_obs))(x).marginals()

# Plot result.
plt.plot(x, f_true, label='True', c='tab:blue')
plt.scatter(x_obs, y_obs, label='Observations', c='tab:red')
plt.plot(x, mean, label='Prediction', c='tab:green')
plt.plot(x, lower, ls='--', c='tab:green')
plt.plot(x, upper, ls='--', c='tab:green')
wbml.plot.tweak()

plt.savefig('readme_example1_simple_regression.png')
plt.show()
    (
        Exp(),
        lambda scheme: RGPCM(
            scheme=scheme,
            window=window,
            scale=scale,
            noise=noise,
            n_u=n_u,
            m_max=n_z // 2,
            t=t,
        ),
    ),
]:
    # Sample data.
    gp_f = GP(kernel)
    gp_y = gp_f + GP(noise * Delta(), measure=gp_f.measure)
    f, y = gp_f.measure.sample(gp_f(t), gp_y(t))
    f, y = B.flatten(f), B.flatten(y)
    wd.save(
        {
            "t": t,
            "f": f,
            "k": B.flatten(kernel(t_k, 0)),
            "y": y,
            "true_logpdf": gp_y(t).logpdf(y),
        },
        slugify(str(kernel)),
        "data.pickle",
    )

    for scheme in ["mean-field", "structured"]:
n = 881  # Add last one for `linspace`.
noise = 0.1
t = B.linspace(-44, 44, n)
t_plot = B.linspace(-44, 44, 500)

# Set up the true model and the GPCM models.
kernel = EQ()
window = 2
scale = 1
n_u = 40
n_z = 88

# Sample data.
m = Measure()
gp_f = GP(kernel, measure=m)
gp_y = gp_f + GP(noise * Delta(), measure=m)
truth, y = map(B.flatten, m.sample(gp_f(t_plot), gp_y(t)))

# Remove the region [-8.8, 8.8].
inds = ~((t >= -8.8) & (t <= 8.8))
t = t[inds]
y = y[inds]


def comparative_kernel(vs_):
    return vs_.pos(1) * kernel.stretch(vs_.pos(1.0)) + vs_.pos(noise) * Delta()


run(
    args=args,
    wd=wd,
# Define points to predict at.
x = np.linspace(0, 10, 100)
x_obs = np.linspace(0, 10, 10)

# Model parameters:
m = 2
p = 4
H = np.random.randn(p, m)

# Construct latent functions.
us = VGP(m, EQ())
fs = us.lmatmul(H)

# Construct noise.
e = VGP(p, 0.5 * Delta())

# Construct observation model.
ys = e + fs

# Sample a true, underlying function and observations.
fs_true = fs.sample(x)
ys_obs = (ys | fs.obs(x, fs_true)).sample(x_obs)

# Condition the model on the observations to make predictions.
preds = (fs | ys.obs(x_obs, ys_obs)).marginals(x)


# Plot results.
def plot_prediction(x, f, pred, x_obs=None, y_obs=None):
    plt.plot(x, f, label='True', c='tab:blue')
import matplotlib.pyplot as plt
import numpy as np

from stheno import GP, Delta, model, Obs, dense

# Define points to predict at.
x = np.linspace(0, 10, 200)
x_obs = np.linspace(0, 10, 10)

# Construct the model.
slope = GP(1)
intercept = GP(5)
f = slope * (lambda x: x) + intercept

e = 0.2 * GP(Delta())  # Noise model

y = f + e  # Observation model

# Sample a slope, intercept, underlying function, and observations.
true_slope, true_intercept, f_true, y_obs = \
    model.sample(slope(0), intercept(0), f(x), y(x_obs))

# Condition on the observations to make predictions.
slope, intercept, f = (slope, intercept, f) | Obs(y(x_obs), y_obs)
mean, lower, upper = f(x).marginals()

print('true slope', true_slope)
print('predicted slope', slope(0).mean)
print('true intercept', true_intercept)
print('predicted intercept', intercept(0).mean)
B.epsilon = 1e-10

# Define points to predict at.
x = B.linspace(0, 10, 200)
x_obs = B.linspace(0, 7, 50)

with Measure() as prior:
    # Construct a latent function consisting of four different components.
    f_smooth = GP(EQ())
    f_wiggly = GP(RQ(1e-1).stretch(0.5))
    f_periodic = GP(EQ().periodic(1.0))
    f_linear = GP(Linear())
    f = f_smooth + f_wiggly + f_periodic + 0.2 * f_linear

    # Let the observation noise consist of a bit of exponential noise.
    e_indep = GP(Delta())
    e_exp = GP(Exp())
    e = e_indep + 0.3 * e_exp

    # Sum the latent function and observation noise to get a model for the observations.
    y = f + 0.5 * e

# Sample a true, underlying function and observations.
(
    f_true_smooth,
    f_true_wiggly,
    f_true_periodic,
    f_true_linear,
    f_true,
    y_obs,
) = prior.sample(f_smooth(x), f_wiggly(x), f_periodic(x), f_linear(x), f(x),
# Define points to predict at.
x = B.linspace(0, 10, 200)
x_obs = B.linspace(0, 7, 50)

# Construct a latent function consisting of four different components.
prior = Measure()
f_smooth = GP(EQ(), measure=prior)
f_wiggly = GP(RQ(1e-1).stretch(0.5), measure=prior)
f_periodic = GP(EQ().periodic(1.0), measure=prior)
f_linear = GP(Linear(), measure=prior)
f = f_smooth + f_wiggly + f_periodic + 0.2 * f_linear

# Let the observation noise consist of a bit of exponential noise.
e_indep = GP(Delta(), measure=prior)
e_exp = GP(Exp(), measure=prior)
e = e_indep + 0.3 * e_exp

# Sum the latent function and observation noise to get a model for the observations.
y = f + 0.5 * e

# Sample a true, underlying function and observations.
(
    f_true_smooth,
    f_true_wiggly,
    f_true_periodic,
    f_true_linear,
    f_true,
    y_obs,
def root(a):
    u, s_diag, _ = np.linalg.svd(a)
    return u.dot(np.diag(s_diag ** .5)).dot(u.T)


def fp_difference(K, *Ks):
    root_K = root(K)
    return K - np.mean([root(root_K @ Ki @ root_K) for Ki in Ks], axis=0)


t_max = 5
n = 400

# k1 = EQ().stretch(t_max / 2) * EQ().periodic(1) + 1e-6 * Delta()
# k2 = EQ().stretch(t_max / 2) * EQ().periodic(1.8) + 1e-6 * Delta()
k1 = EQ().periodic(0.8) + 1e-6 * Delta()
k2 = EQ().periodic(0.85) + 1e-6 * Delta()
k3 = EQ().periodic(0.9) + 1e-6 * Delta()

x = np.linspace(0, t_max, n)
K1 = random_K(n)
K2 = random_K(n)
K3 = random_K(n)
# K1, K2, K3 = random_K(n), random_K(n), random_K(n)

# Solve.
print('Computing...')
C1 = spa.solve_continuous_are(np.zeros((n, n)), np.linalg.cholesky(K1), K3, np.eye(n))
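# A quick, self-contained sanity check (an illustrative addition, not part of the
# experiment above): `root` computes a symmetric matrix square root via the SVD, so
# for a symmetric positive-definite matrix we should get `root(a) @ root(a) == a`.
rng = np.random.RandomState(0)
_x = rng.randn(5, 5)
_a = _x @ _x.T + 1e-6 * np.eye(5)  # Random symmetric positive-definite matrix.
assert np.allclose(root(_a) @ root(_a), _a)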
def comparative_kernel(vs_):
    return vs_.pos(1) * kernel.stretch(vs_.pos(1.0)) + vs_.pos(noise) * Delta()
# Define points to predict at.
x = B.linspace(0, 10, 100)
x_obs = B.linspace(0, 10, 10)

# Model parameters:
m = 2
p = 4
H = B.randn(p, m)

# Construct latent functions.
prior = Measure()
us = VGP([GP(EQ(), measure=prior) for _ in range(m)])
fs = us.lmatmul(H)

# Construct noise.
e = VGP([GP(0.5 * Delta(), measure=prior) for _ in range(p)])

# Construct observation model.
ys = e + fs

# Sample a true, underlying function and observations.
samples = prior.sample(*(p(x) for p in fs.ps), *(p(x_obs) for p in ys.ps))
fs_true, ys_obs = samples[:p], samples[p:]

# Compute the posterior and make predictions.
post = prior | (*((p(x_obs), y_obs) for p, y_obs in zip(ys.ps, ys_obs)),)
preds = [post(p(x)).marginals() for p in fs.ps]


# Plot results.
def plot_prediction(x, f, pred, x_obs=None, y_obs=None):
import matplotlib.pyplot as plt
import numpy as np

from stheno import GP, EQ, Delta, Obs

# Define points to predict at.
x = np.linspace(0, 10, 200)
x_obs = np.linspace(0, 10, 10)

# Construct the model.
f = 0.7 * GP(EQ()).stretch(1.5)
e = 0.2 * GP(Delta())

# Construct derivatives via finite differences.
df = f.diff_approx(1)
ddf = f.diff_approx(2)
dddf = f.diff_approx(3) + e

# Fix the integration constants.
f, df, ddf, dddf = (f, df, ddf, dddf) | Obs((f(0), 1), (df(0), 0), (ddf(0), -1))

# Sample observations.
y_obs = np.sin(x_obs) + 0.2 * np.random.randn(*x_obs.shape)

# Condition on the observations to make predictions.
f, df, ddf, dddf = (f, df, ddf, dddf) | Obs(dddf(x_obs), y_obs)

# And make predictions.
pred_iiif = f(x).marginals()
pred_iif = df(x).marginals()
import wbml.out as out
from wbml.plot import tweak

from stheno import B, Measure, GP, Delta

# Define points to predict at.
x = B.linspace(0, 10, 200)
x_obs = B.linspace(0, 10, 10)

# Construct the model.
prior = Measure()
slope = GP(1, measure=prior)
intercept = GP(5, measure=prior)
f = slope * (lambda x: x) + intercept

e = 0.2 * GP(Delta(), measure=prior)  # Noise model

y = f + e  # Observation model

# Sample a slope, intercept, underlying function, and observations.
true_slope, true_intercept, f_true, y_obs = prior.sample(
    slope(0), intercept(0), f(x), y(x_obs)
)

# Condition on the observations to make predictions.
post = prior | (y(x_obs), y_obs)
mean, lower, upper = post(f(x)).marginals()

out.kv("True slope", true_slope[0, 0])
out.kv("Predicted slope", post(slope(0)).mean[0, 0])
out.kv("True intercept", true_intercept[0, 0])
out.kv("Predicted intercept", post(intercept(0)).mean[0, 0])