def test_normal_kl(normal1, normal2):
    assert normal1.kl(normal1) < 1e-6
    assert normal1.kl(normal2) > 0.1

    # Test against Monte Carlo estimate.
    samples = normal1.sample(50_000)
    kl_est = B.mean(normal1.logpdf(samples)) - B.mean(normal2.logpdf(samples))
    kl = normal1.kl(normal2)
    approx(kl_est, kl, rtol=0.05)
def test_normal_sampling():
    for mean in [0, 1]:
        dist = Normal(mean, 3 * B.eye(np.int32, 200))

        # Sample without noise.
        samples = dist.sample(2000)
        approx(B.mean(samples), mean, atol=5e-2)
        approx(B.std(samples) ** 2, 3, atol=5e-2)

        # Sample with noise.
        samples = dist.sample(2000, noise=2)
        approx(B.mean(samples), mean, atol=5e-2)
        approx(B.std(samples) ** 2, 5, atol=5e-2)
def test_natural_normal():
    chol = B.randn(2, 2)
    dist = Normal(B.randn(2, 1), B.reg(chol @ chol.T, diag=1e-1))
    nat = NaturalNormal.from_normal(dist)

    # Test properties.
    assert dist.dtype == nat.dtype
    for name in ["dim", "mean", "var", "m2"]:
        approx(getattr(dist, name), getattr(nat, name))

    # Test sampling.
    state = B.create_random_state(dist.dtype, seed=0)
    state, sample = nat.sample(state, num=1_000_000)
    emp_mean = B.mean(B.dense(sample), axis=1, squeeze=False)
    emp_var = (sample - emp_mean) @ (sample - emp_mean).T / 1_000_000
    approx(dist.mean, emp_mean, rtol=5e-2)
    approx(dist.var, emp_var, rtol=5e-2)

    # Test KL.
    chol = B.randn(2, 2)
    other_dist = Normal(B.randn(2, 1), B.reg(chol @ chol.T, diag=1e-2))
    other_nat = NaturalNormal.from_normal(other_dist)
    approx(dist.kl(other_dist), nat.kl(other_nat))

    # Test log-pdf.
    x = B.randn(2, 1)
    approx(dist.logpdf(x), nat.logpdf(x))
def test_ess():
    # Construct a prior and a likelihood.
    prior = Normal(np.array([[0.6, 0.3], [0.3, 0.6]]))
    lik = Normal(
        np.array([[0.2], [0.3]]),
        np.array([[1, 0.2], [0.2, 1]]),
    )

    # Perform sampling.
    sampler = ESS(lik.logpdf, prior.sample)
    num_samples = 30_000
    samples = B.concat(*sampler.sample(num=num_samples), axis=1)
    samples_mean = B.mean(samples, axis=1)[:, None]
    samples_cov = (
        B.matmul(samples - samples_mean, samples - samples_mean, tr_b=True)
        / num_samples
    )

    # Compute posterior statistics.
    prec_prior = B.inv(prior.var)
    prec_lik = B.inv(lik.var)
    cov = B.inv(prec_prior + prec_lik)
    mean = cov @ (prec_prior @ prior.mean + prec_lik @ lik.mean)

    approx(samples_cov, cov, atol=5e-2)
    approx(samples_mean, mean, atol=5e-2)
def step(self, x, grad):
    """Perform a gradient step.

    Args:
        x (tensor): Current input value. This value will be updated in-place.
        grad (tensor): Current gradient.

    Returns:
        tensor: `x` after updating `x` in-place.
    """
    if self.m is None or self.v is None:
        self.m = B.zeros(x)
        self.v = B.zeros(x)

    # Update estimates of moments.
    self.m *= self.beta1
    self.m += (1 - self.beta1) * grad
    self.v *= self.beta2
    self.v += (1 - self.beta2) * grad ** 2

    # Correct for bias of initialisation.
    m_corr = self.m / (1 - self.beta1 ** (self.i + 1))
    v_corr = self.v / (1 - self.beta2 ** (self.i + 1))

    # Perform update.
    if self.local_rates:
        denom = B.sqrt(B.mean(v_corr)) + self.epsilon
    else:
        denom = B.sqrt(v_corr) + self.epsilon
    x -= self.rate * m_corr / denom

    # Increase iteration number.
    self.i += 1

    return x
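# The class that owns `step` is not shown here, so the following is only a
# minimal usage sketch: the `Adam` container below is hypothetical and merely
# sets the attributes that `step` reads (`rate`, `beta1`, `beta2`, `epsilon`,
# `local_rates`, `m`, `v`, `i`); the quadratic objective is purely illustrative.
import lab as B
import numpy as np


class Adam:
    """Hypothetical container for the `step` method above (Adam-style updates)."""

    def __init__(self, rate=1e-2, beta1=0.9, beta2=0.999, epsilon=1e-8, local_rates=False):
        self.rate = rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.local_rates = local_rates
        self.m = None  # First-moment estimate, lazily initialised by `step`.
        self.v = None  # Second-moment estimate, lazily initialised by `step`.
        self.i = 0  # Iteration counter used for bias correction.


Adam.step = step  # Reuse the method defined above.

# Minimise f(x) = sum(x ** 2), whose gradient is 2 * x.
opt = Adam(rate=0.1)
x = np.array([3.0, -2.0])
for _ in range(200):
    x = opt.step(x, 2 * x)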
def test_normal_sampling():
    for mean in [0, 1]:
        dist = Normal(mean, 3 * B.eye(np.int32, 200))

        # Sample without noise.
        samples = dist.sample(2000)
        approx(B.mean(samples), mean, atol=5e-2)
        approx(B.std(samples) ** 2, 3, atol=5e-2)

        # Sample with noise.
        samples = dist.sample(2000, noise=2)
        approx(B.mean(samples), mean, atol=5e-2)
        approx(B.std(samples) ** 2, 5, atol=5e-2)

        # Test that sampling with an explicit random state is deterministic.
        state, sample1 = dist.sample(B.create_random_state(B.dtype(dist), seed=0))
        state, sample2 = dist.sample(B.create_random_state(B.dtype(dist), seed=0))
        assert isinstance(state, B.RandomState)
        approx(sample1, sample2)
def elbo(lik, p, q, num_samples=1):
    """Construct the ELBO.

    Args:
        lik (function): Likelihood function that takes in one or more samples
            from the approximate posterior.
        p (distribution): Prior.
        q (distribution): Approximate posterior.
        num_samples (int, optional): Number of samples. Defaults to `1`.

    Returns:
        tensor: ELBO.
    """
    samples = q.sample(num_samples)
    log_lik = B.mean(lik(samples))
    log_p = B.mean(p.logpdf(samples))
    log_q = B.mean(q.logpdf(samples))
    return log_lik + log_p - log_q
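# A minimal usage sketch (not from the source): it assumes the `Normal`
# constructor seen in the tests above, i.e. `Normal(mean, var)` with a column
# mean, and uses a Gaussian observation model as the likelihood closure.
import numpy as np

p = Normal(np.zeros((2, 1)), np.eye(2))  # Prior.
q = Normal(0.5 * np.ones((2, 1)), 0.5 * np.eye(2))  # Approximate posterior.
obs = Normal(np.ones((2, 1)), 0.1 * np.eye(2))  # Observation model around the data.

# Monte Carlo estimate of the ELBO using 100 samples from `q`.
estimate = elbo(obs.logpdf, p, q, num_samples=100)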
def test_normalise():
    layer = Normalise(epsilon=0)
    x = B.randn(10, 5, 3)

    # Check number of weights and width.
    assert layer.num_weights(10) == 0
    assert layer.width == 10

    # Check initialisation and width.
    layer.initialise(3, None)
    assert layer.width == 3

    # Check correctness.
    out = layer(x)
    approx(B.std(out, axis=2), B.ones(10, 5), rtol=1e-4)
    approx(B.mean(out, axis=2), B.zeros(10, 5), atol=1e-4)
def summarise_samples(x, samples, db=False):
    """Summarise samples.

    Args:
        x (vector): Inputs of samples.
        samples (tensor): Samples, with the first dimension corresponding to
            different samples.
        db (bool, optional): Convert to decibels. Defaults to `False`.

    Returns:
        :class:`collections.namedtuple`: Named tuple containing various
            statistics of the samples.
    """
    x, samples = B.to_numpy(x, samples)
    random_inds = np.random.permutation(B.shape(samples)[0])[:3]

    def transform(x):
        if db:
            return 10 * np.log10(x)
        else:
            return x

    perm = tuple(reversed(range(B.rank(samples))))  # Reverse all dimensions.
    return collect(
        x=B.to_numpy(x),
        mean=transform(B.mean(samples, axis=0)),
        var=transform(B.std(samples, axis=0)) ** 2,
        err_68_lower=transform(B.quantile(samples, 0.32, axis=0)),
        err_68_upper=transform(B.quantile(samples, 1 - 0.32, axis=0)),
        err_95_lower=transform(B.quantile(samples, 0.025, axis=0)),
        err_95_upper=transform(B.quantile(samples, 1 - 0.025, axis=0)),
        err_99_lower=transform(B.quantile(samples, 0.0015, axis=0)),
        err_99_upper=transform(B.quantile(samples, 1 - 0.0015, axis=0)),
        samples=transform(B.transpose(samples, perm=perm)[..., random_inds]),
        all_samples=transform(B.transpose(samples, perm=perm)),
    )
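# A hypothetical usage sketch: `samples` is assumed to have shape
# `(num_samples, len(x))`, and the object returned by `collect` is assumed to
# expose the collected fields as attributes, as the docstring indicates.
import matplotlib.pyplot as plt
import numpy as np

x = np.linspace(0, 1, 100)
samples = np.random.randn(500, 100)  # 500 posterior samples of a length-100 signal.
summary = summarise_samples(x, samples)

plt.plot(summary.x, summary.mean, c="black")
plt.fill_between(summary.x, summary.err_95_lower, summary.err_95_upper, alpha=0.3)
plt.show()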
model_ks, model_psds = wd.load("samples.pickle")

# Plot.
plt.figure(figsize=(15, 2.5))

for i, (model, (x, ks)) in enumerate(zip(models, model_ks)):
    plt.subplot(1, 6, 1 + i)
    for q in [1, 5, 10, 20, 30, 40]:
        plt.fill_between(
            x,
            B.quantile(ks, q / 100, axis=1),
            B.quantile(ks, 1 - q / 100, axis=1),
            facecolor="tab:blue",
            alpha=0.2,
        )
    plt.plot(x, B.mean(ks, axis=1), c="black")
    if hasattr(model, "t_u"):
        plt.scatter(model.t_u, model.t_u * 0, s=5, marker="o", c="black")
    plt.title(model.name + " (Kernel)")
    plt.xlabel("Time (s)")
    plt.xlim(-1.5, 1.5)
    plt.ylim(-0.5, 1.25)
    tweak(legend=False)

for i, (model, (freqs, psds)) in enumerate(zip(models, model_psds)):
    plt.subplot(1, 6, 4 + i)

    def apply_to_psd(f):
        raw = 10 ** (psds / 10)
        return 10 * B.log(f(raw)) / B.log(10)
def f_i(x):
    return B.mean(lab_f(*(args_[:i] + (x,) + args_[i + 1:]), **kw_args))
import lab as B
import numpy as np
from experiments.experiment import run, setup
from wbml.data.mauna_loa import load

args, wd = setup("mauna_loa")

n = 200
data = load(detrend_method="gp")
t = np.array(data.index)[-n:]
t = t - t[0]
y = np.array(data["ppm_detrended"])[-n:]

# Normalise to zero mean and unity variance.
y -= B.mean(y)
y /= B.std(y)

# Setup GPCM models.
noise = 0.05
window = 5
scale = 1 / 12

run(
    args=args,
    wd=wd,
    noise=noise,
    window=window,
    scale=scale,
    fix_window_scale=True,
    t=t,
    y=y,
def equal(index_a, index_b):
    dist = B.mean(B.subtract(a[..., :, index_a], b[..., :, index_b]) ** 2)
    return dist < 1e-10
def __call__(self, x):
    mean = B.mean(x, axis=2)[:, :, None]
    std = B.std(x, axis=2)[:, :, None]
    return (x - mean) / (std + self.epsilon)
def elbo(lik, p: Normal, q: Normal, num_samples=1):
    return B.mean(lik(q.sample(num_samples))) - q.kl(p)
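# Hypothetical consistency check (mirroring `test_normal_kl` above): since
# E_q[log q(z)] - E_q[log p(z)] = KL(q || p), this closed-form-KL estimator
# targets the same quantity as the purely Monte Carlo `elbo` defined earlier,
# typically with lower variance. Assumes `Normal` and `approx` from the
# snippets above are in scope.
import lab as B
import numpy as np

p = Normal(np.zeros((2, 1)), np.eye(2))
q = Normal(0.3 * np.ones((2, 1)), 0.5 * np.eye(2))

z = q.sample(100_000)
kl_mc = B.mean(q.logpdf(z)) - B.mean(p.logpdf(z))  # Monte Carlo estimate of KL(q || p).
approx(kl_mc, q.kl(p), rtol=5e-2)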