def plot_data(self, type='raw', axs=None, show=True):
    import matplotlib.pyplot as plt

    if axs is None:
        nb_plots = self.dm_state + self.dm_act
        _, axs = plt.subplots(nb_plots, figsize=(8, 12))

    if type == 'raw':
        # plot raw state and action trajectories
        for k in range(self.dm_state):
            axs[k].plot(self.data['x'][k, ...])
        for k in range(self.dm_act):
            axs[self.dm_state + k].plot(self.data['u'][k, ...])
    elif type == 'dist':
        # plot per-step empirical mean with a two-sigma band
        from mimo.distributions import GaussianWithPrecision

        dist = GaussianWithPrecision(mu=np.zeros((1, )))
        for k in range(self.dm_state):
            mus = np.zeros((self.nb_steps, ))
            sigmas = np.zeros((self.nb_steps, ))
            for t in range(self.nb_steps):
                dist.max_likelihood(self.data['x'][k, t, :][:, None])
                mus[t], sigmas[t] = dist.mu, dist.sigma

            t = np.linspace(0, self.nb_steps - 1, self.nb_steps)
            axs[k].plot(t, mus, color='k')

            lb = mus - 2. * np.sqrt(sigmas)
            ub = mus + 2. * np.sqrt(sigmas)
            axs[k].fill_between(t, lb, ub, color='k', alpha=0.1)

        dist = GaussianWithPrecision(mu=np.zeros((1, )))
        for k in range(self.dm_act):
            mus = np.zeros((self.nb_steps, ))
            sigmas = np.zeros((self.nb_steps, ))
            for t in range(self.nb_steps):
                dist.max_likelihood(self.data['u'][k, t, :][:, None])
                mus[t], sigmas[t] = dist.mu, dist.sigma

            t = np.linspace(0, self.nb_steps - 1, self.nb_steps)
            axs[self.dm_state + k].plot(t, mus, color='k')

            lb = mus - 2. * np.sqrt(sigmas)
            ub = mus + 2. * np.sqrt(sigmas)
            axs[self.dm_state + k].fill_between(t, lb, ub, color='k', alpha=0.1)

    if show:
        plt.show()
    return axs
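# A hedged usage sketch for plot_data: `traj` below is a hypothetical object
# exposing the attributes the method reads (data['x'] with shape
# (dm_state, nb_steps, nb_samples), data['u'], dm_state, dm_act, nb_steps).
# Raw traces and per-step mean / two-sigma bands can share the same axes:
#
#     axs = traj.plot_data(type='raw', show=False)
#     traj.plot_data(type='dist', axs=axs, show=True)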
import numpy as np
import numpy.random as npr
import matplotlib.pyplot as plt

from mimo.distributions import Categorical
from mimo.distributions import GaussianWithCovariance
from mimo.distributions import GaussianWithPrecision
from mimo.mixtures import MixtureOfGaussians

npr.seed(1337)

# ground-truth mixture: two well-separated Gaussian components
gating = Categorical(K=2)
components = [GaussianWithCovariance(mu=np.array([1., 1.]),
                                     sigma=0.25 * np.eye(2)),
              GaussianWithCovariance(mu=np.array([-1., -1.]),
                                     sigma=0.5 * np.eye(2))]

gmm = MixtureOfGaussians(gating=gating, components=components)

obs, z = gmm.rvs(500)
gmm.plot(obs)

# fit a fresh mixture to the samples via maximum likelihood (EM)
gating = Categorical(K=2)
components = [GaussianWithPrecision(mu=npr.randn(2, ), lmbda=np.eye(2))
              for _ in range(2)]

model = MixtureOfGaussians(gating=gating, components=components)
model.max_likelihood(obs, maxiter=500)

plt.figure()
model.plot(obs)
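# A hedged follow-up sketch: draw fresh samples from the fitted mixture and
# plot them next to the training data, reusing only calls already shown above
# (MixtureOfGaussians.rvs and .plot); the sample count 500 is arbitrary.
new_obs, new_z = model.rvs(500)

plt.figure()
model.plot(new_obs)
plt.show()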
import numpy as np
import numpy.random as npr

from mimo.distributions import GaussianWithCovariance
from mimo.distributions import GaussianWithPrecision
from mimo.distributions import NormalWishart
from mimo.distributions import GaussianWithNormalWishart

npr.seed(1337)

dim, nb_samples, nb_datasets = 3, 500, 5

dist = GaussianWithCovariance(mu=npr.randn(dim),
                              sigma=1. * np.diag(npr.rand(dim)))
data = [dist.rvs(size=nb_samples) for _ in range(nb_datasets)]
print("True mean" + "\n", dist.mu.T, "\n" + "True sigma" + "\n", dist.sigma)

# maximum-likelihood estimate from the pooled datasets
model = GaussianWithPrecision(mu=np.zeros((dim, )))
model.max_likelihood(data)
print("ML mean" + "\n", model.mu.T, "\n" + "ML sigma" + "\n", model.sigma)

# maximum-a-posteriori estimate under a Normal-Wishart prior
hypparams = dict(mu=np.zeros((dim, )), kappa=0.01,
                 psi=np.eye(dim), nu=dim + 1)
prior = NormalWishart(**hypparams)

model = GaussianWithNormalWishart(prior=prior)
model.max_aposteriori(data)
print("MAP mean" + "\n", model.likelihood.mu.T, "\n" + "MAP sigma" + "\n", model.likelihood.sigma)
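# A hedged extension of the script above, assuming the GaussianWithNormalWishart
# API shown later in this section (log_posterior_predictive_gaussian and
# log_posterior_predictive_studentt): score held-out samples under both the
# moment-matched Gaussian and the Student-t posterior predictive.
test = dist.rvs(size=100)
print("Gaussian predictive log-lik:",
      np.sum(model.log_posterior_predictive_gaussian(test)))
print("Student-t predictive log-lik:",
      np.sum(model.log_posterior_predictive_studentt(test)))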
def __init__(self, mus, lmbda):
    # all components share a single (tied) precision matrix
    self._lmbda = lmbda
    self.components = [GaussianWithPrecision(mu=_mu, lmbda=lmbda)
                       for _mu in mus]
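# A minimal, runnable sketch of the same tied-precision construction done by
# hand: every component is a GaussianWithPrecision sharing one lmbda, so a
# change to the shared precision affects all components consistently. The
# means and the scale 4. are arbitrary illustration values.
import numpy as np
from mimo.distributions import GaussianWithPrecision

mus = [np.array([1., 1.]), np.array([-1., -1.])]
lmbda = 4. * np.eye(2)  # one shared precision matrix

components = [GaussianWithPrecision(mu=mu, lmbda=lmbda) for mu in mus]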
class GaussianWithNormalWishart:
    """
    Multivariate Gaussian distribution class.
    Uses a Normal-Wishart prior and posterior.
    Parameters are mean and precision matrix: mu, lmbda.
    """

    def __init__(self, prior, likelihood=None):
        # Normal-Wishart conjugate prior
        self.prior = prior

        # Normal-Wishart posterior
        self.posterior = copy.deepcopy(prior)

        # Gaussian likelihood
        if likelihood is not None:
            self.likelihood = likelihood
        else:
            mu, lmbda = self.prior.rvs()
            self.likelihood = GaussianWithPrecision(mu=mu, lmbda=lmbda)

    def empirical_bayes(self, data):
        self.prior.nat_param = self.likelihood.statistics(data)
        self.likelihood.params = self.prior.rvs()
        return self

    # Maximum a posteriori
    def max_aposteriori(self, data, weights=None):
        stats = self.likelihood.statistics(data) if weights is None\
            else self.likelihood.weighted_statistics(data, weights)
        self.posterior.nat_param = self.prior.nat_param + stats

        # the mode of the Wishart might not exist for small nu
        self.likelihood.params = self.posterior.mode()
        return self

    # Gibbs sampling
    def resample(self, data=[]):
        stats = self.likelihood.statistics(data)
        self.posterior.nat_param = self.prior.nat_param + stats
        self.likelihood.params = self.posterior.rvs()
        return self

    # Mean field
    def meanfield_update(self, data, weights=None):
        stats = self.likelihood.statistics(data) if weights is None\
            else self.likelihood.weighted_statistics(data, weights)
        self.posterior.nat_param = self.prior.nat_param + stats
        self.likelihood.params = self.posterior.rvs()
        return self

    def meanfield_sgdstep(self, data, weights, prob, stepsize):
        stats = self.likelihood.statistics(data) if weights is None\
            else self.likelihood.weighted_statistics(data, weights)
        self.posterior.nat_param = (1. - stepsize) * self.posterior.nat_param\
            + stepsize * (self.prior.nat_param + 1. / prob * stats)
        self.likelihood.params = self.posterior.rvs()
        return self

    def variational_lowerbound(self):
        q_entropy = self.posterior.entropy()
        qp_cross_entropy = self.posterior.cross_entropy(self.prior)
        return q_entropy - qp_cross_entropy

    def log_marginal_likelihood(self):
        log_partition_prior = self.prior.log_partition()
        log_partition_posterior = self.posterior.log_partition()
        return log_partition_posterior - log_partition_prior

    def posterior_predictive_gaussian(self):
        mu, kappa, psi, nu = self.posterior.params
        df = nu - self.likelihood.dim + 1
        c = 1. + 1. / kappa
        lmbda = df * psi / c
        return mu, lmbda

    def log_posterior_predictive_gaussian(self, x):
        mu, lmbda = self.posterior_predictive_gaussian()
        return GaussianWithPrecision(mu=mu, lmbda=lmbda).log_likelihood(x)

    def posterior_predictive_studentt(self):
        mu, kappa, psi, nu = self.posterior.params
        df = nu - self.likelihood.dim + 1
        c = 1. + 1. / kappa
        lmbda = df * psi / c
        return mu, lmbda, df

    def log_posterior_predictive_studentt(self, x):
        mu, lmbda, df = self.posterior_predictive_studentt()
        return mvt_logpdf(x, mu, lmbda, df)
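# A hedged usage sketch for the conjugate class above: sample synthetic data,
# then compare Gibbs resampling against a mean-field update. Only methods
# defined above (resample, meanfield_update, variational_lowerbound) and the
# NormalWishart constructor shown below are assumed.
import numpy as np
import numpy.random as npr

from mimo.distributions import NormalWishart
from mimo.distributions import GaussianWithCovariance
from mimo.distributions import GaussianWithNormalWishart

npr.seed(1337)

dim = 2
truth = GaussianWithCovariance(mu=npr.randn(dim), sigma=np.eye(dim))
data = truth.rvs(size=1000)

prior = NormalWishart(mu=np.zeros((dim, )), kappa=0.01,
                      psi=np.eye(dim), nu=dim + 1)

# Gibbs: draw likelihood parameters from the updated posterior
gibbs = GaussianWithNormalWishart(prior=prior).resample(data)

# Mean field: update the posterior and report the variational lower bound
vi = GaussianWithNormalWishart(prior=prior).meanfield_update(data)
print("ELBO (entropy - cross-entropy):", vi.variational_lowerbound())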
class NormalWishart(Distribution):

    def __init__(self, mu, kappa, psi, nu):
        self.gaussian = GaussianWithPrecision(mu=mu)
        self.wishart = Wishart(psi=psi, nu=nu)
        self.kappa = kappa

    @property
    def dim(self):
        return self.gaussian.dim

    @property
    def params(self):
        return self.gaussian.mu, self.kappa, self.wishart.psi, self.wishart.nu

    @params.setter
    def params(self, values):
        self.gaussian.mu, self.kappa, self.wishart.psi, self.wishart.nu = values

    def rvs(self, size=1):
        lmbda = self.wishart.rvs()
        self.gaussian.lmbda = self.kappa * lmbda
        mu = self.gaussian.rvs()
        return mu, lmbda

    def mean(self):
        return self.gaussian.mean(), self.wishart.mean()

    def mode(self):
        return self.gaussian.mode(), self.wishart.mode()

    def log_likelihood(self, x):
        mu, lmbda = x
        return GaussianWithPrecision(mu=self.gaussian.mu,
                                     lmbda=self.kappa * lmbda).log_likelihood(mu) \
            + self.wishart.log_likelihood(lmbda)

    @property
    def base(self):
        return self.gaussian.base * self.wishart.base

    def log_base(self):
        return np.log(self.base)

    @property
    def nat_param(self):
        return self.std_to_nat(self.params)

    @nat_param.setter
    def nat_param(self, natparam):
        self.params = self.nat_to_std(natparam)

    @staticmethod
    def std_to_nat(params):
        # The definition of the statistics differs slightly
        # from the literature to make posterior updates easy.
        # Assumed stats
        # stats = [lmbda @ x,
        #          -0.5 * lmbda @ xxT,
        #          -0.5 * lmbda,
        #          0.5 * logdet_lmbda]
        mu = params[1] * params[0]
        kappa = params[1]
        psi = invpd(params[2]) \
            + params[1] * np.outer(params[0], params[0])
        nu = params[3] - params[2].shape[0]
        return Stats([mu, kappa, psi, nu])

    @staticmethod
    def nat_to_std(natparam):
        mu = natparam[0] / natparam[1]
        kappa = natparam[1]
        psi = invpd(natparam[2] - kappa * np.outer(mu, mu))
        nu = natparam[3] + natparam[2].shape[0]
        return mu, kappa, psi, nu

    def log_partition(self, params=None):
        _, kappa, psi, nu = params if params is not None else self.params
        # infer the dimension from psi so explicit params are handled correctly
        dim = psi.shape[0]
        return - 0.5 * dim * np.log(kappa)\
               + Wishart(psi=psi, nu=nu).log_partition()

    def expected_statistics(self):
        # stats = [lmbda @ x,
        #          -0.5 * lmbda @ xxT,
        #          -0.5 * lmbda,
        #          0.5 * logdet_lmbda]
        E_x = self.wishart.nu * self.wishart.psi @ self.gaussian.mu
        E_xLmbdaxT = -0.5 * (self.dim / self.kappa + self.gaussian.mu.dot(E_x))
        E_lmbda = -0.5 * (self.wishart.nu * self.wishart.psi)
        E_logdet_lmbda = 0.5 * (np.sum(digamma((self.wishart.nu - np.arange(self.dim)) / 2.))
                                + self.dim * np.log(2.)
                                + 2. * np.sum(np.log(np.diag(self.wishart.psi_chol))))
        return E_x, E_xLmbdaxT, E_lmbda, E_logdet_lmbda

    def entropy(self):
        nat_param, stats = self.nat_param, self.expected_statistics()
        return self.log_partition() - self.log_base()\
               - (np.dot(nat_param[0], stats[0]) + nat_param[1] * stats[1]
                  + np.tensordot(nat_param[2], stats[2]) + nat_param[3] * stats[3])

    def cross_entropy(self, dist):
        nat_param, stats = dist.nat_param, self.expected_statistics()
        return dist.log_partition() - dist.log_base() \
               - (np.dot(nat_param[0], stats[0]) + nat_param[1] * stats[1]
                  + np.tensordot(nat_param[2], stats[2]) + nat_param[3] * stats[3])

    # This implementation is valid but terribly slow
    def _expected_log_likelihood(self, x):
        # The natural parameters of the marginal log-distribution
        # are the expected statistics of the posterior
        nat_param = self.expected_statistics()

        # Data statistics under a Gaussian likelihood;
        # the log-partition is subsumed into nat * stats
        likelihood = GaussianWithPrecision(mu=np.empty_like(nat_param[0]))
        stats = likelihood.statistics(x, vectorize=True)
        log_base = likelihood.log_base()

        return log_base + np.einsum('k,nk->n', nat_param[0], stats[0])\
               + nat_param[1] * stats[1] + nat_param[3] * stats[3]\
               + np.einsum('kh,nkh->n', nat_param[2], stats[2])

    def expected_log_likelihood(self, x):
        _, _, _, _E_logdet_lmbda = self.expected_statistics()
        E_logdet_lmbda = 2. * _E_logdet_lmbda

        xc = np.einsum('nk,kh,nh->n', x - self.gaussian.mu,
                       self.wishart.psi, x - self.gaussian.mu,
                       optimize=True)

        # see Eqs. 10.64, 10.67, and 10.71 in Bishop;
        # a sneaky Gaussian/quadratic identity is hidden here
        return 0.5 * E_logdet_lmbda - 0.5 * self.dim / self.kappa\
               - 0.5 * self.wishart.nu * xc\
               - 0.5 * self.dim * np.log(2. * np.pi)
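# A hedged sanity-check sketch for the maps above: std_to_nat and nat_to_std
# should invert each other, and the slow `_expected_log_likelihood` should
# agree with the fast `expected_log_likelihood` on the same inputs. The
# hyperparameter values are arbitrary illustration choices.
import numpy as np
import numpy.random as npr

from mimo.distributions import NormalWishart

npr.seed(1337)

dim = 3
nw = NormalWishart(mu=npr.randn(dim), kappa=0.5,
                   psi=np.eye(dim), nu=dim + 2)

# round trip through the natural parameterization
mu, kappa, psi, nu = NormalWishart.nat_to_std(NormalWishart.std_to_nat(nw.params))
assert np.allclose(mu, nw.gaussian.mu) and np.isclose(kappa, nw.kappa)
assert np.allclose(psi, nw.wishart.psi) and np.isclose(nu, nw.wishart.nu)

# slow and fast marginal expected log-likelihoods should match
x = npr.randn(10, dim)
assert np.allclose(nw._expected_log_likelihood(x), nw.expected_log_likelihood(x))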