import copy

import numpy as np

# GaussianWithDiagonalPrecision and mvt_logpdf are provided elsewhere in the package.


class GaussianWithNormalGamma:
    """
    Multivariate diagonal Gaussian distribution class.
    Uses a Normal-Gamma prior and posterior.
    Parameters are the mean and the diagonal precision: mu, lmbdas.
    """

    def __init__(self, prior, likelihood=None):
        # Normal-Gamma conjugate prior
        self.prior = prior

        # Normal-Gamma posterior
        self.posterior = copy.deepcopy(prior)

        # Gaussian likelihood
        if likelihood is not None:
            self.likelihood = likelihood
        else:
            mu, lmbdas = self.prior.rvs()
            self.likelihood = GaussianWithDiagonalPrecision(mu=mu, lmbdas=lmbdas)

    def empirical_bayes(self, data):
        self.prior.nat_param = self.likelihood.statistics(data)
        self.likelihood.params = self.prior.rvs()
        return self

    # Maximum a posteriori
    def max_aposteriori(self, data, weights=None):
        stats = self.likelihood.statistics(data) if weights is None\
            else self.likelihood.weighted_statistics(data, weights)

        self.posterior.nat_param = self.prior.nat_param + stats
        # the mode of the Gamma does not exist for alphas < 1
        self.likelihood.params = self.posterior.mode()
        return self

    # Gibbs sampling
    def resample(self, data=[]):
        stats = self.likelihood.statistics(data)
        self.posterior.nat_param = self.prior.nat_param + stats
        self.likelihood.params = self.posterior.rvs()
        return self

    # Mean field
    def meanfield_update(self, data, weights=None):
        stats = self.likelihood.statistics(data) if weights is None\
            else self.likelihood.weighted_statistics(data, weights)

        self.posterior.nat_param = self.prior.nat_param + stats
        self.likelihood.params = self.posterior.rvs()
        return self

    def meanfield_sgdstep(self, data, weights, prob, stepsize):
        stats = self.likelihood.statistics(data) if weights is None\
            else self.likelihood.weighted_statistics(data, weights)

        self.posterior.nat_param = (1. - stepsize) * self.posterior.nat_param\
            + stepsize * (self.prior.nat_param + 1. / prob * stats)

        self.likelihood.params = self.posterior.rvs()
        return self

    def variational_lowerbound(self):
        q_entropy = self.posterior.entropy()
        qp_cross_entropy = self.prior.cross_entropy(self.posterior)
        return q_entropy - qp_cross_entropy

    def log_marginal_likelihood(self):
        log_partition_prior = self.prior.log_partition()
        log_partition_posterior = self.posterior.log_partition()
        return log_partition_posterior - log_partition_prior

    def posterior_predictive_gaussian(self):
        mu, kappas, alphas, betas = self.posterior.params
        c = 1. + 1. / kappas
        lmbdas = (alphas / betas) * 1. / c
        return mu, lmbdas

    def log_posterior_predictive_gaussian(self, x):
        mu, lmbdas = self.posterior_predictive_gaussian()
        return GaussianWithDiagonalPrecision(mu=mu, lmbdas=lmbdas).log_likelihood(x)

    def posterior_predictive_studentt(self):
        mu, kappas, alphas, betas = self.posterior.params
        dfs = 2. * alphas
        c = 1. + 1. / kappas
        lmbdas = (alphas / betas) * 1. / c
        return mu, lmbdas, dfs

    def log_posterior_predictive_studentt(self, x):
        mu, lmbdas, dfs = self.posterior_predictive_studentt()
        log_posterior = 0.
        for _x, _mu, _lmbda, _df in zip(x, mu, lmbdas, dfs):
            log_posterior += mvt_logpdf(_x.reshape(-1, 1), _mu.reshape(-1, 1),
                                        _lmbda.reshape(-1, 1, 1), _df)
        return log_posterior
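# A minimal usage sketch (not part of the library): build the conjugate model
# from a NormalGamma prior (defined further below) and run one Gibbs step.
# The hyperparameter values are arbitrary, and it assumes that
# GaussianWithDiagonalPrecision exposes an rvs(size) sampler.
prior = NormalGamma(mu=np.zeros(2), kappas=1e-2 * np.ones(2),
                    alphas=np.ones(2), betas=np.ones(2))
model = GaussianWithNormalGamma(prior)

# synthetic data drawn from the randomly initialized likelihood
data = model.likelihood.rvs(500)

# conjugate update: posterior nat. params = prior nat. params + data statistics
model.resample(data)
print(model.posterior_predictive_gaussian())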
import numpy as np
import numpy.random as npr
import matplotlib.pyplot as plt

# Categorical, MixtureOfGaussians, GaussianWithDiagonalCovariance and
# GaussianWithDiagonalPrecision are imported from the package.

npr.seed(1337)

# ground-truth mixture with two well-separated components
gating = Categorical(K=2)
components = [
    GaussianWithDiagonalCovariance(mu=np.array([1., 1.]),
                                   sigmas=np.array([0.25, 0.5])),
    GaussianWithDiagonalCovariance(mu=np.array([-1., -1.]),
                                   sigmas=np.array([0.5, 0.25]))
]

gmm = MixtureOfGaussians(gating=gating, components=components)

obs, z = gmm.rvs(500)
gmm.plot(obs)

# randomly initialized model to be fit
gating = Categorical(K=2)
components = [
    GaussianWithDiagonalPrecision(mu=npr.randn(2, ),
                                  lmbdas=np.ones((2, )))
    for _ in range(2)
]

model = MixtureOfGaussians(gating=gating, components=components)

print('Expectation Maximization')
model.max_likelihood(obs, maxiter=500)

plt.figure()
model.plot(obs)
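# Sanity check (a sketch): after EM the fitted means should roughly recover
# the generating means, up to a permutation of the components. Assumes the
# component attributes mu and lmbdas are exposed as in the constructors above.
for k, c in enumerate(model.components):
    print('component', k, 'mu:', c.mu, 'lmbdas:', c.lmbdas)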
import numpy as np
from scipy.special import digamma

# Distribution, Stats, Gamma and GaussianWithDiagonalPrecision are
# provided elsewhere in the package.


class NormalGamma(Distribution):

    def __init__(self, mu, kappas, alphas, betas):
        self.gaussian = GaussianWithDiagonalPrecision(mu=mu)
        self.gamma = Gamma(alphas=alphas, betas=betas)
        self.kappas = kappas

    @property
    def dim(self):
        return self.gaussian.dim

    @property
    def params(self):
        return self.gaussian.mu, self.kappas, self.gamma.alphas, self.gamma.betas

    @params.setter
    def params(self, values):
        self.gaussian.mu, self.kappas, self.gamma.alphas, self.gamma.betas = values

    def rvs(self, size=1):
        lmbdas = self.gamma.rvs()
        self.gaussian.lmbdas = self.kappas * lmbdas
        mu = self.gaussian.rvs()
        return mu, lmbdas

    def mean(self):
        return self.gaussian.mean(), self.gamma.mean()

    def mode(self):
        return self.gaussian.mode(), self.gamma.mode()

    def log_likelihood(self, x):
        mu, lmbdas = x
        return GaussianWithDiagonalPrecision(mu=self.gaussian.mu,
                                             lmbdas=self.kappas * lmbdas).log_likelihood(mu)\
            + self.gamma.log_likelihood(lmbdas)

    @property
    def base(self):
        return self.gaussian.base * self.gamma.base

    def log_base(self):
        return np.log(self.base)

    @property
    def nat_param(self):
        return self.std_to_nat(self.params)

    @nat_param.setter
    def nat_param(self, natparam):
        self.params = self.nat_to_std(natparam)

    @staticmethod
    def std_to_nat(params):
        # The definition of the statistics differs slightly from
        # the literature to make posterior updates easy.
        # Assumed statistics:
        # stats = [lmbdas * x,
        #          -0.5 * lmbdas * xx,
        #          0.5 * log_lmbdas,
        #          -0.5 * lmbdas]
        mu = params[1] * params[0]
        kappas = params[1]
        alphas = 2. * params[2] - 1.
        betas = 2. * params[3] + params[1] * params[0]**2
        return Stats([mu, kappas, alphas, betas])

    @staticmethod
    def nat_to_std(natparam):
        mu = natparam[0] / natparam[1]
        kappas = natparam[1]
        alphas = 0.5 * (natparam[2] + 1.)
        betas = 0.5 * (natparam[3] - kappas * mu**2)
        return mu, kappas, alphas, betas

    def log_partition(self, params=None):
        mu, kappas, alphas, betas = params if params is not None else self.params
        return -0.5 * np.sum(np.log(kappas))\
            + Gamma(alphas=alphas, betas=betas).log_partition()

    def expected_statistics(self):
        # stats = [lmbdas * x,
        #          -0.5 * lmbdas * xx,
        #          0.5 * log_lmbdas,
        #          -0.5 * lmbdas]
        E_x = self.gamma.alphas / self.gamma.betas * self.gaussian.mu
        E_lmbdas_xx = -0.5 * (1. / self.kappas + self.gaussian.mu * E_x)
        E_log_lmbdas = 0.5 * (digamma(self.gamma.alphas) - np.log(self.gamma.betas))
        E_lmbdas = -0.5 * (self.gamma.alphas / self.gamma.betas)
        return E_x, E_lmbdas_xx, E_log_lmbdas, E_lmbdas

    def entropy(self):
        # H(self) = log_partition - log_base - nat_param . expected_stats
        nat_param, stats = self.nat_param, self.expected_statistics()
        return self.log_partition() - self.log_base()\
            - (np.dot(nat_param[0], stats[0]) + np.dot(nat_param[1], stats[1])
               + np.dot(nat_param[2], stats[2]) + np.dot(nat_param[3], stats[3]))

    def cross_entropy(self, dist):
        # H(dist, self) = -E_dist[log self]: self's natural parameters
        # paired with dist's expected statistics
        nat_param, stats = self.nat_param, dist.expected_statistics()
        return self.log_partition() - self.log_base()\
            - (np.dot(nat_param[0], stats[0]) + np.dot(nat_param[1], stats[1])
               + np.dot(nat_param[2], stats[2]) + np.dot(nat_param[3], stats[3]))

    # This implementation is valid but terribly slow
    def _expected_log_likelihood(self, x):
        # The natural parameters of the marginal log-distribution
        # are the expected statistics of the posterior.
        nat_param = self.expected_statistics()

        # Data statistics under a Gaussian likelihood;
        # the log-partition is subsumed into nat * stats.
        likelihood = GaussianWithDiagonalPrecision(mu=np.empty_like(nat_param[0]))
        stats = likelihood.statistics(x, vectorize=True)
        log_base = likelihood.log_base()

        return log_base + np.einsum('k,nk->n', nat_param[0], stats[0])\
            + np.einsum('k,nk->n', nat_param[1], stats[1])\
            + np.einsum('k,nk->n', nat_param[2], stats[2])\
            + np.einsum('k,nk->n', nat_param[3], stats[3])

    def expected_log_likelihood(self, x):
        E_x, E_lmbdas_xx, E_log_lmbdas, E_lmbdas = self.expected_statistics()
        return (x**2).dot(E_lmbdas) + x.dot(E_x)\
            + E_lmbdas_xx.sum() + E_log_lmbdas.sum()\
            - 0.5 * self.dim * np.log(2. * np.pi)
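# A quick consistency sketch (not part of the library): std_to_nat and
# nat_to_std should be inverse mappings. Assumes Stats is indexable like a
# sequence; all hyperparameter values below are arbitrary.
mu = np.array([0.5, -1.0])
kappas = np.array([2.0, 3.0])
alphas = np.array([4.0, 5.0])
betas = np.array([1.5, 2.5])

nat = NormalGamma.std_to_nat([mu, kappas, alphas, betas])
mu_, kappas_, alphas_, betas_ = NormalGamma.nat_to_std(nat)

assert np.allclose(mu, mu_) and np.allclose(kappas, kappas_)
assert np.allclose(alphas, alphas_) and np.allclose(betas, betas_)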