def log_z(self, beta=1., ensembles=None):
    """
    Use the trapezoidal rule to evaluate the partition function.
    """
    from numpy import array, multiply, reshape

    is_float = False
    if type(beta) == float:
        beta = reshape(array(beta), (-1,))
        is_float = True

    x = self._ex[0, 1:] - self._ex[0, :-1]
    y = self._ex[0]
    for i in range(1, self._ex.shape[0]):
        x = multiply.outer(x, self._ex[i, 1:] - self._ex[i, :-1])
        y = multiply.outer(y, self._ex[i])

    y = -multiply.outer(beta, y) + self._log_g
    y = reshape(array([y.T[1:], y.T[:-1]]), (2, -1))
    y = log_sum_exp(y, 0) - log(2)
    y = reshape(y, (-1, len(beta))).T + log(x)

    log_z = log_sum_exp(y.T, 0)

    if is_float:
        return float(log_z)
    else:
        return log_z
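# Usage sketch (illustrative, not part of the class above): the same
# trapezoidal idea in one dimension. With a density of states on an energy
# grid e_0 < ... < e_m, log Z(beta) is accumulated in log space as
#     log sum_i 0.5 * (exp(y_i) + exp(y_{i+1})) * (e_{i+1} - e_i),
# where y = -beta * e + log_g. All names below are local assumptions; the
# local log_sum_exp mimics csb.numeric.log_sum_exp.
import numpy as np

def log_sum_exp(x, axis=0):
    x_max = np.max(x, axis=axis)
    return x_max + np.log(np.sum(np.exp(x - x_max), axis=axis))

e = np.linspace(0., 10., 100)        # energy grid
log_g = np.zeros_like(e)             # flat density of states
beta = 1.0

y = -beta * e + log_g                # log integrand on the grid
pair = np.array([y[1:], y[:-1]])     # adjacent grid values
log_terms = log_sum_exp(pair, 0) - np.log(2) + np.log(e[1:] - e[:-1])
print(log_sum_exp(log_terms, 0))     # ~ log(1 - exp(-10)) ~ 0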
def estimate_Z(self, beta=1.):
    """
    Sample the indicator variables Z from their conditional posterior.
    """
    prob = -0.5 * beta * self.delta / self.sigma ** 2 \
           - self.D * beta * self.M * np.log(self.sigma)

    if not self.sequential_prior:
        prob += log(self.w)
        prob = np.exp((prob.T - log_sum_exp(prob.T, 0)).T)
        for n in range(self.N):
            self.Z[n, :] = np.random.multinomial(1, prob[n])
    else:
        a = log(self.w)
        b = log((1 - self.w) / (self.K - 1))
        for n in range(self.N):
            p = prob[n]
            if n > 1:
                p += self.Z[n - 1] * a + (1 - self.Z[n - 1]) * b
            p = np.exp(p - log_sum_exp(p))
            self.Z[n, :] = np.random.multinomial(1, p)
def log_z(self, n=500, integration='simpson'):
    """
    Calculate the log partition function.
    """
    from numpy import pi, linspace, max
    from csb.numeric import log, exp

    if integration == 'simpson':
        from csb.numeric import simpson_2d
        x = linspace(0., 2 * pi, 2 * n + 1)
        dx = x[1] - x[0]

        f = -self.beta * self.energy(x)
        f_max = max(f)
        f -= f_max

        I = simpson_2d(exp(f))
        return log(I) + f_max + 2 * log(dx)

    elif integration == 'trapezoidal':
        from csb.numeric import trapezoidal_2d
        x = linspace(0., 2 * pi, n)
        dx = x[1] - x[0]

        f = -self.beta * self.energy(x)
        f_max = max(f)
        f -= f_max

        I = trapezoidal_2d(exp(f))
        return log(I) + f_max + 2 * log(dx)

    else:
        raise NotImplementedError(
            "Choose 'simpson' or 'trapezoidal' integration")
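# Cross-check sketch for the Simpson branch (standalone, hypothetical
# setup): a minimal 2D composite Simpson rule on a (2n+1) x (2n+1) grid
# over [0, 2*pi]^2. It is only meant to mirror what csb.numeric.simpson_2d
# is assumed to do; with zero energy the result should be log((2*pi)**2).
import numpy as np

n = 500
x = np.linspace(0., 2 * np.pi, 2 * n + 1)
dx = x[1] - x[0]
f = np.zeros((len(x), len(x)))       # -beta * E over the grid, E == 0 here

w = np.ones(len(x))                  # 1D Simpson weights 1, 4, 2, ..., 4, 1
w[1:-1:2] = 4.
w[2:-1:2] = 2.

I = np.dot(w, np.dot(np.exp(f), w)) / 9.
print(np.log(I) + 2 * np.log(dx))    # ~ 2 * log(2 * pi) ~ 3.6757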
def log_Z(self, beta=1.):
    from csb.numeric import log
    from scipy.special import gammainc, gammaln

    return log(0.5 * self.d) + log(gammainc(0.5 * self.d, 0.5 * self.k)) + \
           gammaln(0.5 * self.d) + (0.5 * self.d) * (log(2) - log(self.k))
def estimate(self, context, data):
    """
    Estimate alpha and beta directly from the data by inverting
    psi(alpha) - log(alpha) (see inv_digamma_minus_log).
    """
    pdf = GammaPrior()

    y = log(data).mean() - log((data ** -1).mean())
    alpha = inv_digamma_minus_log(numpy.clip(y, -1e308, -1e-300))
    alpha = abs(alpha)
    beta = numpy.clip(alpha / (data ** (-1)).mean(), 1e-100, 1e100)

    pdf.alpha, pdf.beta = alpha, beta
    return pdf
def log_prob(self, x):
    a = self['a']
    b = self['b']
    p = self['p']

    ## log normalization constant of the generalized inverse Gaussian
    lz = 0.5 * p * (log(a) - log(b)) - \
         log(2 * scipy.special.kv(p, sqrt(a * b)))

    return lz + (p - 1) * log(x) - 0.5 * (a * x + b / x)
def log_prob(self, x):
    from numpy.linalg import det

    mu = self.mu
    S = self.sigma
    D = len(mu)
    q = self.__q(x)

    return -0.5 * (D * log(2 * pi) + log(abs(det(S)))) - 0.5 * q ** 2
def testLog(self):
    from csb.numeric import log, LOG_MAX, LOG_MIN
    from numpy import log as ref_log

    x = np.linspace(LOG_MIN, LOG_MAX, 1000000)

    self.assertTrue((log(x) == ref_log(x)).all())
    self.assertEqual(log(10 * LOG_MAX), log(LOG_MAX))
    self.assertEqual(log(0.1 * LOG_MIN), log(LOG_MIN))
def log_likelihood_reduced(self):
    """
    Log-likelihood of the marginalized model (no auxiliary indicator
    variables).

    @rtype: float
    """
    from csb.numeric import log, log_sum_exp

    s_sq = (self.sigma ** 2).clip(1e-300, 1e300)
    log_p = log(self.w) - 0.5 * \
            (self.delta / s_sq + self.dimension * log(2 * numpy.pi * s_sq))

    return log_sum_exp(log_p.T).sum()
def energy(self, raw_energies):
    from numpy import isinf

    if isinf(self.beta):
        ## limiting case: zero below e_max, infinite above
        m = (raw_energies >= self.e_max).astype('f')
        return -m * log(0.)
    else:
        x = 1 + exp(self.beta * (raw_energies - self.e_max))
        return log(x)
def estimate_scales(self, beta=1.0):
    """
    Update scales from current model and samples.

    @param beta: inverse temperature
    @type beta: float
    """
    from csb.numeric import log, log_sum_exp, exp

    s_sq = (self.sigma ** 2).clip(1e-300, 1e300)
    Z = (log(self.w) - 0.5 * (self.delta / s_sq +
                              self.dimension * log(s_sq))) * beta

    self.scales = exp(Z.T - log_sum_exp(Z.T))
def log_prob(self, x):
    mu = self.mu
    scale = self.shape

    x = numpy.array(x)
    if numpy.min(x) <= 0:
        raise ValueError('InverseGaussian is defined for x > 0')

    y = -0.5 * scale * (x - mu) ** 2 / (mu ** 2 * x)
    z = 0.5 * (log(scale) - log(2 * pi * x ** 3))

    return z + y
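# Normalization self-check sketch (standalone; mu and scale chosen
# arbitrarily): the inverse Gaussian density above should integrate to
# one over x > 0.
import numpy as np

mu, scale = 1.0, 2.0
x = np.linspace(1e-5, 50., 200000)
log_p = 0.5 * (np.log(scale) - np.log(2 * np.pi * x ** 3)) \
        - 0.5 * scale * (x - mu) ** 2 / (mu ** 2 * x)

p = np.exp(log_p)
print((0.5 * (p[1:] + p[:-1]) * (x[1] - x[0])).sum())   # ~ 1.0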
def estimate(self, context, data):
    """
    Maximum-likelihood fit of a Gamma distribution via fixed-point
    iteration on the digamma equation.
    """
    mu = mean(data)
    logmean = mean(log(data))

    a = 0.5 / (log(mu) - logmean)
    for dummy in range(self.n_iter):
        a = inv_psi(logmean - log(mu) + log(a))

    return Gamma(a, a / mu)
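# Verification sketch (standalone, hypothetical data): draw Gamma samples
# and run the same fixed-point update, with scipy's digamma standing in
# for inv_psi. The recovered shape and rate should be close to the truth.
import numpy as np
from scipy.special import digamma
from scipy.optimize import brentq

data = np.random.gamma(3.0, 0.5, 100000)    # shape 3, scale 0.5 (rate 2)
mu = data.mean()
logmean = np.log(data).mean()

a = 0.5 / (np.log(mu) - logmean)            # same initial guess as above
for _ in range(10):
    y = logmean - np.log(mu) + np.log(a)    # target for the psi inverse
    a = brentq(lambda t, y=y: digamma(t) - y, 1e-6, 1e6)

print(a, a / mu)                            # ~ (3.0, 2.0)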
def log_prob(self, x):
    from csb.numeric import log_sum_exp

    dim = self._d
    s = self.scales

    log_p = numpy.squeeze(-numpy.multiply.outer(x * x, 0.5 * s)) + \
            numpy.squeeze(dim * 0.5 * (log(s) - log(2 * numpy.pi)))

    if self._prior is not None:
        log_p += numpy.squeeze(self._prior.log_prob(s))

    return log_sum_exp(log_p.T, 0)
def log_likelihood(self):
    """
    Log-likelihood of the extended model (with indicators).

    @rtype: float
    """
    from csb.numeric import log
    from numpy import pi, sum

    n = self.scales.sum(1)
    N = self.dimension
    Z = self.scales.T
    s_sq = (self.sigma ** 2).clip(1e-300, 1e300)

    return sum(n * log(self.w)) - 0.5 * \
           (sum(Z * self.delta / s_sq) +
            N * sum(n * log(2 * pi * s_sq)) +
            sum(log(s_sq)))
def estimate(self, context, data):
    """
    Generate samples from the posterior of alpha and beta.

    For beta the posterior is a gamma distribution and analytically
    accessible. The posterior of alpha cannot be expressed analytically
    and is approximated using adaptive rejection sampling.
    """
    pdf = GammaPrior()

    ## sufficient statistics
    h = harmonic_mean(numpy.clip(data, 1e-308, 1e308))
    g = geometric_mean(numpy.clip(data, 1e-308, 1e308))

    n = len(data)
    samples = []

    a = numpy.mean(1 / data)
    v = numpy.std(1 / data) ** 2

    beta = a / v
    alpha = beta * a

    for i in range(self.n_samples):

        ## sample alpha with ARS
        logp = ARSPosteriorAlpha(n * (log(beta) - log(g)) -
                                 context.hyper_alpha.beta,
                                 context.hyper_alpha.alpha - 1., n)
        ars = csb.statistics.ars.ARS(logp)
        ars.initialize(logp.initial_values()[:2], z0=0.)

        ## check for failure before taking the absolute value
        alpha = ars.sample()
        if alpha is None:
            raise ValueError("Sampling failed")
        alpha = numpy.abs(alpha)

        ## sample beta from Gamma distribution
        beta = numpy.random.gamma(n * alpha + context.hyper_beta.alpha,
                                  1 / (n / h + context.hyper_beta.beta))

        samples.append((alpha, beta))

    pdf.alpha, pdf.beta = samples[-1]
    return pdf
def BIC(self):
    """
    Bayesian information criterion, calculated as
    BIC = M * ln(sigma_e^2) + K * ln(M)

    @rtype: float
    """
    from numpy import log

    n = self.M
    k = self.K
    error_variance = sum(self.sigma ** 2 * self.w)

    return n * log(error_variance) + k * log(n)
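# Worked example (hypothetical numbers): with M = 100 data points,
# K = 3 mixture components and weighted error variance
# sum(sigma**2 * w) = 0.25,
#     BIC = 100 * ln(0.25) + 3 * ln(100)
#         ~ -138.63 + 13.82 ~ -124.81.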
def estimate(self, n_iter=10000, tol=1e-10):
    e_ij = numpy.array([ensemble.energy(self._e)
                        for ensemble in self._ensembles]).T
    f = self._f
    log_n = log(self._n)

    self._L = []

    for _i in range(n_iter):

        ## update density of states
        log_g = -log_sum_exp((-e_ij - f + log_n).T, 0)
        log_g -= log_sum_exp(log_g)

        ## update free energies
        f = log_sum_exp((-e_ij.T + log_g).T, 0)

        self._L.append((self._n * f).sum() - log_g.sum())

        self._f = f
        self._log_g = log_g

        if self._stop_criterium(tol):
            break

    return f, log_g
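# Toy sketch of the self-consistent iteration above (standalone, all
# values hypothetical): two Boltzmann ensembles at inverse temperatures
# 1 and 2 over a shared energy grid, where e_ij plays the role of
# ensemble.energy(self._e). The local log_sum_exp mimics
# csb.numeric.log_sum_exp.
import numpy as np

def log_sum_exp(x, axis=0):
    x_max = np.max(x, axis=axis)
    return x_max + np.log(np.sum(np.exp(x - x_max), axis=axis))

e = np.linspace(0., 5., 50)              # pooled sample energies
betas = np.array([1., 2.])
e_ij = np.multiply.outer(e, betas)       # e_ij[i, j] = beta_j * e_i
n = np.array([100., 100.])               # counts per ensemble
f = np.zeros(2)                          # free energies

for _ in range(200):
    log_g = -log_sum_exp((-e_ij - f + np.log(n)).T, 0)   # density of states
    log_g -= log_sum_exp(log_g)
    f = log_sum_exp((-e_ij.T + log_g).T, 0)              # free energies

print(f[0] - f[1])                       # estimated free-energy difference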
def testTruncatedGamma(self):
    alpha = 2.
    beta = 1.
    x_min = 0.1
    x_max = 5.

    x = truncated_gamma(10000, alpha, beta, x_min, x_max)
    self.assertTrue((x <= x_max).all())
    self.assertTrue((x >= x_min).all())

    hy, hx = density(x, 100)
    hx = 0.5 * (hx[1:] + hx[:-1])
    hy = hy.astype('d')

    with warnings.catch_warnings(record=True) as warning:
        warnings.simplefilter("always")
        hy /= (hx[1] - hx[0]) * hy.sum()

        self.assertLessEqual(len(warning), 1)
        if len(warning) == 1:
            warning = warning[0]
            self.assertEqual(warning.category, RuntimeWarning)
            self.assertTrue(str(warning.message).startswith(
                'divide by zero encountered'))

    x = numpy.linspace(x_min, x_max, 1000)
    p = (alpha - 1) * log(x) - beta * x
    p -= log_sum_exp(p)
    p = exp(p) / (x[1] - x[0])
def inv_digamma_minus_log(y, tol=1e-10, n_iter=100):
    """
    Solve y = psi(alpha) - log(alpha) for alpha by Newton iteration.
    """
    if y >= -log(6.):
        x = 1 / (2 * (1 - exp(y)))
    else:
        x = 1.e-10

    for _i in range(n_iter):
        z = approx_psi(x) - log(x) - y
        if abs(z) < tol:
            break
        x -= x * z / (x * d_approx_psi(x) - 1)
        x = abs(x)

    return x
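# Round-trip check sketch (assumes the csb helpers approx_psi and
# d_approx_psi used above are available and track scipy's digamma):
# psi(alpha) - log(alpha) is strictly increasing in alpha, so inverting
# it should recover alpha up to the approximation error.
from numpy import log as np_log
from scipy.special import digamma

alpha = 2.5
y = digamma(alpha) - np_log(alpha)      # always negative for alpha > 0
print(inv_digamma_minus_log(y))         # ~ 2.5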
def log_prob(self, x):
    mu = self.mu
    beta = self.beta

    z = (x - mu) / beta
    return log(1. / beta) - z - exp(-z)
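# Cross-check sketch: this is the standard right-skewed Gumbel density,
# so it should agree with scipy.stats.gumbel_r.logpdf with loc=mu and
# scale=beta. Parameter values below are arbitrary.
import numpy as np
from scipy.stats import gumbel_r

mu, beta, x = 1.0, 2.0, 0.3
z = (x - mu) / beta
print(np.log(1. / beta) - z - np.exp(-z))
print(gumbel_r.logpdf(x, loc=mu, scale=beta))   # same value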
def testLogTrapezoidal(self):
    from csb.numeric import log_trapezoidal, log

    x = np.linspace(-100., 100, 1000)
    y = -0.5 * x * x - log(np.sqrt(2 * np.pi))

    self.assertTrue(abs(log_trapezoidal(y, x)) <= 1e-8)
def log_prob(self, x):
    mu = self.mu
    sigma = self.sigma

    return log(1.0 / sqrt(2 * pi * sigma ** 2)) - \
           (x - mu) ** 2 / (2 * sigma ** 2)
def log_prob(self, x):
    mu = self.mu
    alpha = self.alpha
    beta = self.beta

    return log(beta / (2.0 * alpha)) - gammaln(1. / beta) - \
           power(fabs(x - mu) / alpha, beta)
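# Cross-check sketch: this is the generalized normal (exponential power)
# density, which scipy exposes as gennorm with shape beta, loc mu and
# scale alpha. Parameter values below are arbitrary.
import numpy as np
from scipy.special import gammaln
from scipy.stats import gennorm

mu, alpha, beta, x = 0.0, 1.5, 2.5, 0.7
lp = np.log(beta / (2.0 * alpha)) - gammaln(1. / beta) \
     - np.power(np.fabs(x - mu) / alpha, beta)
print(lp, gennorm.logpdf(x, beta, loc=mu, scale=alpha))   # should agree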
def __call__(self, x):
    from scipy.special import gammaln

    ## returns the log density and its derivative, as required by ARS
    return self.a * x - \
           self.n * gammaln(numpy.clip(x, 1e-308, 1e308)) + \
           self.b * log(x), \
           self.a - self.n * psi(x) + self.b / x
def energy(self, raw_energies):
    q = self.q
    e_min = self.e_min

    if q < 1 + 1e-10:
        return raw_energies * q
    else:
        return log(1 + (raw_energies - e_min) * (q - 1)) * q / (q - 1) + e_min
def estimate(self, context, data):
    """
    Generate samples from the posterior of alpha and beta.

    For beta the posterior is a gamma distribution and analytically
    accessible. The posterior of alpha cannot be expressed analytically
    and is approximated using adaptive rejection sampling.
    """
    pdf = GammaPrior()

    ## sufficient statistics
    a = numpy.mean(data)
    b = exp(numpy.mean(log(data)))
    v = numpy.std(data) ** 2
    n = len(data)

    beta = a / v
    alpha = beta * a

    samples = []

    for _i in range(self.n_samples):

        ## sample beta from Gamma distribution
        beta = numpy.random.gamma(n * alpha + context._hyper_beta.alpha,
                                  1 / (n * a + context._hyper_beta.beta))

        ## sample alpha with ARS
        logp = ARSPosteriorAlpha(n * log(beta * b) -
                                 context.hyper_alpha.beta,
                                 context.hyper_alpha.alpha - 1., n)
        ars = csb.statistics.ars.ARS(logp)
        ars.initialize(logp.initial_values()[:2], z0=0.)
        alpha = ars.sample()

        if alpha is None:
            raise ValueError("ARS failed")

        samples.append((alpha, beta))

    pdf.alpha, pdf.beta = samples[-1]
    return pdf
def log_likelihood(self):
    """
    Return the log-likelihood of the current model.
    """
    N = self.Z.sum(0)
    L = -0.5 * np.sum(self.Z * self.delta / self.sigma ** 2) \
        - 0.5 * self.D * self.M * np.sum(N * log(2 * np.pi * self.sigma ** 2))
    return L
def log_prior(self):
    N = self.Z.sum(0)

    p = -0.5 * np.sum(self.t ** 2) / self.sigma_t ** 2
    p += -0.5 * np.sum(self.Y ** 2) / self.sigma_Y ** 2
    p += np.sum((self.alpha_precision - 1) * log(1 / self.sigma ** 2) -
                self.beta_precision / self.sigma ** 2)

    if not self.sequential_prior:
        p += np.sum((N + self.alpha_w - 1) * np.log(self.w))
    else:
        l = self.membership
        Q = np.sum(l[1:] == l[:-1])
        p += (Q + self.alpha_w - 1) * log(self.w) + \
             (self.N - Q - self.alpha_w - 1) * log(1 - self.w)

    return p
def log_g(self, normalize=True):
    e_ij = numpy.array([ensemble.energy(self._e)
                        for ensemble in self._ensembles]).T

    log_g = -log_sum_exp((-e_ij - self._f + log(self._n)).T, 0)
    if normalize:
        log_g -= log_sum_exp(log_g)

    return log_g
def entropy(self, n=500):
    """
    Calculate the entropy of the model.

    @param n: number of integration points for numerical integration
    @type n: integer
    """
    from csb.numeric import trapezoidal_2d
    from numpy import pi, linspace, max
    from csb.numeric import log, exp

    x = linspace(0., 2 * pi, n)
    dx = x[1] - x[0]

    f = -self.beta * self.energy(x)
    f_max = max(f)

    log_z = log(trapezoidal_2d(exp(f - f_max))) + f_max + 2 * log(dx)
    average_energy = trapezoidal_2d(f * exp(f - f_max)) \
                     * exp(f_max + 2 * log(dx) - log_z)

    return -average_energy + log_z
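# Note on the return value: it implements the Gibbs entropy identity.
# With p(x) = exp(-beta * E(x)) / Z and f = -beta * E as above,
#     S = -<log p> = beta * <E> + log Z = -<f> + log Z,
# and average_energy computed above is exactly <f>.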