def estimate(self, n_iter=10000, tol=1e-10):
    e_ij = numpy.array([ensemble.energy(self._e)
                        for ensemble in self._ensembles]).T
    f = self._f
    log_n = log(self._n)

    self._L = []

    for _i in range(n_iter):

        ## update density of states
        log_g = -log_sum_exp((-e_ij - f + log_n).T, 0)
        log_g -= log_sum_exp(log_g)

        ## update free energies
        f = log_sum_exp((-e_ij.T + log_g).T, 0)

        self._L.append((self._n * f).sum() - log_g.sum())
        self._f = f
        self._log_g = log_g

        if self._stop_criterium(tol):
            break

    return f, log_g

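## All estimators in this collection lean on csb.numeric.log_sum_exp for
## numerically stable summation in log space. For reference, a minimal
## stand-in with the same reduce-along-an-axis behaviour (an assumption
## inferred from the call sites here, not CSB's exact implementation):

import numpy as np

def log_sum_exp_sketch(x, axis=0):
    """Stable log(sum(exp(x), axis)): shift by the maximum so the largest
    exponentiated term is exp(0) = 1, which avoids overflow."""
    x = np.asarray(x)
    x_max = x.max(axis)
    return np.log(np.exp(x - np.expand_dims(x_max, axis)).sum(axis)) + x_max

## e.g. log_sum_exp_sketch(np.array([1000., 1000.])) returns 1000 + log(2),
## whereas the naive np.log(np.exp(x).sum()) overflows to inf
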
def log_z(self, beta=1., ensembles=None):
    """
    Use trapezoidal rule to evaluate the partition function.
    """
    from numpy import array, multiply, reshape

    is_float = False
    if type(beta) == float:
        beta = reshape(array(beta), (-1,))
        is_float = True

    x = self._ex[0, 1:] - self._ex[0, :-1]
    y = self._ex[0]
    for i in range(1, self._ex.shape[0]):
        x = multiply.outer(x, self._ex[i, 1:] - self._ex[i, :-1])
        y = multiply.outer(y, self._ex[i])

    y = -multiply.outer(beta, y) + self._log_g
    y = reshape(array([y.T[1:], y.T[:-1]]), (2, -1))
    y = log_sum_exp(y, 0) - log(2)
    y = reshape(y, (-1, len(beta))).T + log(x)

    log_z = log_sum_exp(y.T, 0)

    if is_float:
        return float(log_z)
    else:
        return log_z

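## The pair-averaging trick above (log_sum_exp of adjacent ordinates minus
## log 2, plus log bin widths) is a trapezoidal rule carried out entirely in
## log space. A self-contained one-dimensional sketch of the same idea,
## checked against the Gaussian integral (log_trapz is illustrative, not
## part of the original code):

import numpy as np

def log_trapz(log_y, x):
    """log of the trapezoidal integral of exp(log_y) over the grid x."""
    log_means = np.logaddexp(log_y[1:], log_y[:-1]) - np.log(2.)
    terms = log_means + np.log(np.diff(x))
    m = terms.max()
    return m + np.log(np.exp(terms - m).sum())

beta = 2.0
grid = np.linspace(-20., 20., 20001)
print(log_trapz(-0.5 * beta * grid**2, grid))  # ~0.5724
print(0.5 * np.log(2 * np.pi / beta))          # analytic: 0.5 * log(2*pi/beta)
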
def histogram(w_f, w_r, n_iter=1e5, tol=1e-10, alpha=0., return_histogram=False):
    """
    Histogram estimator analogous to histogram methods used in DOS estimation.
    """
    w = np.append(w_f, w_r)
    N = np.array([len(w_f), len(w_r)])
    q = np.multiply.outer(np.array([0., 1.]) - alpha, w)
    p = np.zeros(len(w)) - np.log(len(w))
    L = []

    for _ in range(int(n_iter)):

        f = -log_sum_exp((-q + p).T, 0)

        ## store log likelihood and report on progress
        L.append(-np.dot(N, f) - p.sum())

        ## update log histogram and normalize
        p = -log_sum_exp((-q.T + f + np.log(N)).T, 0)
        p -= log_sum_exp(p)

        if len(L) > 1 and abs((L[-2] - L[-1]) / (L[-2] + L[-1])) < tol:
            break

    p = Entropy(w, p)

    if return_histogram:
        return p.log_Z(-alpha) - p.log_Z(1 - alpha), p
    else:
        return p.log_Z(-alpha) - p.log_Z(1 - alpha)

def estimate_Z(self, beta=1.):
    prob = - 0.5 * beta * self.delta / self.sigma**2 \
           - self.D * beta * self.M * np.log(self.sigma)

    if not self.sequential_prior:
        prob += log(self.w)
        prob = np.exp((prob.T - log_sum_exp(prob.T, 0)).T)
        for n in range(self.N):
            self.Z[n, :] = np.random.multinomial(1, prob[n])
    else:
        a = log(self.w)
        b = log((1 - self.w) / (self.K - 1))
        for n in range(self.N):
            p = prob[n]
            if n > 1:
                p += self.Z[n-1] * a + (1 - self.Z[n-1]) * b
            p = np.exp(p - log_sum_exp(p))
            self.Z[n, :] = np.random.multinomial(1, p)

def log_g(self, normalize=True):
    e_ij = numpy.array([ensemble.energy(self._e)
                        for ensemble in self._ensembles]).T
    log_g = -log_sum_exp((-e_ij - self._f + log(self._n)).T, 0)
    if normalize:
        log_g -= log_sum_exp(log_g)
    return log_g

def calculate_evidence(dos):
    """Calculates the evidence from a DOS object

    :param dos: DOS object (output from calculate_DOS)
    :type dos: DOS

    :returns: log-evidence (without additive constants stemming from
              likelihood normalization)
    :rtype: float
    """
    from csb.numeric import log_sum_exp

    return log_sum_exp(-dos.E.sum(1) + dos.s) - \
           log_sum_exp(-dos.E[:, 1] + dos.s)

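## The two log_sum_exp terms are log-partition functions over the same
## density of states: the first with the total energy (both columns of
## dos.E summed), the second with only the second column switched on; the
## difference is a log-ratio of partition functions. A toy stand-in is
## enough to exercise the function; the namedtuple below is a hypothetical
## substitute for the real DOS class, with field names taken from the calls
## above:

from collections import namedtuple
import numpy as np

ToyDOS = namedtuple('ToyDOS', ['E', 's'])
toy = ToyDOS(E=np.random.random((50, 2)),   # 50 bins x 2 energy terms
             s=np.full(50, -np.log(50.)))   # flat, normalized log-DOS
print(calculate_evidence(toy))
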
def testMaxent(self):
    k = 2
    data = csb.io.load(self.data_fn)
    model = MaxentModel(k)
    model.sample_weights()
    posterior = MaxentPosterior(model, data[:100000] / 180. * numpy.pi)

    model.get() * 1.

    x0 = posterior.model.get().flatten()
    target = lambda w: -posterior(w, n=50)

    x = fmin_powell(target, x0, disp=False)

    self.assertTrue(x is not None)
    self.assertTrue(len(x) == k * k * 4)

    posterior.model.set(x)
    posterior.model.normalize(True)

    xx = numpy.linspace(0, 2 * numpy.pi, 500)
    fx = posterior.model.log_prob(xx, xx)

    self.assertAlmostEqual(posterior.model.log_z(integration='simpson'),
                           posterior.model.log_z(integration='trapezoidal'),
                           places=2)
    self.assertTrue(fx is not None)

    z = numpy.exp(log_sum_exp(numpy.ravel(fx)))
    self.assertAlmostEqual(z * xx[1] ** 2, 1., places=1)

def testTruncatedGamma(self):
    alpha = 2.
    beta = 1.
    x_min = 0.1
    x_max = 5.

    x = truncated_gamma(10000, alpha, beta, x_min, x_max)
    self.assertTrue((x <= x_max).all())
    self.assertTrue((x >= x_min).all())

    hy, hx = density(x, 100)
    hx = 0.5 * (hx[1:] + hx[:-1])
    hy = hy.astype('d')

    with warnings.catch_warnings(record=True) as warning:
        warnings.simplefilter("always")
        hy /= (hx[1] - hx[0]) * hy.sum()
        self.assertLessEqual(len(warning), 1)
        if len(warning) == 1:
            warning = warning[0]
            self.assertEqual(warning.category, RuntimeWarning)
            self.assertTrue(str(warning.message).startswith(
                'divide by zero encountered'))

    x = numpy.linspace(x_min, x_max, 1000)
    p = (alpha - 1) * log(x) - beta * x
    p -= log_sum_exp(p)
    p = exp(p) / (x[1] - x[0])

def e_step(self):
    p = -0.5 * self.delta / self.sigma**2 \
        - 3 * self.M * np.log(self.sigma) \
        + np.log(self.w)
    p = np.exp((p.T - log_sum_exp(p.T, 0)).T)
    self.Z[:, :] = p

def probs(self):
    """
    Soft assignments between points and coarse grained sites.
    """
    log_prob = -0.5 * self.distances.T / self.params.s**2
    log_prob -= log_sum_exp(log_prob, 0)
    return np.exp(log_prob.T)

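## probs and e_step above share one pattern: a log-domain softmax. The
## log-probabilities are shifted by log_sum_exp over components before
## exponentiation, so the responsibilities are well defined even when the
## raw probabilities would underflow. A minimal illustration with made-up
## numbers:

import numpy as np

log_p = np.array([[-1200., -1201.],   # two points x two components; the raw
                  [-3., -1.]])        # probabilities in row 0 underflow to 0

shifted = (log_p.T - log_p.max(axis=1)).T
resp = np.exp(shifted)
resp = (resp.T / resp.sum(axis=1)).T

print(resp)  # rows sum to one: [[0.731, 0.269], [0.119, 0.881]]
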
def bar(w_f, w_r, tol=1e-4):
    """
    Bennett's acceptance ratio.
    """
    dF = 0.5 * (jarzynski(w_f) - jarzynski(-w_r))

    while True:
        lhs = log_sum_exp(-np.log(1 + np.exp(+w_f - dF)))
        rhs = log_sum_exp(-np.log(1 + np.exp(-w_r + dF)))
        incr = rhs - lhs
        dF += incr
        if abs(incr) < tol:
            break

    return dF

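## A quick check of bar against synthetic work data. For Gaussian work
## distributions the Crooks relation fixes the means to dF + sigma^2/2
## (forward) and -dF + sigma^2/2 (reverse), so the true free-energy
## difference is known. The numbers below are made up for illustration;
## jarzynski is defined further down in this collection:

import numpy as np

np.random.seed(0)
dF_true, sigma = 2.0, 1.5
w_f = np.random.normal(dF_true + 0.5 * sigma**2, sigma, size=5000)
w_r = np.random.normal(-dF_true + 0.5 * sigma**2, sigma, size=5000)

print(jarzynski(w_f))    # forward-only estimate of dF
print(-jarzynski(w_r))   # reverse-only estimate of dF
print(bar(w_f, w_r))     # two-sided estimate, ~2.0 = dF_true
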
def E_mean(self, beta):
    """
    Average energy.
    """
    p = -beta * self.E + self.s
    p -= log_sum_exp(p)
    p = np.exp(p)
    return np.dot(p, self.E)

def estimate_scales(self, beta=1.0):
    """
    Update scales from current model and samples

    @param beta: inverse temperature
    @type beta: float
    """
    from csb.numeric import log, log_sum_exp, exp

    s_sq = (self.sigma ** 2).clip(1e-300, 1e300)
    Z = (log(self.w) - 0.5 * (self.delta / s_sq
                              + self.dimension * log(s_sq))) * beta

    self.scales = exp(Z.T - log_sum_exp(Z.T))

def log_likelihood_reduced(self):
    """
    Log-likelihood of the marginalized model (no auxiliary indicator variables)

    @rtype: float
    """
    from csb.numeric import log, log_sum_exp

    s_sq = (self.sigma ** 2).clip(1e-300, 1e300)
    log_p = log(self.w) - 0.5 * (self.delta / s_sq
                                 + self.dimension * log(2 * numpy.pi * s_sq))

    return log_sum_exp(log_p.T).sum()

def swap_rate(log_p, log_q, return_log=True):
    r"""
    Computes the (log) swap rate of a replica exchange simulation, i.e.

        rate(p <-> q) = \int p(x) q(y) \min[1, p(y)q(x) / (p(x)q(y))] dx dy
    """
    log_r = np.add.outer(log_p - log_sum_exp(log_p),
                         log_q - log_sum_exp(log_q))

    ## mask implementing the min operator
    mask = (log_r < log_r.T).astype('i')
    mask = (1 + mask - mask.T).flatten()

    log_r.shape = (-1,)

    rate = log_sum_exp(log_r[mask > 0] + np.log(mask[mask > 0]))

    return rate if return_log else np.exp(rate)

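## Sanity check on a discretized example: two replicas with identical
## distributions always swap (rate 1), while a temperature mismatch lowers
## the rate. The grid and potential below are made up for illustration:

import numpy as np

grid = np.linspace(-10., 10., 1001)
log_p = -0.5 * grid**2    # harmonic potential at beta = 1
log_q = -0.25 * grid**2   # the same potential at beta = 0.5

print(swap_rate(log_p, log_p, return_log=False))  # 1.0: identical replicas
print(swap_rate(log_p, log_q, return_log=False))  # < 1 for mismatched betas
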
def log_g(self, normalize=True):
    """
    Return the density of states (DOS).

    @param normalize: Ensure that the density of states sums to one
    @rtype: numpy array
    """
    if normalize:
        return self._log_g - log_sum_exp(self._log_g)
    else:
        return self._log_g

def log_prob(self, x):
    from csb.numeric import log_sum_exp

    dim = self._d
    s = self.scales

    log_p = numpy.squeeze(-numpy.multiply.outer(x * x, 0.5 * s)) + \
            numpy.squeeze(dim * 0.5 * (log(s) - log(2 * numpy.pi)))
    if self._prior is not None:
        log_p += numpy.squeeze(self._prior.log_prob(s))

    return log_sum_exp(log_p.T, 0)

def log_z(self, beta=1., ensembles=None):
    from numpy import multiply

    if ensembles is not None:
        e_ij_prime = numpy.array([ensemble.energy(self._e)
                                  for ensemble in ensembles])
    else:
        e_ij_prime = multiply.outer(beta, self._e)

    log_z = log_sum_exp((-e_ij_prime + self.log_g()).T, 0)

    return log_z

def estimate(self, n_bins=100, n_iter=10000, tol=1e-10):
    self._L = []

    h, e = histogram_nd(self._e, nbins=n_bins, normalize=False)
    self._ex = e = numpy.array(e)
    self._h = h

    f = self._f
    log_h = log(h)
    log_g = h * 0.0
    log_g -= log_sum_exp(log_g)
    log_n = log(self._n)

    e_ij = -numpy.squeeze(numpy.array([ensemble.energy(e)
                                       for ensemble in self._ensembles])).T

    for _i in range(n_iter):

        ## update density of states
        y = log_sum_exp(numpy.reshape((e_ij - f + log_n).T, (len(f), -1)), 0)
        log_g = log_h - numpy.reshape(y, log_g.shape)
        log_g -= log_sum_exp(log_g)

        ## update free energies
        f = log_sum_exp(numpy.reshape(e_ij.T + log_g.flatten(),
                                      (len(f), -1)).T, 0)

        self._L.append((self._n * f).sum() - (h * log_g).sum())
        self._log_g = log_g
        self._f = f

        if self._stop_criterium(tol):
            break

    return f, log_g

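## The two update equations (DOS from histogram counts, free energies from
## the DOS) can be exercised on a toy problem: energies of a 1D harmonic
## well sampled at two inverse temperatures, pooled into one histogram. A
## self-contained sketch using plain numpy; all names below are local to
## this example:

import numpy as np

np.random.seed(1)
betas = np.array([1.0, 0.2])
n = np.array([2000, 2000])

## E = x^2/2 with x ~ N(0, 1/beta) drawn per ensemble, then pooled
E = np.concatenate([0.5 * np.random.normal(0., b**-0.5, size=m)**2
                    for b, m in zip(betas, n)])
counts, edges = np.histogram(E, bins=100)
E_bin = 0.5 * (edges[1:] + edges[:-1])

e_kb = np.multiply.outer(betas, E_bin)   # beta_k * E_b
f = np.zeros(len(betas))                 # log Z_k estimates
for _ in range(500):
    ## DOS: counts divided by the mixture of Boltzmann factors
    log_g = np.log(counts + 1e-300) \
            - np.logaddexp.reduce((-e_kb.T - f + np.log(n)).T, axis=0)
    log_g -= np.logaddexp.reduce(log_g)
    ## free energies: f_k = log sum_b g_b exp(-beta_k E_b)
    f = np.logaddexp.reduce(-e_kb + log_g, axis=1)

print(f[0] - f[1])   # ~ -0.80 = 0.5 * log(beta_2/beta_1), since Z ~ beta**-0.5
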
def testTruncatedNormal(self):
    mu = 2.
    sigma = 1.
    x_min = -1.
    x_max = 5.

    x = truncated_normal(10000, mu, sigma, x_min, x_max)

    self.assertAlmostEqual(numpy.mean(x), mu, delta=1e-1)
    self.assertAlmostEqual(numpy.var(x), sigma, delta=1e-1)
    self.assertTrue((x <= x_max).all())
    self.assertTrue((x >= x_min).all())

    hy, hx = density(x, 100)
    hx = 0.5 * (hx[1:] + hx[:-1])
    hy = hy.astype('d')

    with warnings.catch_warnings(record=True) as warning:
        warnings.simplefilter("always")
        hy /= (hx[1] - hx[0]) * hy.sum()
        self.assertLessEqual(len(warning), 1)
        if len(warning) == 1:
            warning = warning[0]
            self.assertEqual(warning.category, RuntimeWarning)
            self.assertTrue(str(warning.message).startswith(
                'divide by zero encountered'))

    x = numpy.linspace(mu - 5 * sigma, mu + 5 * sigma, 1000)
    p = -0.5 * (x - mu) ** 2 / sigma ** 2
    p -= log_sum_exp(p)
    p = exp(p) / (x[1] - x[0])

def sample(self):
    from numpy.random import random
    from numpy import add
    from csb.numeric import log_sum_exp, log, exp

    log_m = self.log_masses()
    log_M = log_sum_exp(log_m)
    c = add.accumulate(exp(log_m - log_M))
    u = random()
    j = (u > c).sum()

    a = self.dh[j]
    z = self.z()

    xmin, xmax = z[j], z[j + 1]

    u = random()

    if a > 0:
        return xmax + log(u + (1 - u) * exp(-a * (xmax - xmin))) / a
    else:
        return xmin + log(u + (1 - u) * exp(a * (xmax - xmin))) / a

def normalize(self):
    self.s -= log_sum_exp(self.s)

def jarzynski(w):
    """
    Estimator based on the Jarzynski equality. This is the estimator used
    in standard annealed importance sampling.
    """
    return np.log(len(w)) - log_sum_exp(-w)

def log_Z(self, beta):
    """
    Log partition function.
    """
    return log_sum_exp(-beta * self.E + self.s)

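## log_Z, E_mean, and normalize above all act on the same pair of arrays: an
## energy grid E and log-weights s (the log density of states). Their common
## recipe can be checked on a flat DOS, where the canonical distribution at
## beta = 1 is a truncated unit exponential. This check is illustrative, not
## part of the original code:

import numpy as np

E = np.linspace(0., 10., 10001)   # energy grid
s = np.zeros_like(E)              # flat log-DOS
beta = 1.0

p = -beta * E + s                 # Boltzmann log-weights, as in E_mean
p -= p.max()
p -= np.log(np.exp(p).sum())      # stable normalization (log_sum_exp)
print(np.dot(np.exp(p), E))       # ~1.0: mean of a unit exponential on [0, 10]
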
    (15, 'it4_15structures_148replicas'),
    (20, 'it4_20structures_148replicas'),
    (25, 'it4_25structures_172replicas'),
    (30, 'it4_30structures_172replicas'),
    (35, 'it4_35structures_172replicas'),
    (40, 'it4_40structures_172replicas'),
    (100, 'it3_100structures_186replicas'),
)

n_structures = [x[0] for x in simulations]
output_dirs = [common_path + x[1] for x in simulations]

logZs = []
data_terms = []
for x in output_dirs:
    dos = np.load(x + '/analysis/dos.pickle')
    logZs.append(log_sum_exp(-dos.E.sum(1) + dos.s) -
                 log_sum_exp(-dos.E[:, 1] + dos.s))

    a = x.find('replicas')
    b = x[a-4:].find('_')
    n_replicas = int(x[a-4+b+1:a])

    p = np.load(x + '/analysis/wham_params.pickle')
    c = parse_config_file(x + '/config.cfg')
    s = load_sr_samples(x + '/samples/', n_replicas, p['n_samples'] + 1,
                        int(c['replica']['samples_dump_interval']),
                        p['burnin'])
    sels = np.load(x + '/analysis/wham_sels.pickle')
    s = s[sels[-1]]

    p = make_posterior(parse_config_file(x + '/config.cfg'))
    L = p.likelihoods['ensemble_contacts']
    d = L.forward_model.data_points[:, 2]

while len(samples) < 1e4:
    samples.append(sampler.next())

if False:
    sampler.run(1e4)
    samples = sampler.samples
    print(sampler.history)

## evaluate true model
y = np.array([state.value for state in samples])
x = gaussian.mu + 5 * gaussian.sigma * np.linspace(-1., 1., 1000)
p = -0.5 * gaussian.tau * (x - gaussian.mu)**2
p -= log_sum_exp(p)
p = np.exp(p - np.log(x[1] - x[0]))

## plot results
kw_hist = dict(normed=True, histtype='stepfilled', bins=50,
               color='k', alpha=0.3)

fig, ax = plt.subplots(1, 4, figsize=(12, 3))

ax[0].plot(y, color='k', alpha=0.3, lw=3)
ax[0].set_xlabel('Monte Carlo iteration')
ax[0].set_xlim(0, burnin)

dos = DOS(energies_flat, wham.s, sort_energies=False)

ana_path = output_folder + 'analysis/'
if not os.path.exists(ana_path):
    os.makedirs(ana_path)
with open(ana_path + 'dos_it{}.pickle'.format(sys.argv[6]), 'w') as opf:
    dump(dos, opf)
with open(ana_path + 'wham_params_it{}.pickle'.format(sys.argv[6]), 'w') as opf:
    dump(params, opf)
with open(ana_path + 'wham_sels_it{}.pickle'.format(sys.argv[6]), 'w') as opf:
    dump(np.array(sels), opf)

if False:
    from csb.numeric import log_sum_exp
    from csb.statistics.rand import sample_from_histogram

    logp = lambda beta: dos.s - beta * dos.E - log_sum_exp(dos.s - beta * dos.E)
    p = lambda beta: np.exp(logp(beta))
    betas = np.load(output_folder + 'analysis/interp_dos_sched.pickle')['beta']
    states = [samples[np.unravel_index(sample_from_histogram(p(beta)),
                                       samples.shape)]
              for beta in betas]

    from cPickle import dump
    dump(states, open('/scratch/scarste/ensemble_hic/hairpin_s/initstates.pickle', 'w'))

if False:
    from csb.numeric import log_sum_exp
    from csb.statistics.rand import sample_from_histogram

    n_replicas = 298
    path = '/scratch/scarste/ensemble_hic/nora2012/bothdomains_it3_1structures_{}replicas/'.format(n_replicas)
    dos = np.load(path + 'analysis/dos.pickle')
    schedule = np.load('/scratch/scarste/ensemble_hic/nora2012/bothdomains_lambdatempering_fromstates_40structures_99replicas/schedule.pickle')

## forward sampling, forward work
X_f = [bridge[0].stationary.sample() for _ in range(n_paths)]
E_f = [[energy(x) for x in X_f]]
for T in bridge[1:]:
    X_f = [T(x) for x in X_f]
    E_f += [[energy(x) for x in X_f]]
E_f = np.array(E_f)
W_f = np.dot(beta[1:] - beta[:-1], E_f[:-1])

## compute importance weights of final states
p = np.exp(-W_f - log_sum_exp(-W_f))
p /= p.sum()

## select initial states from reverse simulation according to
## importance weights of final states from forward simulation
X_r = [X_f[i] for i in np.random.multinomial(1, p, size=n_paths).argmax(1)]
E_r = [[energy(x) for x in X_r]]

## backward simulation using reverse bridge (detailed balance!)
for T in bridge[::-1][1:]:
    X_r = [T(x) for x in X_r]
    E_r += [[energy(x) for x in X_r]]
E_r = np.array(E_r)

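## The reweighting above is self-normalized importance sampling; whether the
## multinomial resampling of X_f is safe depends on how degenerate the
## weights are. A standard diagnostic is the effective sample size, shown
## here as an illustrative helper (not part of the original script):

import numpy as np

def effective_sample_size(log_w):
    """ESS = (sum w)^2 / sum(w^2), computed from log-weights."""
    w = np.exp(np.asarray(log_w) - np.max(log_w))
    return w.sum()**2 / np.square(w).sum()

work = np.random.normal(3., 2., size=1000)   # made-up forward work values
print(effective_sample_size(-work))          # far below 1000: heavy degeneracy
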