def gen_proposal(self, ancestor=None):
    self.prepare_ancestor(ancestor)
    f = ancestor.other["lrate"]
    if True:
        # move along the gradient, then center the step distribution there
        (f, theta_1, lpost_1, grad_1, foo) = find_step_size(
            ancestor.sample, f, ancestor.lpost, ancestor.other["gr"],
            func_and_grad=self.lpost_and_grad)
        sc_gr = f * 0.5 * ancestor.other["gr"]
        cov = ideal_covar(sc_gr, fix_main_var=self.main_var,
                          other_var=self.other_var)
        step_dist = mvnorm(sc_gr, cov)
    else:
        # alternative: an isotropic random-walk step
        step_dist = mvnorm(np.zeros(ancestor.sample.size),
                           np.eye(ancestor.sample.size) * self.main_var)
    (new_samp, step) = gen_sample_prototype(ancestor, self, step_dist=step_dist,
                                            lpost_and_grad_func=self.lpost_and_grad)
    new_samp.other["lrate"] = f
    return new_samp
def test_Rosenbrock():
    np.random.seed(2)

    def lpost_and_grad(theta, grad=True):
        fval = -sp.optimize.rosen(theta)
        if not grad:
            return fval
        else:
            return (fval, -sp.optimize.rosen_der(theta))

    lpost = lambda x: lpost_and_grad(x, False)
    theta = np.array((1, 1))
    dim = 2
    inits = mvnorm([0] * dim, np.eye(dim) * 5).rvs(10)
    for i in range(len(inits)):
        initial = inits[i]
        ###### MCMC ######
        for mk in [mcmc.GaussMHKernel(lpost, 1, True),
                   mcmc.GaussMHKernel(lpost, np.eye(dim), False),
                   mcmc.ComponentWiseSliceSamplingKernel(lpost)]:
            (samp, trace) = mcmc.sample(100, -theta, mk)
            samp_m = samp[len(samp) // 2:].mean(0)
            print(mk, np.mean((samp_m - theta)**2))
            if np.mean((samp_m - theta)**2) > 4:
                print(mk, np.mean((samp_m - theta)**2), samp_m, theta)
                #assert(False)
        ###### PMC ######
        for (prop, num_samp) in [
                (pmc.NaiveRandomWalkProposal(lpost, mvnorm([0] * dim, np.eye(dim) * 5)), 1000),
                (pmc.GradientAscentProposal(lpost_and_grad, dim, lrate=0.1), 100),
                (pmc.ConjugateGradientAscentProposal(lpost_and_grad, dim, lrate=0.1), 100)]:
            (samp, trace) = pmc.sample(num_samp, [-theta] * 10, prop)  #sample_lpost_based
            samp_m = samp[len(samp) // 2:].mean(0)  #samp.mean(0)
            print(prop, np.mean((samp_m - theta)**2))
            if np.mean((samp_m - theta)**2) > 4:
                print(prop, np.mean((samp_m - theta)**2), samp_m, theta)
def test_rkhs_dens_and_operators(D=1, nsamps=200):
    targ = dist.mixt(D, [dist.mvnorm(3 * np.ones(D), np.eye(D) * 0.7**2),
                         dist.mvnorm(7 * np.ones(D), np.eye(D) * 1.5**2)],
                     [0.5, 0.5])
    out_samps = targ.rvs(nsamps)
    gk_x = GaussianKernel(0.7)
    de = RKHSDensityEstimator(out_samps, gk_x, 0.1)
    x = np.linspace(-1, 12, 200)

    pl.figure()
    pl.plot(x, exp(targ.logpdf(x)), 'k-', label='truth')
    pl.plot(x, de.eval_rkhs_density_approx(x[:, None]), 'b--', label='Density estimate')
    pl.plot(x, de.eval_kme(x[:, None]), 'r:', label='KDE/Kernel mean embedding')
    pl.legend(loc='best')
    pl.savefig('Density_estimation_(preimage_of_KDE).pdf')

    inp_samps = (out_samps - 5)**2 + np.random.randn(*out_samps.shape)
    gk_y = GaussianKernel(1)
    cme = ConditionMeanEmbedding(inp_samps, out_samps, gk_y, gk_x, 5)
    cdo = ConditionDensityOperator(inp_samps, out_samps, gk_y, gk_x, 5, 5)

    (fig, ax) = pl.subplots(3, 1, True, False, figsize=(10, 10))
    ax[2].scatter(out_samps, inp_samps, alpha=0.3)
    ax[2].axhline(0, 0, 8, color='r', linestyle='--')
    ax[2].axhline(5, 0, 8, color='r', linestyle='--')
    ax[2].set_title("Input: y, output: x, %d pairs" % nsamps)
    ax[2].set_yticks((0, 5))

    d = cdo.lhood(np.array([[0.], [5.]]), x[:, None]).T
    e = cme.lhood(np.array([[0.], [5.]]), x[:, None]).T
    assert d.shape[0] == 2
    assert np.allclose(d[0], cdo.lhood(0, x[:, None]))
    assert np.allclose(d[1], cdo.lhood(5, x[:, None]))

    ax[1].plot(x, d[1], '-', label='cond. density')
    ax[1].plot(x, e[1], '--', label='cond. mean emb.')
    ax[1].set_title("p(x|y=5)")
    ax[0].plot(x, d[0], '-', label='cond. density')
    ax[0].plot(x, e[0], '--', label='cond. mean emb.')
    ax[0].set_title("p(x|y=0)")
    ax[0].legend(loc='best')
    fig.show()
    fig.savefig("conditional_density_operator.pdf")
def gen_gauss_diag_lpost(num_datasets, dims, ev_params=[(80, 10), (40, 10)],
                         cov_var_const=4, with_grad=False):
    def gen_lp_unnorm_ev(lev, distr_norm, with_grad=False):
        rval = lambda x: distr_norm.logpdf(x) + lev
        rval.log_evidence = lev
        if with_grad:
            rval.lpdf_and_grad = lambda x, pdf, grad: distr_norm.log_pdf_and_grad(x, pdf, grad)
        return rval

    rval = []
    for ep in ev_params:
        lev_distr = stats.gamma(ep[0], scale=ep[1])
        for i in range(int(num_datasets // len(ev_params))):
            while True:
                try:
                    m = stats.multivariate_normal.rvs([0] * dims, np.eye(dims) * 1000)
                    K = np.eye(dims)
                    val = gen_lp_unnorm_ev(-lev_distr.rvs(), mvnorm(m, K),
                                           with_grad=with_grad)
                    val.mean = m
                    val.cov = K
                    rval.append(val)
                    break
                except np.linalg.LinAlgError:
                    import sys
                    # the drawn matrix was not invertible; try again
                    print("np.linalg.LinAlgError - trying again", file=sys.stderr)
    return rval
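# Usage sketch: each generated element is an unnormalized log-posterior whose
# true log evidence is stored on the function object, so evidence estimators can
# be checked against ground truth (lp.mean, lp.cov, lp.log_evidence are set above):
posteriors = gen_gauss_diag_lpost(num_datasets=4, dims=2)
lp = posteriors[0]
x = np.zeros(2)
# subtracting the stored log evidence recovers the normalized Gaussian log pdf
print(lp(x), lp(x) - lp.log_evidence, mvnorm(lp.mean, lp.cov).logpdf(x))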
def test_optimization_gradient_ascent():
    num_dims = 4
    lds = sobol_seq.i4_sobol_generate(num_dims, 100).T
    var = 5
    K = np.eye(num_dims) * var
    Ki = np.linalg.inv(K)
    L = np.linalg.cholesky(K)
    logdet_K = np.linalg.slogdet(K)[1]
    samp = mvnorm(np.array([1000] * num_dims), K,
                  Ki=Ki, L=L, logdet_K=logdet_K).ppf(lds)
    m_samp = samp.mean(0)

    def lpost_and_grad(theta):
        (llh, gr) = mvnorm(theta, K, Ki=Ki, L=L,
                           logdet_K=logdet_K).log_pdf_and_grad(samp)
        return (llh.sum(), -gr.sum(0).flatten())

    theta = np.array([0] * num_dims)
    mse_cga = ((m_samp - conjugate_gradient_ascent(theta, lpost_and_grad)[0])**2).mean()
    mse_ga = ((m_samp - gradient_ascent(theta, lpost_and_grad)[0])**2).mean()
    print(mse_ga, mse_cga)
    assert mse_ga < 10**-5
    assert mse_cga < 10**-5
def gen_proposal(self, ancestor=None):
    assert False, "this needs to be updated"
    assert ancestor is not None
    if np.linalg.norm(ancestor.other["gr"]) < 10**-10:
        # we are close to a local maximum
        f = ancestor.other["lrate"]
        prop_dist = self.jump_dist
    else:
        # we are at a distance to a local maximum:
        # step in the direction of the conjugate gradient
        (f, theta_1, fval_1, back_off_tmp) = find_step_size(
            ancestor.sample, ancestor.other["lrate"], ancestor.lpost,
            ancestor.other["conj"], func=self.lpost)
        self.back_off_count += back_off_tmp
        step_mean = f * 0.5 * ancestor.other["conj"]
        cov = ideal_covar(step_mean, main_var_scale=1, other_var=0.5)
        prop_dist = mvnorm(step_mean, cov)
    step = prop_dist.rvs()
    # use the drawn step so the proposal density below matches the actual move
    samp = ancestor.sample + step
    (lp, gr) = self.lpost_and_grad(samp)
    # Polak-Ribiere style momentum, clipped at zero
    momentum = max(0, float(gr.T.dot(gr - ancestor.other["gr"])
                            / ancestor.other["gr"].T.dot(ancestor.other["gr"])))
    conj_dir_1 = gr + momentum * ancestor.other["conj"]
    lprop = prop_dist.logpdf(step)
    rval = PmcSample(ancestor=ancestor, sample=samp, lpost=lp, lprop=lprop,
                     prop_obj=self, lweight=lp - lprop,
                     other={"lrate": f, "gr": gr, "conj": conj_dir_1})
    return rval
def gen_mm_lpost(num_datasets, num_modes, dims,
                 ev_params=[(80, 10), (40, 10)], cov_var_const=1.5):
    def gen_lp_unnorm_ev(lev, mixt):
        rval = lambda x: mixt.logpdf(x) + lev
        rval.log_evidence = lev
        return (rval, lev)

    rval = []
    for ep in ev_params:
        lev_distr = stats.gamma(ep[0], scale=ep[1])
        for i in range(int(num_datasets // len(ev_params))):
            mode_p = np.random.dirichlet([100] * num_modes)
            mode_d = []
            m = stats.multivariate_normal.rvs([0] * dims, np.eye(dims) * 10)
            while True:
                try:
                    K = invwishart_rv(np.eye(dims) * cov_var_const, dims)
                    print(K)
                    mode_mean_dist = stats.multivariate_normal(m, K)
                    break
                except np.linalg.LinAlgError:
                    # the inverse-Wishart draw was not invertible; try again
                    pass
            while len(mode_d) != num_modes:
                try:
                    mode_d.append(mvnorm(mode_mean_dist.rvs(), invwishart_rv(K, dims)))
                except np.linalg.LinAlgError:
                    # the inverse-Wishart draw was not invertible; try again
                    pass
            mixt = GMM(num_modes, dims)
            mixt.comp_lprior = np.log(mode_p)
            mixt.comp_dist = mode_d
            rval.append(gen_lp_unnorm_ev(-lev_distr.rvs(), mixt))
    return rval
def fit(self, samples):
    import sklearn.mixture
    m = sklearn.mixture.DPGMM(covariance_type="full")
    m.fit(samples)
    self.num_components = len(m.weights_)
    self.comp_lprior = log(m.weights_)
    self.dist_cat = categorical(exp(self.comp_lprior))
    self.comp_dist = [mvnorm(m.means_[i], np.linalg.inv(m.precs_[i]), Ki=m.precs_[i])
                      for i in range(self.comp_lprior.size)]
    self.dim = m.means_[0].size
def gen_proposal(self, ancestor=None):
    assert ancestor is not None
    assert ancestor.sample is not None
    rval = []
    # lazily fill in log-posterior and gradient of the ancestor if missing
    if ancestor.lpost is None:
        if "gr" not in ancestor.other:
            (ancestor.lpost, ancestor.other["gr"]) = self.lpost_and_grad(ancestor.sample)
        else:
            ancestor.lpost = self.lpost_and_grad(ancestor.sample)[0]
    elif "gr" not in ancestor.other:
        ancestor.other["old"] = False
        ancestor.other["gr"] = self.lpost_and_grad(ancestor.sample)[1]
    assert ancestor.other["gr"].size == ancestor.sample.size

    if "lrate" in ancestor.other:
        f = ancestor.other["lrate"]
    else:
        f = self.lrate

    if np.linalg.norm(ancestor.other["gr"]) < 10**-10:
        # we are close to a local maximum
        print("jumping")
        prop_dist = self.jump_dist
    else:
        # we are at a distance to a local maximum: step in direction of gradient
        (f, theta_1, lpost_1, grad_1, back_off_tmp) = find_step_size(
            ancestor.sample, f, ancestor.lpost, ancestor.other["gr"],
            func_and_grad=self.lpost_and_grad)
        self.back_off_count += len(back_off_tmp)
        if False and ancestor.lprop is not None:
            for (f, samp, lp, gr) in back_off_tmp:
                rval.append(PmcSample(ancestor=ancestor, sample=samp, lpost=lp,
                                      lprop=ancestor.lprop,
                                      lweight=lp - ancestor.lprop,
                                      prop_obj=ancestor.prop_obj,
                                      other={"lrate": f, "gr": gr, "old": True}))
        mean_step = f * self.prop_mean_on_line * ancestor.other["gr"]
        prop_mean = ancestor.sample + mean_step
        cov = ideal_covar(mean_step, main_var_scale=self.main_var_scale,
                          other_var=self.other_var, fix_main_var=self.fix_main_var)
        prop_dist = mvnorm(prop_mean, cov)

    samp = prop_dist.rvs()
    (lp, gr) = self.lpost_and_grad(samp)
    lprop = prop_dist.logpdf(samp)
    assert ancestor.other["gr"].size == ancestor.sample.size
    assert gr.size == samp.size
    rval.append(PmcSample(ancestor=ancestor, sample=samp, lpost=lp, lprop=lprop,
                          lweight=lp - lprop, prop_obj=self,
                          other={"lrate": f, "gr": gr, "old": True}))
    return rval
def test_mvt_mvn_logpdf_n_grad():
    # values from R-package bayesm, function dmvt(6.1, a, a)
    for (mu, var, df, lpdf) in [(np.array((1, 1)), np.eye(2), 3, -1.83787707),
                                (np.array((1, 2)), np.eye(2) * 3, 3, -2.93648936)]:
        for dist in [mvt(mu, var, df), mvnorm(mu, var)]:
            ad = np.mean(np.abs(dist.logpdf(mu) - lpdf))
            assert ad < 10**-8
            assert np.all(opt.check_grad(dist.logpdf, dist.logpdf_grad, mu - 1) < 10**-7)
            al = [(5, 4), (3, 3), (1, 1)]
            # batched evaluation must agree with one-at-a-time evaluation
            (cpdf, cgrad) = dist.log_pdf_and_grad(al)
            (spdf, sgrad) = zip(*[dist.log_pdf_and_grad(m) for m in al])
            (spdf, sgrad) = (np.array(spdf), np.array(sgrad))
            assert np.all(cpdf == spdf) and np.all(cgrad == sgrad)
            assert sgrad.shape == cgrad.shape
    mu = np.array([11.56966913, 8.66926112])
    obs = np.array([[1.31227875, -2.88454287], [2.14283061, -2.97526902]])
    var = np.array([[1.44954579, -1.43116137], [-1.43116137, 3.6207941]])
    dist = mvnorm(mu, var)
    assert np.all(dist.logpdf(obs) - stats.multivariate_normal(mu, var).logpdf(obs) < 10**-7)
def gen_proposal(self, ancestor=None):
    assert ancestor is not None
    rval = []
    old = True
    if "gr" not in ancestor.other:
        old = False
        ancestor.other["old"] = False
        ancestor.other["gr"] = self.lpost_and_grad(ancestor.sample)[1]
    assert ancestor.other["gr"].size == ancestor.sample.size
    if "lrate" in ancestor.other:
        f = ancestor.other["lrate"]
    else:
        f = self.lrate
    if np.linalg.norm(ancestor.other["gr"]) < 10**-10:
        # we are close to a local maximum
        print("jumping")
        prop_dist = self.jump_dist
    else:
        # we are at a distance to a local maximum: step in direction of gradient
        assert ancestor.other["gr"].size == ancestor.sample.size
        (f, theta_1, lpost_1, grad_1, back_off_tmp) = find_step_size(
            ancestor.sample, f, ancestor.lpost, ancestor.other["gr"],
            func_and_grad=self.lpost_and_grad)
        self.back_off_count += len(back_off_tmp)
        if False and ancestor.lprop is not None:
            for (f, samp, lp, gr) in back_off_tmp:
                rval.append(PmcSample(ancestor=ancestor, sample=samp, lpost=lp,
                                      lprop=ancestor.lprop,
                                      lweight=lp - ancestor.lprop,
                                      prop_obj=ancestor.prop_obj,
                                      other={"lrate": f, "gr": gr, "old": True}))
        step_mean = f * self.prop_mean_on_line * ancestor.other["gr"]
        cov = ideal_covar(step_mean, main_var_scale=self.main_var_scale,
                          other_var=self.other_var, fix_main_var=self.fix_main_var)
        prop_dist = mvnorm(step_mean, cov)
    step = prop_dist.rvs()
    samp = ancestor.sample + step
    (lp, gr) = self.lpost_and_grad(samp)
    lprop = prop_dist.logpdf(step)
    assert ancestor.other["gr"].size == ancestor.sample.size
    assert gr.size == samp.size
    rval.append(PmcSample(ancestor=ancestor, sample=samp, lpost=lp, lprop=lprop,
                          lweight=lp - lprop, prop_obj=self,
                          other={"lrate": f, "gr": gr, "old": True}))
    return rval
def test_find_step_size():
    for dim in (2, 3):
        mu = 5 * np.ones(dim)
        cur_val = -mu
        dist = mvnorm(mu, np.eye(dim))
        lpost = dist.logpdf(cur_val)
        (f, theta_1, lpost_1, grad_1, back_off_tmp) = find_step_size(
            cur_val, 0.1, lpost, dist.logpdf_grad(cur_val),
            func_and_grad=lambda x: dist.log_pdf_and_grad(x),
            func=lambda x: dist.log_pdf_and_grad(x, grad=False))
        assert lpost_1 > lpost
        assert grad_1.size == cur_val.size and grad_1.size == theta_1.size
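# A minimal sketch of the kind of backing-off line search that find_step_size is
# assumed to perform; the signature and the 5-tuple return value are taken from
# the test above, while the shrink factor of 0.5 is a guess:
def naive_find_step_size(theta, lrate, fval, grad, func_and_grad):
    back_off = []
    while True:
        theta_1 = theta + lrate * grad
        (fval_1, grad_1) = func_and_grad(theta_1)
        if fval_1 >= fval:
            # uphill step found: return it together with the rejected trials
            return (lrate, theta_1, fval_1, grad_1, back_off)
        back_off.append((lrate, theta_1, fval_1, grad_1))
        lrate *= 0.5  # overshot the maximum: back off the learning rate and retry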
def test_MCMC_PMC(include_mcmc=True, include_pmc=True):
    np.random.seed(2)
    for dim in [4, 3]:
        theta = stats.multivariate_normal.rvs(np.array([0] * dim), np.eye(dim) * 10)

        def lpost_and_grad(x, grad=True):
            diff = (theta - x)
            lp = -(100 * diff**2).sum()
            if not grad:
                return lp
            else:
                gr = 200 * diff
                return (lp, gr)

        lpost = lambda x: lpost_and_grad(x, False)

        if include_mcmc:
            ###### MCMC ######
            for mk in [mcmc.GaussMHKernel(lpost, 1, True),
                       mcmc.GaussMHKernel(lpost, np.eye(dim), False),
                       mcmc.ComponentWiseSliceSamplingKernel(lpost)]:
                (samp, trace) = mcmc.sample(100, -theta, mk)
                samp_m = samp[len(samp) // 2:].mean(0)
                print(mk, np.mean((samp_m - theta)**2))
                if np.mean((samp_m - theta)**2) > 4:
                    print(mk, np.mean((samp_m - theta)**2), samp_m, theta)
                    assert False
        if include_pmc:
            ###### PMC ######
            for (prop, num_samp) in [
                    (pmc.GrAsProposal(lpost_and_grad, dim, lrate=0.1), 100),
                    #(pmc.ConGrAsProposal(lpost_and_grad, dim, lrate=0.1), 100),
                    (pmc.NaiveRandomWalkProposal(lpost, mvnorm([0] * dim, np.eye(dim) * 5)), 1000)]:
                for sample in [pmc.sample]:  #pmc.sample,
                    (samp, trace) = sample(num_samp, [-theta] * 10, prop)  #sample_lpost_based
                    samp_m = samp.mean(0)
                    print(prop, np.mean((samp_m - theta)**2))
                    if np.mean((samp_m - theta)**2) > 4:
                        print(prop, np.mean((samp_m - theta)**2), samp_m, theta)
                        assert False
def _m_step(self):
    assert self.resp.shape[0] == self.num_samp
    # pseudo-counts per component from the log-responsibilities
    pseud_lcount = logsumexp(self.resp, axis=0).flat
    r = exp(self.resp)
    self.comp_dist = []
    for c in range(self.num_components):
        norm = exp(pseud_lcount[c])
        # responsibility-weighted mean of component c
        mu = np.sum(r[:, c:c + 1] * self.samples, axis=0) / norm
        diff = self.samples - mu
        # responsibility-weighted scatter matrix of component c
        scatter_matrix = np.zeros([self.samples.shape[1]] * 2)
        for i in range(diff.shape[0]):
            scatter_matrix += r[i, c:c + 1] * diff[i:i + 1, :].T.dot(diff[i:i + 1, :])
        scatter_matrix /= norm
        self.comp_dist.append(mvnorm(mu, scatter_matrix))
    self.comp_lprior = pseud_lcount - log(self.num_samp)
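# For reference, a minimal sketch of the matching E-step under the same attribute
# names (comp_lprior, comp_dist, samples; resp holds log-responsibilities, as the
# M-step above assumes) - an assumed reconstruction, not the repo's own _e_step:
def _e_step_sketch(self):
    # log p(c) + log p(x | c) for every sample/component pair
    lj = np.array([self.comp_lprior[c] + self.comp_dist[c].logpdf(self.samples)
                   for c in range(self.num_components)]).T
    # normalize across components in log space
    self.resp = lj - logsumexp(lj, axis=1)[:, None]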
def __init__(self, lpost_func, var, component_wise=False):
    """Returns a Metropolis-Hastings Markov kernel with the given step (co)variance.

    Parameters
    ----------
    lpost_func : log density of the posterior measure we want to sample from
    var : scalar variance if component_wise is True, else a covariance matrix
    component_wise : whether to make one multivariate step proposal or a
        separate proposal for each component

    Returns
    -------
    kernel : the Metropolis-Hastings Markov kernel
    """
    (var, sh) = MHKernel.check_variance(var, component_wise)
    super(GaussMHKernel, self).__init__(lpost_func, mvnorm([0] * sh, var),
                                        component_wise)
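# Usage sketch, following the call pattern in this repo's tests; the 2-d standard
# Gaussian target is chosen only for illustration:
lpost = mvnorm(np.zeros(2), np.eye(2)).logpdf
kernel = GaussMHKernel(lpost, np.eye(2) * 0.5, component_wise=False)
(samples, trace) = mcmc.sample(1000, np.zeros(2), kernel)
print(samples[500:].mean(0))  # should be close to zero after discarding burn-in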
def fit(self, samples):
    import sklearn.mixture
    m = sklearn.mixture.GMM(self.num_components, "full")
    m.fit(samples)
    self.comp_lprior = log(m.weights_)
    self.dist_cat = categorical(exp(self.comp_lprior))
    self.comp_dist = [mvnorm(m.means_[i], m.covars_[i])
                      for i in range(self.comp_lprior.size)]
    self.dim = m.means_[0].size
    #self._e_step()
    if False:
        # optional extra EM iterations until the responsibilities stop changing
        old = -1
        i = 0
        while not np.all(old == self.resp):
            i += 1
            old = self.resp.copy()
            self._e_step()
            self._m_step()
            print(np.sum(old == self.resp) / self.resp.size)
        #print("Convergence after", i, "iterations")
        self.dist_cat = categorical(exp(self.comp_lprior))
def est_stats(estimates): print("mse", (estimates**2).mean()) n_estimates = [] bu_estimates = [] bu_indiv_sets_estimates =[] bu_rew_estimates = [] if True: M = 100 K = 2 log_evid = -1000 (mu_true, K_true, offset) = (np.ones(K), np.eye(K)*2, 5) post_param = (mu_true, K_true) post = mvnorm(*post_param) post_lpdf = lambda x: post.logpdf(x) + log_evid prop_param = (mu_true+offset, K_true, 20) prop = mvt(*prop_param) prop_lpdf = lambda x: prop.logpdf(x) #check_tails(post, mu_true, prop) perm_x = [] perm_weights = [] for x in [prop.rvs(M) for _ in range(200)]: #plain_weights = log_imp_weight(x) perm_x.append(permutations(x)) perm_weights.append([log_imp_weight(p, post_lpdf, prop_lpdf) for p in perm_x[-1]])
def lpost_and_grad(theta):
    (llh, gr) = mvnorm(theta, K, Ki=Ki, L=L,
                       logdet_K=logdet_K).log_pdf_and_grad(samp)
    return (llh.sum(), -gr.sum(0).flatten())
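# A quick finite-difference check of the analytic gradient above (a sketch;
# assumes K, Ki, L, logdet_K and samp from the surrounding test are in scope,
# with num_dims = 4 as in that test):
import scipy.optimize as opt
err = opt.check_grad(lambda t: lpost_and_grad(t)[0],
                     lambda t: lpost_and_grad(t)[1],
                     np.zeros(4))
print("gradient check error:", err)  # should be small, e.g. < 1e-4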
grad_sqerr = []
cgrad_sqerr = []
slice_sqerr = []
ll_count = np.atleast_1d(0)
llg_count = np.atleast_1d(0)
grad_llc = (0, 0)
cgrad_llc = (0, 0)
ng_llc = (0, 0)
ss_llc = 0
ds_c = 0
est = {}
num_est_samp = np.linspace(-100., 0., 15)  # None #-np.logspace(1, np.log10(num_post_samp), 15, base=10).astype(int)
prior = mvnorm(np.array([0] * num_dims), np.eye(num_dims) * 50)
ad = []
ess_nograd = []
ess_grad = []
pop_size = 4
for num_obs in [1]:
    est[num_obs] = {"GroundTruth": []}
    for estim in ["pmc", "cgpmc", "gpmc", "slicesamp", "slice_half"]:
        est[num_obs][estim] = []
    for lpost in posteriors:
        naive_proposals = pmc.CategoricalOracle(
            pmc.GaussRwProposal(None, np.eye(num_dims) * 10))
def _update_data_dist(self):
    self.ddist = mvnorm(*norm_invwishart(self.K, self.nu, self.mu, self.kappa).rv())
from numpy import exp, log, sqrt
from scipy.special import logsumexp
from autograd import grad, hessian

import distributions as dist
#import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

x = np.linspace(-2, 8, 1000)
sp.random.seed(2)
targd = dist.mixt(1, [dist.mvnorm(np.ones(1) + 1, np.ones(1)),
                      dist.mvnorm(np.ones(1) + 3.8, np.ones(1))],
                  [0.8, 0.2])
q0 = dist.mvnorm(np.ones(1), np.ones(1) * 3)
q1 = dist.mvnorm(np.ones(1), np.ones(1) * 0.1)

fig, ax = plt.subplots(figsize=(6, 3))
ax.plot(x, exp(targd.logpdf(x)) / exp(targd.logpdf(x)).max(),
        label='target density', linewidth=2)
ax.plot(x, exp(q0.logpdf(x)) / exp(q0.logpdf(x)).max() / 2, '-.',
        label='q_0(.|1)', linewidth=2)
from numpy import exp, log, sqrt
from scipy.special import logsumexp
from autograd import grad, hessian

import distributions as dist
#import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

x = np.linspace(-2, 8, 1000)
sp.random.seed(2)
targd = dist.mixt(1, [dist.mvnorm(np.ones(1), np.ones(1)),
                      dist.mvnorm(np.ones(1) + 3.8, np.ones(1))],
                  [0.7, 0.3])
q0 = dist.mvnorm(np.ones(1) + 3, np.ones(1) * 2)
samps = q0.rvs(20)
# self-normalized importance weights of the q0 draws under the target
lw = targd.logpdf(samps).flatten() - q0.logpdf(samps)
lw = lw - logsumexp(lw)
# q1: a mixture of kernels centered at the draws, weighted by those weights
q1 = dist.mixt(1, [dist.mvnorm(mu, np.ones(1)) for mu in samps],
               lw.flatten(), comp_w_in_logspace=True)

fig, ax = plt.subplots(figsize=(6, 3))
ax.plot(x, exp(targd.logpdf(x)), label='target density', linewidth=2)
ax.plot(x, exp(q0.logpdf(x)), '-.', label='q0', linewidth=2)
ax.plot(x, exp(q1.logpdf(x)), '--', label='q1', linewidth=2)
ax.legend(loc='best')
ax.set_xticks([])
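# Effective sample size of the normalized weights above, a standard importance
# sampling diagnostic: ESS = 1 / sum(w_i^2), computed stably in log space since
# lw is already normalized there:
ess = exp(-logsumexp(2 * lw))
print("ESS of q0 draws: %.1f of %d" % (ess, len(samps)))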
from numpy import exp, log, sqrt
from scipy.special import logsumexp
from autograd import grad, hessian

import distributions as dist
#import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

x = np.linspace(-2, 8, 1000)
sp.random.seed(5)
targd = dist.mixt(1, [dist.mvnorm(np.ones(1) + 1, np.ones(1)),
                      dist.mvnorm(np.ones(1) + 3.8, np.ones(1))],
                  [0.8, 0.2])
qrw = dist.mvnorm(np.zeros(1), np.ones(1) * 0.4)
qind = dist.mvnorm(np.ones(1) * 2, np.ones(1) * 0.2)

fig, ax = plt.subplots(figsize=(4, 2))
ax.plot(x, exp(targd.logpdf(x)) / exp(targd.logpdf(x)).max(),
        label=r'$\pi$', linewidth=2)
for i in range(3):
    current = targd.rvs().flatten()
    ax.plot(x, exp(qrw.logpdf(x - current)) / exp(qrw.logpdf(x - current)).max() /
def __init__(self, lpost_and_grad_func, cov):
    self.lpost_and_grad = lpost_and_grad_func
    self.lpost = lambda x: self.lpost_and_grad(x, False)
    self.step_dist = mvnorm([0] * cov.shape[0], cov)
def logpost_unnorm(posterior_samples):
    return np.array([mvnorm(mean, K_li).logpdf(D).sum()
                     for mean in posterior_samples]) + pr.logpdf(posterior_samples)
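# Sketch: turning the unnormalized log-posterior values above into self-normalized
# importance weights. The proposal q below is hypothetical (not defined in this
# excerpt); K_li, D and pr come from the surrounding script:
qsamps = q.rvs(1000)
lw = logpost_unnorm(qsamps) - q.logpdf(qsamps)
lw -= logsumexp(lw)
post_mean_est = exp(lw).dot(qsamps)  # weighted posterior-mean estimate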
                             num_obs, num_obs + 1, 10),
                             num_datasets=num_datasets)

## MODEL Likelihood
# mu_li ~ N(mu_pr, sd_pr)
K_li = np.eye(dims)  #+ np.ones((dims,dims))

## MODEL prior ##
mu_pr = np.zeros(dims)
nu_pr = 5
K_pr = np.eye(dims) * 100
kappa_pr = 5
pr = mvnorm(mu_pr, K_pr)

lowdisc_seq_sob = i4_sobol_generate(dims, num_imp_samples, 2).T

est = {}
num_est_samp = np.logspace(1, np.log10(num_imp_samples), 15, base=10).astype(int)
for num_obs in datasets:
    est[num_obs] = {"GroundTruth": []}
    for estim in ["qis(sobol)", "is", "priorIs"]:
        est[num_obs][estim] = []
    for ds in datasets[num_obs]:
        D = ds["obs"]
        ## Analytic evidence
        ((mu_post, K_post, Ki_post),
def lpost(x):
    return mvnorm(x, K_li).logpdf(D).sum() + prior.logpdf(x)
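# Sketch of a prior-based importance-sampling evidence estimate, matching the
# "priorIs" estimator named in the surrounding script (assumes prior, K_li and
# the data D are in scope, and logsumexp is imported from scipy.special):
xs = prior.rvs(10000)
llhood = np.array([mvnorm(x, K_li).logpdf(D).sum() for x in xs])
log_evidence_est = logsumexp(llhood) - log(len(xs))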
import scipy as sp
import scipy.stats as stats
from numpy import exp, log, sqrt
from scipy.special import logsumexp
from numpy.linalg import inv
import pylab

import mc  #McSample
import distributions as dist

num_samp = 1000
post_d = dist.mvnorm(0, 100)  # stats.norm(0,10)
lpost = post_d.logpdf
# step variance far below the posterior variance: high acceptance, slow mixing
s_high = mc.mcmc.sample(num_samp + 200, np.zeros(1),
                        mc.mcmc.GaussMHKernel(lpost, 1))[0][-num_samp:]
# step variance far above the posterior variance: most proposals rejected
s_low = mc.mcmc.sample(num_samp + 200, np.zeros(1),
                       mc.mcmc.GaussMHKernel(lpost, 10000))[0][-num_samp:]
# for a 1d target an acceptance rate around 0.44 is optimal
s_opt = mc.mcmc.sample(num_samp + 200, np.zeros(1),
                       mc.mcmc.GaussMHKernel(lpost, 550))[0][-num_samp:]
# for a 1d target an acceptance rate around 0.57 is optimal for MALA
s_opt_mala = mc.mcmc.sample(num_samp + 200, np.zeros(1),
                            mc.mcmc.MalaKernel(post_d.log_pdf_and_grad, 1, 340))[0][-num_samp:]
s_am = mc.mcmc.sample(num_samp + 200, np.zeros(1),
                      mc.mcmc.HaarioKernel(post_d.logpdf, 0, 1))[0][-num_samp:]
s_iid = post_d.rvs(num_samp)

f, ax = pylab.subplots(2, 2, sharex=True, sharey=True)
ax[0][0].plot(np.arange(num_samp), s_high)
          infl_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, infl_samp[-1].cat_param.flatten()),
          count["local_llhood"], count["local_lpost"],
          "\n\n--STANDARD--\n",
          stand_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, stand_samp[-1].cat_param.flatten()),
          count["standard_llhood"], count["standard_lpost"], "\n\n")
    return {"infl": (infl_samp, infl_lpost),
            "standard": (stand_samp, stand_lpost)}


if __name__ == "__main__":
    import scipy.io as io
    of3 = io.loadmat("data/oilFlow3Class.mat")
    of3_lab = np.vstack((of3["DataTrnLbls"], of3["DataTstLbls"], of3["DataVdnLbls"]))
    of3 = np.vstack((of3["DataTrn"], of3["DataTst"], of3["DataVdn"])) * 100
    initial = [DirCatTMM(of3, [1] * 3, dist.mvnorm([0] * 12, np.eye(12)),
                         dist.invwishart(np.eye(12) * 5, 12),
                         stats.gamma(1, scale=1))
               for _ in range(10)]
    count = {"local_lpost": 0, "local_llhood": 0,
             "naive_lpost": 0, "naive_llhood": 0}

    def count_closure(name):
        def rval():
            count[name] = count[name] + 1
        return rval

    samps = pmc.sample(50, initial,
                       DirCatTMMProposal(lpost_count=count_closure("naive_lpost"),
                                         llhood_count=count_closure("naive_llhood")),
                       population_size=5, quiet=False)
class Post(object):
    def __init__(self, mu1, K1, p, mu2, K2):
        assert p > 0 and p < 1
        self.p = stats.bernoulli(p)
        self.d1 = mvnorm(mu1, K1)
        self.d2 = mvnorm(mu2, K2)

    def logpdf(self, x):
        return logsumexp([self.p.logpmf(1) + self.d1.logpdf(x),
                          self.p.logpmf(0) + self.d2.logpdf(x)])


post = mvnorm(mu_true, K_true)  # Post(mu_true, K_true, 0.3, mu_true + offset, K_true)
exp_true = mu_true
prop = mvt(mu_true, K_true, 2.0000001)
check_tails(post, exp_true, prop)
n_estimates = []
bu_estimates = []
bu_indiv_sets_estimates = []
for x in [prop.rvs(100) for _ in range(20)]:
    n_estimates.append(np.linalg.norm(est_plain(x, post, prop) - exp_true, 2))
    bu_estimates.append(np.linalg.norm(est_bu(x, post, prop) - exp_true, 2))
    bu_indiv_sets_estimates.append(
        np.linalg.norm(est_bu_indiv_sets(x, post, prop) - exp_true, 2,
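# est_plain is called above but not defined in this excerpt; a minimal sketch of
# a plain self-normalized importance-sampling estimate of the posterior mean,
# consistent with that call signature (est_bu and est_bu_indiv_sets are variants
# not reconstructed here):
def est_plain(x, post, prop):
    lw = post.logpdf(x) - prop.logpdf(x)
    lw = lw - logsumexp(lw)
    return exp(lw).dot(x)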
num_datasets = 50
if False:
    dims = 3
    num_obs = 10
    num_post_samples = 100
    num_imp_samples = 1000
    num_datasets = 30

## Data generation ##
print("generating Data")
logpost_ev = synthdata.gen_mm_lpost(num_datasets, dims)  #gen_gauss_lpost(num_datasets, dims, cov_var_const=8)
#exit(0)
pr = mvnorm(np.zeros(dims), np.eye(dims) * 10)

print("Low discr sequence")
#lowdisc_seq_sob = i4_sobol_generate(dims + 1, num_imp_samples, 2).T
%load_ext rmagic
%R require(randtoolbox)
%R -i num_imp_samples,dims -o lowdisc_seq_sob lowdisc_seq_sob <- sobol(num_imp_samples, dims + 1, scrambling = 0)
#%R -i num_imp_samples,dims -o rdzd_lowdisc_seq_sob rdzd_lowdisc_seq_sob <- sobol(num_imp_samples, dims + 1, seed = sample(1:30000, 1, TRUE), scrambling = 1)

est = {}
num_est_samp = np.logspace(1, np.log10(num_imp_samples), 15, base=10).astype(int)
ds = 0
import scipy.stats as stats
from numpy import exp, log, sqrt
from scipy.special import logsumexp
from autograd import grad, hessian

import distributions as dist
#import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

x = np.linspace(-2, 8, 1000)
targd = dist.mixt(1, [dist.mvnorm(np.ones(1) + 2, np.ones(1)),
                      dist.mvnorm(np.ones(1) + 3.8, np.ones(1))],
                  [0.7, 0.3])
g = grad(targd.logpdf)
h = hessian(targd.logpdf)
res = sp.optimize.minimize_scalar(lambda x: -targd.logpdf(x))
#maximum = 3.44515
maximum = res['x']
print("Gradient at maximum of logpdf", g(maximum))

#mpl.style.use('seaborn')
fig, ax = plt.subplots(figsize=(5, 3))
ax.plot(x, exp(targd.logpdf(x)), label='target density', linewidth=2)
# Laplace approximation: a Gaussian at the mode with variance -1/h(mode)
ax.plot(x, exp(dist.mvnorm(maximum, 1. / -h(maximum)).logpdf(x)), '--',
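# The Gaussian above is a Laplace approximation. The same mode and Hessian give
# an evidence estimate for an unnormalized 1d density (a sketch; targd here is
# already normalized, so the estimate should come out near log(1) = 0):
log_evidence_laplace = (targd.logpdf(maximum) + 0.5 * log(2 * np.pi)
                        - 0.5 * log(float(-h(maximum))))
print("Laplace log-evidence estimate:", log_evidence_laplace)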