def approximate_mixture_data():
    """Run PMC inference on synthetic 1-D two-component t-mixture data.

    Draws ~100 observations from two Student-t components (weights 0.7/0.3,
    location 0 and 20), then fits DirCatTMM models with two PMC variants:
    one using several local proposals per importance sample and one standard
    sampler given an equal total proposal budget. Prints diagnostics.

    Returns:
        dict with keys "infl" and "standard", each the (samples, lpost)
        tuple returned by pmc.sample.
    """
    num_loc_proposals = 2
    num_imp_samp = 1000
    n_comp = 2
    p_comp = np.array([0.7, 0.3])
    dim = 1
    num_obs = 100

    # Draw each component's share of the observations and stack them.
    obs = None
    means = []
    for i in range(n_comp):
        means.append([20 * i] * dim)
        # FIX: np.int was removed in NumPy 1.24; the builtin int is the
        # documented replacement and behaves identically here.
        draw = dist.mvt(means[-1], np.eye(dim), 30).rvs(
            int(np.round(num_obs * p_comp[i])))
        if obs is None:
            obs = draw
        else:
            obs = np.vstack([obs, draw])

    # Call counters for the log-posterior / log-likelihood of each variant.
    count = {"local_lpost": 0, "local_llhood": 0,
             "naive_lpost": 0, "naive_llhood": 0,
             "standard_lpost": 0, "standard_llhood": 0}
    print(means)

    def count_closure(name):
        # Return a zero-argument callback that increments count[name].
        def rval():
            count[name] = count[name] + 1
        return rval

    initial_samples = []
    for _ in range(10):
        initial_samples.append(
            DirCatTMM(obs, [1] * n_comp,
                      dist.mvt(np.mean(means, 0), np.eye(dim) * 5, dim),
                      dist.invwishart(np.eye(dim) * 5, dim + 1),
                      stats.gamma(1, scale=1)))

    # Variant with multiple local proposals per importance sample.
    (infl_samp, infl_lpost) = pmc.sample(
        num_imp_samp, initial_samples,
        DirCatTMMProposal(num_local_proposals=num_loc_proposals,
                          lpost_count=count_closure("local_lpost"),
                          llhood_count=count_closure("local_llhood")),
        population_size=4)

    # Standard variant; gets num_loc_proposals times as many importance
    # samples so both variants spend a comparable proposal budget.
    (stand_samp, stand_lpost) = pmc.sample(
        num_imp_samp * num_loc_proposals, initial_samples,
        DirCatTMMProposal(lpost_count=count_closure("standard_lpost"),
                          llhood_count=count_closure("standard_llhood")),
        population_size=4)

    print("===============\n", p_comp, means,
          "\n\n--LOCAL--\n",
          infl_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, infl_samp[-1].comp_indic.sum(0)) + 1,
          count["local_llhood"], count["local_lpost"],
          "\n\n--STANDARD--\n",
          stand_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, stand_samp[-1].comp_indic.sum(0)) + 1,
          count["standard_llhood"], count["standard_lpost"], "\n\n")
    return {"infl": (infl_samp, infl_lpost),
            "standard": (stand_samp, stand_lpost)}
def test_DirCatTMMProposal():
    """Smoke-test dis.DirCatTMMProposal on synthetic two-component t data.

    Same experiment as approximate_mixture_data but routed through the
    `dis` module's DirCatTMM / DirCatTMMProposal; prints diagnostics and
    returns nothing.
    """
    num_loc_proposals = 2
    num_imp_samp = 1000
    n_comp = 2
    p_comp = np.array([0.7, 0.3])
    dim = 1
    num_obs = 100

    # Draw each component's share of the observations and stack them.
    obs = None
    means = []
    for i in range(n_comp):
        means.append([20 * i] * dim)
        # FIX: np.int was removed in NumPy 1.24; use the builtin int instead.
        draw = dist.mvt(means[-1], np.eye(dim), 30).rvs(
            int(np.round(num_obs * p_comp[i])))
        if obs is None:
            obs = draw
        else:
            obs = np.vstack([obs, draw])

    # Call counters for the log-posterior / log-likelihood of each variant.
    count = {"local_lpost": 0, "local_llhood": 0,
             "naive_lpost": 0, "naive_llhood": 0,
             "standard_lpost": 0, "standard_llhood": 0}
    print(means)

    def count_closure(name):
        # Return a zero-argument callback that increments count[name].
        def rval():
            count[name] = count[name] + 1
        return rval

    initial_samples = []
    for _ in range(10):
        initial_samples.append(
            dis.DirCatTMM(obs, [1] * n_comp,
                          dist.mvt(np.mean(means, 0), np.eye(dim) * 5, dim),
                          dist.invwishart(np.eye(dim) * 5, dim + 1),
                          stats.gamma(1, scale=1)))

    # Variant with multiple local proposals per importance sample.
    (infl_samp, infl_lpost) = pmc.sample(
        num_imp_samp, initial_samples,
        dis.DirCatTMMProposal(num_local_proposals=num_loc_proposals,
                              lpost_count=count_closure("local_lpost"),
                              llhood_count=count_closure("local_llhood")),
        population_size=4)

    # Standard variant with an equal total proposal budget.
    (stand_samp, stand_lpost) = pmc.sample(
        num_imp_samp * num_loc_proposals, initial_samples,
        dis.DirCatTMMProposal(lpost_count=count_closure("standard_lpost"),
                              llhood_count=count_closure("standard_llhood")),
        population_size=4)

    print("===============\n", p_comp, means,
          "\n\n--LOCAL--\n",
          infl_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, infl_samp[-1].comp_indic.sum(0)) + 1,
          count["local_llhood"], count["local_lpost"],
          "\n\n--STANDARD--\n",
          stand_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, stand_samp[-1].comp_indic.sum(0)) + 1,
          count["standard_llhood"], count["standard_lpost"], "\n\n")
def test_Rosenbrock():
    """Smoke-test MCMC kernels and PMC proposals on the 2-D Rosenbrock target.

    The target is the negated Rosenbrock function (optimum at theta = (1, 1)).
    For several random restarts, each sampler is run briefly and the mean
    squared distance of the posterior-mean estimate from the optimum is
    printed (and reported loudly when it exceeds 4).
    """
    np.random.seed(2)

    def lpost_and_grad(theta, grad=True):
        # Negated Rosenbrock: a maximization target with optimum at (1, 1).
        fval = -sp.optimize.rosen(theta)
        if not grad:
            return fval
        return (fval, -sp.optimize.rosen_der(theta))

    lpost = lambda x: lpost_and_grad(x, False)
    theta = np.array((1, 1))
    dim = 2
    inits = mvnorm([0] * dim, np.eye(dim) * 5).rvs(10)
    # FIX: was `for i in len(inits)` — iterating over an int raises TypeError.
    for i in range(len(inits)):
        # NOTE(review): `initial` is never used below; both samplers start at
        # -theta. Confirm whether the restarts were meant to use inits[i].
        initial = inits[i]
        ###### MCMC ######
        for mk in [mcmc.GaussMHKernel(lpost, 1, True),
                   mcmc.GaussMHKernel(lpost, np.eye(dim), False),
                   mcmc.ComponentWiseSliceSamplingKernel(lpost)]:
            (samp, trace) = mcmc.sample(100, -theta, mk)
            # Discard the first half as burn-in before estimating the mean.
            samp_m = samp[len(samp) // 2:].mean(0)
            print(mk, np.mean((samp_m - theta)**2))
            if np.mean((samp_m - theta)**2) > 4:
                print(mk, np.mean((samp_m - theta)**2), samp_m, theta)
                #assert(False)
        ###### PMC ######
        for (prop, num_samp) in [
                (pmc.NaiveRandomWalkProposal(lpost, mvnorm([0] * dim, np.eye(dim) * 5)), 1000),
                (pmc.GradientAscentProposal(lpost_and_grad, dim, lrate=0.1), 100),
                (pmc.ConjugateGradientAscentProposal(lpost_and_grad, dim, lrate=0.1), 100)]:
            (samp, trace) = pmc.sample(num_samp, [-theta] * 10, prop)  # sample_lpost_based
            samp_m = samp[len(samp) // 2:].mean(0)  # samp.mean(0)
            print(prop, np.mean((samp_m - theta)**2))
            if np.mean((samp_m - theta)**2) > 4:
                print(prop, np.mean((samp_m - theta)**2), samp_m, theta)
def approximate_iris_mixture_data():
    """Fit a three-component DirCatTMM to the Iris data set via PMC.

    Compares a proposal that makes several local proposals per importance
    sample against the standard proposal (given an equal total proposal
    budget) and prints component-occupancy diagnostics for both.

    Returns:
        dict with keys "infl" and "standard", each the (samples, lpost)
        tuple returned by pmc.sample.
    """
    from sklearn.datasets import load_iris

    num_imp_samp = 100
    num_loc_proposals = 3
    n_comp = 3
    p_comp = np.array([1 / n_comp] * n_comp)
    dim = 4

    iris = load_iris()
    obs = iris["data"]
    labels = iris["target"]
    # Per-species empirical means; the data set stores 50 rows per species.
    means = np.array([obs[i * 50:(i + 1) * 50].mean(0) for i in range(3)])

    # Call counters for log-posterior / log-likelihood evaluations.
    count = {"local_lpost": 0, "local_llhood": 0,
             "naive_lpost": 0, "naive_llhood": 0,
             "standard_lpost": 0, "standard_llhood": 0}

    def count_closure(name):
        # Zero-argument callback that bumps the named counter.
        def rval():
            count[name] = count[name] + 1
        return rval

    initial_samples = [
        DirCatTMM(obs, [1] * n_comp,
                  dist.mvt(obs.mean(0), np.diag(obs.var(0)), 20),
                  dist.invwishart(np.eye(dim), 50),
                  stats.gamma(500, scale=0.1))
        for _ in range(10)
    ]

    # Variant with several local proposals per importance sample.
    local_prop = DirCatTMMProposal(
        num_local_proposals=num_loc_proposals,
        lpost_count=count_closure("local_lpost"),
        llhood_count=count_closure("local_llhood"))
    (infl_samp, infl_lpost) = pmc.sample(
        num_imp_samp, initial_samples, local_prop, population_size=4)

    # Standard variant; equal total proposal budget.
    standard_prop = DirCatTMMProposal(
        lpost_count=count_closure("standard_lpost"),
        llhood_count=count_closure("standard_llhood"))
    (stand_samp, stand_lpost) = pmc.sample(
        num_imp_samp * num_loc_proposals, initial_samples, standard_prop,
        population_size=4)

    print("===============\n", p_comp, means,
          "\n\n--LOCAL--\n",
          infl_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, infl_samp[-1].cat_param.flatten()),
          count["local_llhood"], count["local_lpost"],
          "\n\n--STANDARD--\n",
          stand_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, stand_samp[-1].cat_param.flatten()),
          count["standard_llhood"], count["standard_lpost"], "\n\n")
    return {"infl": (infl_samp, infl_lpost),
            "standard": (stand_samp, stand_lpost)}
def test_Rosenbrock():
    """Smoke-test MCMC kernels and PMC proposals on the 2-D Rosenbrock target.

    NOTE(review): this redefines test_Rosenbrock already declared earlier in
    this file; only the later definition survives at import time.
    """
    np.random.seed(2)

    def lpost_and_grad(theta, grad=True):
        # Negated Rosenbrock: a maximization target with optimum at (1, 1).
        fval = -sp.optimize.rosen(theta)
        if not grad:
            return fval
        return (fval, -sp.optimize.rosen_der(theta))

    lpost = lambda x: lpost_and_grad(x, False)
    theta = np.array((1, 1))
    dim = 2
    inits = mvnorm([0] * dim, np.eye(dim) * 5).rvs(10)
    # FIX: was `for i in len(inits)` — iterating over an int raises TypeError.
    for i in range(len(inits)):
        # NOTE(review): `initial` is never used below; both samplers start at
        # -theta. Confirm whether the restarts were meant to use inits[i].
        initial = inits[i]
        ###### MCMC ######
        for mk in [
                mcmc.GaussMHKernel(lpost, 1, True),
                mcmc.GaussMHKernel(lpost, np.eye(dim), False),
                mcmc.ComponentWiseSliceSamplingKernel(lpost)
        ]:
            (samp, trace) = mcmc.sample(100, -theta, mk)
            # Discard the first half as burn-in before estimating the mean.
            samp_m = samp[len(samp) // 2:].mean(0)
            print(mk, np.mean((samp_m - theta)**2))
            if np.mean((samp_m - theta)**2) > 4:
                print(mk, np.mean((samp_m - theta)**2), samp_m, theta)
                #assert(False)
        ###### PMC ######
        for (prop, num_samp) in [
                (pmc.NaiveRandomWalkProposal(lpost, mvnorm([0] * dim, np.eye(dim) * 5)), 1000),
                (pmc.GradientAscentProposal(lpost_and_grad, dim, lrate=0.1), 100),
                (pmc.ConjugateGradientAscentProposal(lpost_and_grad, dim, lrate=0.1), 100)
        ]:
            (samp, trace) = pmc.sample(num_samp, [-theta] * 10, prop)  # sample_lpost_based
            samp_m = samp[len(samp) // 2:].mean(0)  # samp.mean(0)
            print(prop, np.mean((samp_m - theta)**2))
            if np.mean((samp_m - theta)**2) > 4:
                print(prop, np.mean((samp_m - theta)**2), samp_m, theta)
# NOTE(review): orphaned fragment — this `return` belongs to a function whose
# `def` is not visible in this chunk (it mirrors approximate_iris_mixture_data's
# return). As a bare top-level statement it is a SyntaxError, so it is kept
# here only as a comment for reference:
# return {"infl":(infl_samp, infl_lpost), "standard":(stand_samp, stand_lpost)}

if __name__ == "__main__":
    import scipy.io as io

    # Oil-flow data: concatenate train/test/validation splits and rescale.
    of3 = io.loadmat("data/oilFlow3Class.mat")
    of3_lab = np.vstack((of3["DataTrnLbls"], of3["DataTstLbls"], of3["DataVdnLbls"],))
    of3 = np.vstack((of3["DataTrn"], of3["DataTst"], of3["DataVdn"],)) * 100

    initial = [DirCatTMM(of3, [1] * 3,
                         dist.mvnorm([0] * 12, np.eye(12)),
                         dist.invwishart(np.eye(12) * 5, 12),
                         stats.gamma(1, scale=1))
               for _ in range(10)]

    # Call counters for log-posterior / log-likelihood evaluations.
    count = {"local_lpost": 0, "local_llhood": 0,
             "naive_lpost": 0, "naive_llhood": 0}

    def count_closure(name):
        # Return a zero-argument callback that increments count[name].
        def rval():
            count[name] = count[name] + 1
        return rval

    # FIX: llhood_count previously used count_closure("naive_lpost"), so
    # likelihood calls were folded into the lpost counter and "naive_llhood"
    # stayed at zero; route it to the key the dict actually defines.
    samps = pmc.sample(50, initial,
                       DirCatTMMProposal(lpost_count=count_closure("naive_lpost"),
                                         llhood_count=count_closure("naive_llhood")),
                       population_size=5, quiet=False)
def approximate_iris_mixture_data():
    """Run PMC over DirCatTMM mixtures on the Iris data.

    Two samplers are compared under an equal total proposal budget: one that
    draws `num_loc_proposals` local proposals per importance sample and one
    standard sampler. Diagnostics for both are printed.

    NOTE: this redefines approximate_iris_mixture_data declared earlier in
    this file; only the later definition survives at import time.

    Returns:
        dict mapping "infl" and "standard" to the corresponding
        (samples, lpost) tuples from pmc.sample.
    """
    from sklearn.datasets import load_iris

    num_imp_samp = 100
    num_loc_proposals = 3
    n_comp = 3
    p_comp = np.array([1 / n_comp] * n_comp)
    dim = 4

    iris = load_iris()
    obs = iris["data"]
    labels = iris["target"]
    # Empirical mean of each species block (50 consecutive rows per species).
    means = np.array([obs[i * 50:(i + 1) * 50].mean(0) for i in range(3)])

    count = {"local_lpost": 0, "local_llhood": 0,
             "naive_lpost": 0, "naive_llhood": 0,
             "standard_lpost": 0, "standard_llhood": 0}

    def count_closure(name):
        # Build a no-argument incrementer bound to one counter key.
        def bump():
            count[name] = count[name] + 1
        return bump

    # Ten identically-distributed starting states for the population.
    initial_samples = [
        DirCatTMM(obs, [1] * n_comp,
                  dist.mvt(obs.mean(0), np.diag(obs.var(0)), 20),
                  dist.invwishart(np.eye(dim), 50),
                  stats.gamma(500, scale=0.1))
        for _ in range(10)
    ]

    local_proposal = DirCatTMMProposal(
        num_local_proposals=num_loc_proposals,
        lpost_count=count_closure("local_lpost"),
        llhood_count=count_closure("local_llhood"))
    (infl_samp, infl_lpost) = pmc.sample(
        num_imp_samp, initial_samples, local_proposal, population_size=4)

    standard_proposal = DirCatTMMProposal(
        lpost_count=count_closure("standard_lpost"),
        llhood_count=count_closure("standard_llhood"))
    (stand_samp, stand_lpost) = pmc.sample(
        num_imp_samp * num_loc_proposals, initial_samples,
        standard_proposal, population_size=4)

    print("===============\n", p_comp, means,
          "\n\n--LOCAL--\n",
          infl_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, infl_samp[-1].cat_param.flatten()),
          count["local_llhood"], count["local_lpost"],
          "\n\n--STANDARD--\n",
          stand_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, stand_samp[-1].cat_param.flatten()),
          count["standard_llhood"], count["standard_lpost"], "\n\n")
    return {"infl": (infl_samp, infl_lpost),
            "standard": (stand_samp, stand_lpost)}
# NOTE(review): fragment — this line begins mid-expression (it closes a
# np.vstack call whose opening lies outside this chunk) and duplicates the
# __main__ oil-flow block above, including its llhood_count/"naive_lpost"
# counter mix-up. Left byte-identical; it cannot be safely reformatted here.
of3["DataTst"], of3["DataVdn"], )) * 100 initial = [ DirCatTMM(of3, [1] * 3, dist.mvnorm([0] * 12, np.eye(12)), dist.invwishart(np.eye(12) * 5, 12), stats.gamma(1, scale=1)) for _ in range(10) ] count = { "local_lpost": 0, "local_llhood": 0, "naive_lpost": 0, "naive_llhood": 0 } def count_closure(name): def rval(): count[name] = count[name] + 1 return rval samps = pmc.sample(50, initial, DirCatTMMProposal( lpost_count=count_closure("naive_lpost"), llhood_count=count_closure("naive_lpost")), population_size=5, quiet=False)
# NOTE(review): fragment — interior of a benchmarking loop whose enclosing
# function (defining lp, theta, est, num_obs, prior, naive_proposals,
# grad_proposals, stop_flag, pop_size, mean_of_samples, the *_llc
# accumulators, ...) starts before this chunk and continues past its end.
# It appears to time slice-sampling MCMC against gradient-free and
# gradient-based PMC under a shared stop_flag budget. Left byte-identical;
# it cannot be safely reformatted or restyled without the missing context.
(s_gibbs_slice, t_gibbs_slice) = mcmc.sample(num_post_samp, theta, mcmc.ComponentWiseSliceSamplingKernel(lp), stop_flag = stop_flag)#np.array(s_gibbs_slice) stop_flag.set_max_time() print(stop_flag.elapsed()) est[num_obs]["slicesamp"].append(mean_of_samples(s_gibbs_slice, num_est_samp)) est[num_obs]["slice_half"].append(mean_of_samples(s_gibbs_slice[len(s_gibbs_slice)//2:], num_est_samp)) ss_llc += stop_flag.lhood stop_flag.reset() #ga_theta = gradient_ascent(prior.rvs(), lpost_and_grad) #print( o_m, ga_theta[0]) #continue #exit(0) (s_nograd, t_nograd, ess_nograd_cur) = pmc.sample(num_post_samp**2, [prior.rvs() for _ in range(10)], naive_proposals, population_size = pop_size, stop_flag = stop_flag, ess = True) print(stop_flag.elapsed()) est[num_obs]["pmc"].append(mean_of_samples(s_nograd, num_est_samp)) ng_llc = (ng_llc[0] + int(stop_flag.lhood), ng_llc[1] + int(stop_flag.grad)) ess_nograd.append(ess_nograd_cur) stop_flag.reset() (s_grad, t_grad, ess_grad_cur) = pmc.sample(num_post_samp**2, [prior.rvs() for _ in range(10)], grad_proposals, population_size = pop_size, stop_flag = stop_flag, ess = True) print(stop_flag.elapsed())