def __init__(
        self,
        data,
        prior_eta=GammaPrior(2., 0.5),
        prior_mu=(0., 4.),
        prior_Sigma=(10, 0.5),
        prior_xi=GammaPrior(1., 1.),
        prior_tau=GammaPrior(2., 2.),
        p=10,
        max_clust_count=300,
        ntemps=5,
        stepping=1.3,
        ):
    """Store the data, expand scalar hyperparameters into matrix priors,
    and set up the parallel-tempering ladder.

    prior_mu is a (location, scale) pair; prior_mu[1] has its square root
    taken before use, so it is presumably a variance — confirm against
    NormalPrior's convention.  prior_Sigma is (extra dof, scale) for the
    inverse-Wishart prior.
    """
    self.data = data
    self.p = p
    self.max_clust_count = max_clust_count
    self.nCol = data.nCol
    self.nDat = data.nDat
    # NormalPrior receives (mean vector, sd matrix, inverse-sd matrix).
    mu_sd = np.sqrt(prior_mu[1])
    prior_mu_actual = NormalPrior(
        np.ones(self.nCol) * prior_mu[0],
        np.eye(self.nCol) * mu_sd,
        np.eye(self.nCol) / mu_sd,
        )
    prior_Sigma_actual = InvWishartPrior(
        self.nCol + prior_Sigma[0],
        np.eye(self.nCol) * prior_Sigma[1],
        )
    self.priors = Prior(
        prior_eta, prior_mu_actual, prior_Sigma_actual, prior_xi, prior_tau,
        )
    self.set_projection()
    # Parallel tempering: geometric inverse-temperature ladder, with data
    # indices unraveled across temperatures for vectorized updates.
    self.nTemp = ntemps
    self.itl = 1 / stepping**np.arange(ntemps)
    self.temp_unravel = np.repeat(np.arange(self.nTemp), self.nDat)
    self.nSwap_per = self.nTemp // 2
    self.swap_start = 100
    return
def __init__(
        self,
        data,
        prior_eta=GammaPrior(2., 0.5),
        prior_alpha=GammaPrior(1., 1.),
        prior_beta=GammaPrior(1., 1.),
        p=10,
        max_clust_count=300,
        ntemps=3,
        stepping=1.05,
        ):
    """Record dimensions and priors for a mixed-data chain, then set up
    projection, categorical bookkeeping, and the tempering ladder."""
    assert type(data) is MixedData
    self.data = data
    self.p = p
    self.max_clust_count = max_clust_count
    # Dimension bookkeeping: real columns + categorical columns.
    self.nCat = data.nCat
    self.nCol = data.nCol
    self.tCol = self.nCol + self.nCat
    self.nDat = data.nDat
    self.nCats = data.Cats.shape[0]
    self.priors = Prior(prior_eta, prior_alpha, prior_beta)
    self.set_projection()
    self.categorical_considerations()
    # Parallel tempering: inverse-temperature ladder and swap schedule.
    self.nTemp = ntemps
    self.itl = 1 / stepping**np.arange(ntemps)
    self.temp_unravel = np.repeat(np.arange(self.nTemp), self.nDat)
    self.nSwap_per = self.nTemp // 2
    self.swap_start = 100
    return
def __init__(
        self,
        data,
        prior_eta=GammaPrior(2., 0.5),
        prior_alpha=GammaPrior(0.5, 0.5),
        prior_beta=GammaPrior(0.5, 0.5),
        prior_xi=GammaPrior(0.5, 0.5),
        prior_tau=GammaPrior(0.5, 0.5),
        p=10,
        max_clust_count=300,
        ):
    """Record dimensions and priors for a mixed-data chain; build the
    projection, categorical bookkeeping, and the sigma-unity structure."""
    assert type(data) is MixedData
    self.data = data
    self.p = p
    self.max_clust_count = max_clust_count
    self.nCat = data.nCat
    self.nCol = data.nCol
    self.nDat = data.nDat
    self.nCats = data.Cats.shape[0]
    self.priors = Prior(prior_eta, prior_alpha, prior_beta, prior_xi, prior_tau)
    self.set_projection()
    self.categorical_considerations()
    self.build_sigma_unity()
    return
def __init__(
        self,
        data,
        prior_zeta=GammaPrior(0.5, 0.5),
        prior_sigma=GammaPrior(2., 2.),
        ):
    """Record the data, its dimensions, and the (zeta, sigma) priors."""
    self.data = data
    self.nCol, self.nDat = data.nCol, data.nDat
    self.priors = Prior(prior_zeta, prior_sigma)
    return
def __init__(
        self,
        data,
        nMix,
        prior_alpha=GammaPrior(0.5, 0.5),
        prior_beta=GammaPrior(2., 2.),
        prior_pi=DirichletPrior(1.),
        ):
    """Record the data, the fixed number of mixture components nMix, and
    the (alpha, beta, pi) priors."""
    self.data = data
    self.nMix = nMix
    self.nCol, self.nDat = data.nCol, data.nDat
    self.priors = Prior(prior_alpha, prior_beta, prior_pi)
    return
def __init__(
        self,
        data,
        prior_alpha=GammaPrior(1., 1.),
        prior_beta=GammaPrior(1., 1.),
        prior_eta=GammaPrior(2., 0.5),
        m=20,
        ):
    """Record the data, the truncation level m, and the priors."""
    self.data = data
    self.m = m
    self.nCol, self.nDat = data.nCol, data.nDat
    self.priors = Prior(prior_alpha, prior_beta, prior_eta)
    return
def __init__(
        self,
        data,
        prior_eta=GammaPrior(2., 0.5),
        prior_alpha=GammaPrior(0.5, 0.5),
        prior_beta=GammaPrior(2., 2.),
        prior_xi=GammaPrior(0.5, 0.5),
        prior_tau=GammaPrior(2., 2.),
        m=20,
        ):
    """Record the data, the truncation level m, and the five gamma priors."""
    self.data = data
    self.m = m
    self.nCol, self.nDat = data.nCol, data.nDat
    self.priors = Prior(prior_eta, prior_alpha, prior_beta, prior_xi, prior_tau)
    return
def __init__(
        self,
        data,
        nMix,
        prior_pi=DirichletPrior(0.5,),
        prior_alpha=GammaPrior(0.5, 0.5),
        prior_beta=GammaPrior(2., 2.),
        prior_xi=GammaPrior(0.5, 0.5),
        prior_tau=GammaPrior(2., 2.),
        ):
    """Record the data, the fixed mixture size nMix, and the priors on
    (pi, alpha, beta, xi, tau)."""
    self.data = data
    self.nMix = nMix
    self.nCol, self.nDat = data.nCol, data.nDat
    self.priors = Prior(prior_pi, prior_alpha, prior_beta, prior_xi, prior_tau)
    return
def __init__(
        self,
        data,
        prior_mu=(0., 1.),      # (mean, variance) for mu
        prior_mu0=(0., 1.),     # (mean, variance) for mu0
        prior_Sigma=(5, 1.),    # (extra dof, scale) for Sigma
        prior_Sigma0=(5, 1.),   # (extra dof, scale) for Sigma0
        prior_eta=GammaPrior(2, 10),
        m=20,
        ):
    """Record the data and truncation level m, expand the scalar
    hyperparameter pairs into full normal / inverse-Wishart priors on the
    (nCol - 1)-dimensional transformed scale, and start the worker pool.

    All priors share the dimension d = nCol - 1.
    """
    self.m = m
    self.data = data
    self.nCol = self.data.nCol
    self.nDat = self.data.nDat
    d = self.nCol - 1  # priors live on the (nCol - 1)-dim transformed scale
    mu_actual = NormalPrior(
        np.ones(d) * prior_mu[0],
        np.eye(d) * prior_mu[1],
        )
    # BUG FIX: previously built with prior_mu[1], silently ignoring the
    # scale component of the prior_mu0 argument.
    mu0_actual = NormalPrior(
        np.ones(d) * prior_mu0[0],
        np.eye(d) * prior_mu0[1],
        )
    # NOTE(review): dof is nCol + offset while the scale matrix is d x d;
    # this keeps the original behavior, but confirm the intended dof is not
    # d + offset (see the analogous constructors elsewhere in this project).
    Sigma_actual = InvWishartPrior(
        self.nCol + prior_Sigma[0],
        np.eye(d) * prior_Sigma[1],
        )
    Sigma0_actual = InvWishartPrior(
        self.nCol + prior_Sigma0[0],
        np.eye(d) * prior_Sigma0[1],
        )
    self.priors = Prior(mu_actual, Sigma_actual, mu0_actual, Sigma0_actual, prior_eta)
    self.pool = mp.Pool(processes=8, initializer=limit_cpu)
    return
def __init__(
        self,
        data,
        prior_eta=GammaPrior(2., 0.5),
        prior_alpha=GammaPrior(0.5, 0.5),
        prior_beta=GammaPrior(2., 2.),
        p=10,
        max_clust_count=300,
        ):
    """Record the data, cluster-count cap, and priors; build the projection."""
    self.data = data
    self.p = p
    self.max_clust_count = max_clust_count
    self.nCol, self.nDat = data.nCol, data.nDat
    self.priors = Prior(prior_eta, prior_alpha, prior_beta)
    self.set_projection()
    return
def __init__(
        self,
        data,
        prior_eta=GammaPrior(2., 1.),
        prior_alpha=GammaPrior(0.5, 0.5),
        prior_beta=GammaPrior(2., 2.),
        p=10,
        max_clust_count=300,
        ):
    """Record the data and priors, build the sphere-membership indicator
    matrix, and start the worker pool."""
    self.data = data
    self.p = p
    self.max_clust_count = max_clust_count
    self.nCat = data.nCat
    self.nDat = data.nDat
    self.spheres = data.spheres
    # Indicator matrix: row i flags the categorical columns in sphere i.
    self.sphere_mat = np.zeros((len(self.spheres), self.nCat))
    for row, members in enumerate(self.spheres):
        self.sphere_mat[row][members] = True
    self.priors = Prior(prior_eta, prior_alpha, prior_beta)
    self.pool = Pool(processes=8, initializer=limit_cpu)
    return
def __init__(
        self,
        data,
        prior_eta=GammaPrior(2., 0.5),
        prior_mu=(0, 3.),
        prior_Sigma=(10, 0.5),
        p=10,
        max_clust_count=300,
        ntemps=3,
        stepping=1.05,
        ):
    """Record mixed-data dimensions, expand scalar hyperparameters into
    matrix priors, and set up tempering plus adaptive-Metropolis state."""
    assert type(data) is MixedData
    self.data = data
    self.p = p
    self.max_clust_count = max_clust_count
    self.nCat = data.nCat
    self.nCol = data.nCol
    self.tCol = self.nCol + self.nCat
    self.nDat = data.nDat
    self.nCats = data.Cats.shape[0]
    # Expand the scalar pairs into (mean, sd, inverse-sd) / inverse-Wishart
    # priors on the full tCol-dimensional scale.
    mu_sd = np.sqrt(prior_mu[1])
    prior_mu_actual = NormalPrior(
        np.ones(self.tCol) * prior_mu[0],
        np.eye(self.tCol) * mu_sd,
        np.eye(self.tCol) / mu_sd,
        )
    prior_Sigma_actual = InvWishartPrior(
        self.tCol + prior_Sigma[0],
        np.eye(self.tCol) * prior_Sigma[1],
        )
    self.priors = Prior(prior_eta, prior_mu_actual, prior_Sigma_actual)
    self.set_projection()
    self.categorical_considerations()
    # Parallel tempering: ladder, unraveled indices, swap schedule.
    self.nTemp = ntemps
    self.itl = 1 / stepping**np.arange(ntemps)
    self.temp_unravel = np.repeat(np.arange(self.nTemp), self.nDat)
    self.nSwap_per = self.nTemp // 2
    self.swap_start = 100
    # Adaptive Metropolis: per-observation online covariance and the
    # standard 2.38^2 / d scaling.
    self.am_Sigma = PerObsTemperedOnlineCovariance(
        self.nTemp, self.nDat, self.tCol, self.max_clust_count,
        )
    self.am_scale = 2.38**2 / self.tCol
    return
# NOTE(review): chunk starts mid-literal — the opening of the config dict
# (presumably `d = {` with 'in_data_path') is outside this view.
# Configuration for the mammography anomaly-detection run.
    'in_outcome_path': './ad/mammography/outcome.csv',
    'out_path': './ad/mammography/results_mdppprgln_test.pkl',
    'cat_vars': '[5,6,7,8]',
    'decluster': 'False',
    'quantile': 0.95,
    'nSamp': 5000,
    'nKeep': 2000,
    'nThin': 3,
    'eta_alpha': 2.,
    'eta_beta': 1.,
    }
p = Heap(**d)
raw = read_csv(p.in_data_path).values
out = read_csv(p.in_outcome_path).values
# SECURITY NOTE: eval() on config strings — acceptable only for trusted,
# locally-authored configuration; never for external input.
data = MixedData(
    raw,
    cat_vars=np.array(eval(p.cat_vars), dtype=int),
    decluster=eval(p.decluster),
    quantile=float(p.quantile),
    outcome=out,
    )
data.fill_outcome(out)
# Fit the chain (3 tempering levels), persist thinned samples, reload results.
model = Chain(data, prior_eta=GammaPrior(2, 1), p=10, ntemps=3)
model.sample(p.nSamp)
model.write_to_disk(p.out_path, p.nKeep, p.nThin)
res = Result(p.out_path)
# res.write_posterior_predictive('./test/postpred.csv')
# EOF
# NOTE(review): chunk starts mid-script — `pt`, `comm`, `rank`, and `size`
# (MPI setup) are defined outside this view.
pt.MPI_MESSAGE_SIZE = 2**20  # 1 MiB buffer for parallel-tempering messages
# Worker ranks: run a tempering slave that blocks waiting on rank 0.
if rank > 0:
    chain = pt.PTSlave(comm=comm, statmodel=DPMPG_Chain)
    chain.watch()
# Master rank: load data, drive the tempered sampler, write results.
if rank == 0:
    raw = read_csv('./datasets/ivt_nov_mar.csv')
    # Rotate the last two columns to the front before building the data object.
    raw2 = np.hstack((raw.T[-2:].T, raw.T[:-2].T))
    data = Data_From_Raw(raw2, True)
    data.write_empirical('./output/dppgln2/empirical.csv')
    # One temperature per worker rank, geometric ladder with ratio 1.05.
    model = pt.PTMaster(
        comm,
        temperature_ladder=1.05**np.array(range(size - 1)),
        data=data,
        prior_eta=GammaPrior(2., .1))
    model.sample(10000)
    model.write_to_disk('./output/dppgln2/results_2_1e-1.db', 5000, 1)
    model.complete()
# res = DPMPG_Result('./output/dppgln/results_2_1e-1.db') # not working yet.
# res.write_posterior_predictive('./output/dppgln/postpred_2_1e-1.csv')
## relevant to non-MPI version
# if __name__ == '__main__':
#     raw = read_csv('./datasets/ivt_nov_mar.csv')
#     data = Data_From_Raw(raw, True)
#     data.write_empirical('./output/dppgln2/empirical.csv')
#
#     model = pt.PTMaster(
#         statmodel = DPMPG_Chain,
from dp_projgamma import DPMPG, ResultDPMPG
from projgamma import GammaPrior
from data import Data_From_Raw
from pandas import read_csv
from random import shuffle

path = './datasets/ivt_nov_mar.csv'

if __name__ == '__main__':
    # Build the data object straight from the raw CSV (second arg: decluster flag).
    data = Data_From_Raw(read_csv(path), True)
    data.write_empirical('./output/dpmpg/empirical.csv')

    # DP mixture of projected gammas; eta ~ Gamma(2, 10).
    sampler = DPMPG(data, prior_eta=GammaPrior(2., 10.))
    sampler.initialize_sampler(50000)
    sampler.sample(50000)

    # Persist the back half of the chain, thinned by 5, then write the
    # posterior predictive from the saved results.
    results_path = './output/dpmpg/results_2_1e1.db'
    sampler.write_to_disk(results_path, 25000, 5)
    ResultDPMPG(results_path).write_posterior_predictive(
        './output/dpmpg/postpred_2_1e1.csv')

# EOF
# NOTE(review): chunk is truncated at BOTH ends — it continues an
# `if p.model...` branch begun above (the opening of the first os.path.join
# is out of view) and the final os.path.join is completed below this view.
# Indentation here is reconstructed; verify against the full file.
        p.out_folder, p.model, 'empirical.csv',
        )
    out_path = os.path.join(
        p.out_folder, p.model,
        'results_{}_{}.db'.format(p.eta_shape, p.eta_rate),
        )
    pp_path = os.path.join(
        p.out_folder, p.model,
        'postpred_{}_{}.csv'.format(p.eta_shape, p.eta_rate),
        )
    model = Chain(data, prior_eta=GammaPrior(float(p.eta_shape), float(p.eta_rate)))
# Mixture-model branch: output paths keyed by the number of components.
elif p.model.startswith('m'):
    emp_path = os.path.join(
        p.out_folder, p.model, 'empirical.csv',
        )
    out_path = os.path.join(
        p.out_folder, p.model,
        'results_{}.db'.format(p.nMix),
        )
    pp_path = os.path.join(
        p.out_folder, p.model,
# NOTE(review): chunk starts mid-literal — the opening of the config dict
# (presumably `d = {` with 'in_data_path') is outside this view.
# Configuration for the forest-cover anomaly-detection run.
    'in_outcome_path': './ad/cover/outcome.csv',
    'out_path': './ad/cover/results_mdppprg.pkl',
    'cat_vars': '[9,10,11,12]',
    'decluster': 'False',
    'quantile': 0.998,
    'nSamp': 50000,
    'nKeep': 20000,
    'nThin': 30,
    'eta_alpha': 2.,
    'eta_beta': 1.,
    }
p = Heap(**d)
raw = read_csv(p.in_data_path).values
out = read_csv(p.in_outcome_path).values
# SECURITY NOTE: eval() on config strings — acceptable only for trusted,
# locally-authored configuration; never for external input.
data = MixedData(
    raw,
    cat_vars=np.array(eval(p.cat_vars), dtype=int),
    decluster=eval(p.decluster),
    quantile=float(p.quantile),
    outcome=out,
    )
data.fill_outcome(out)
# Fit the chain, persist thinned samples, reload results.
model = Chain(data, prior_eta=GammaPrior(2, 1), p=10)
model.sample(p.nSamp)
model.write_to_disk(p.out_path, p.nKeep, p.nThin)
res = Result(p.out_path)
# res.write_posterior_predictive('./test/postpred.csv')
# EOF
from dp_rprojgamma import DPMPG, ResultDPMPG
from projgamma import GammaPrior
from data import Data_From_Raw
from pandas import read_csv
from random import shuffle

path = './datasets/ivt_nov_mar.csv'

if __name__ == '__main__':
    # Build the data object straight from the raw CSV (second arg: decluster flag).
    data = Data_From_Raw(read_csv(path), True)
    data.write_empirical('./output/dpmrpg/empirical.csv')

    # Restricted projected-gamma DP mixture; eta ~ Gamma(2, 1).
    sampler = DPMPG(data, prior_eta=GammaPrior(2., 1e0))
    sampler.initialize_sampler(20000)
    sampler.sample(20000)

    # Persist the back half of the chain, thinned by 2, then write the
    # posterior predictive from the saved results.
    results_path = './output/dpmrpg/results_2_1e0.db'
    sampler.write_to_disk(results_path, 10000, 2)
    ResultDPMPG(results_path).write_posterior_predictive(
        './output/dpmrpg/postpred_2_1e0.csv')

# EOF
# NOTE(review): chunk starts mid-script — `p` (parsed argument namespace),
# `models`, `Data`, `os`, and `GammaPrior` come from outside this view.
# Look up the chain/result classes registered for the requested model.
Chain = models.Chains[p.model]
Result = models.Results[p.model]
data = Data(os.path.join(p.in_path, 'data.db'))
# Only Dirichlet-process models are supported by this driver.
if p.model.startswith('dp'):
    emp_path = os.path.join(p.in_path, p.model, 'empirical.csv')
    out_path = os.path.join(
        p.in_path, p.model,
        'results_{}.db'.format(p.p),
        )
    pp_path = os.path.join(
        p.in_path, p.model,
        'postpred_{}.csv'.format(p.p),
        )
    model = Chain(
        data,
        p = float(p.p),
        prior_eta = GammaPrior(float(p.eta_shape), float(p.eta_rate)),
        )
else:
    raise ValueError
# Write the empirical distribution, run the chain, persist thinned samples,
# and emit the posterior predictive.
data.write_empirical(emp_path)
model.sample(int(p.nSamp))
model.write_to_disk(out_path, int(p.nKeep), int(p.nThin))
res = Result(out_path)
res.write_posterior_predictive(pp_path)
# EOF
from dp_projgamma import DPMPG, ResultDPMPG
from projgamma import GammaPrior
from data import Data_From_Raw
from pandas import read_csv
from random import shuffle

path = './datasets/ivt_nov_mar.csv'

if __name__ == '__main__':
    # Build the data object straight from the raw CSV (second arg: decluster flag).
    data = Data_From_Raw(read_csv(path), True)
    data.write_empirical('./output/dpmpg/empirical.csv')

    # DP mixture of projected gammas; eta ~ Gamma(2, 0.1).
    sampler = DPMPG(data, prior_eta=GammaPrior(2., 0.1))
    sampler.initialize_sampler(50000)
    sampler.sample(50000)

    # Persist the back half of the chain, thinned by 5, then write the
    # posterior predictive from the saved results.
    results_path = './output/dpmpg/results_2_1e-1.db'
    sampler.write_to_disk(results_path, 25000, 5)
    ResultDPMPG(results_path).write_posterior_predictive(
        './output/dpmpg/postpred_2_1e-1.csv')

# EOF
# NOTE(review): chunk starts mid-script — `pt`, `comm`, `rank`, and `size`
# (MPI setup) are defined outside this view.
# Worker ranks: run a tempering slave that blocks waiting on rank 0.
if rank > 0:
    chain = pt.PTSlave(comm=comm, statmodel=DPMPG_Chain)
    chain.watch()
# Master rank: load data, drive the tempered sampler, write results.
if rank == 0:
    raw = read_csv('./datasets/ivt_nov_mar.csv')
    data = Data_From_Raw(raw, True)
    data.write_empirical('./output/dppgln/empirical.csv')
    # One temperature per worker rank, geometric ladder with ratio 1.05.
    model = pt.PTMaster(
        comm,
        # statmodel = DPMPG_Chain,
        temperature_ladder=1.05**np.array(range(size - 1)),
        data=data,
        prior_eta=GammaPrior(2., 1.))
    model.sample(50000)
    model.write_to_disk('./output/dppgln/results_2_1e0.db', 25000, 5)
    model.complete()
    res = DPMPG_Result('./output/dppgln/results_2_1e0.db')
    res.write_posterior_predictive('./output/dppgln/postpred_2_1e0.csv')
#
# if __name__ == '__main__':
#     raw = read_csv('./datasets/ivt_nov_mar.csv')
#     data = Data_From_Raw(raw, True)
#     data.write_empirical('./output/dpmpg_empirical_decluster.csv')
#
#     model = pt.PTMaster(
#         statmodel = DPMPG_Chain,
from dp_projgamma import DPMPG, ResultDPMPG
from projgamma import GammaPrior
from data import Data_From_Raw
from pandas import read_csv
from random import shuffle

path = './datasets/ivt_nov_mar.csv'

if __name__ == '__main__':
    # Build the data object straight from the raw CSV (second arg: decluster flag).
    data = Data_From_Raw(read_csv(path), True)
    data.write_empirical('./output/dpmpg/empirical.csv')

    # DP mixture of projected gammas; eta ~ Gamma(2, 1).
    sampler = DPMPG(data, prior_eta=GammaPrior(2., 1.))
    sampler.initialize_sampler(50000)
    sampler.sample(50000)

    # Persist the back half of the chain, thinned by 5, then write the
    # posterior predictive from the saved results.
    results_path = './output/dpmpg/results_2_1e0.db'
    sampler.write_to_disk(results_path, 25000, 5)
    ResultDPMPG(results_path).write_posterior_predictive(
        './output/dpmpg/postpred_2_1e0.csv')

# EOF