def test_sample(self):
    """Smoke-test Sampler.sample with a constant threshold.

    Verifies that each of the T pools carries the expected iteration
    index, acceptance ratio, threshold, particle count, distances and
    normalized weights.
    """
    n_particles = 10
    n_iter = 2
    model = lambda theta: None
    fixed_dist = 1.0
    metric = lambda X, Y: fixed_dist
    prior = abcpmc.TophatPrior([0], [100])
    sampler = abcpmc.Sampler(n_particles, 0, model, metric)
    threshold = 10
    eps_proposal = abcpmc.ConstEps(n_iter, threshold)

    seen = 0
    for pool in sampler.sample(prior, eps_proposal):
        assert pool is not None
        assert pool.t == seen
        assert pool.ratio == 1.0
        assert pool.eps == threshold
        assert len(pool.thetas) == n_particles
        assert np.all(pool.thetas != 0.0)
        assert len(pool.dists) == n_particles
        assert np.all(pool.dists == fixed_dist)
        assert len(pool.ws) == n_particles
        # weights form a probability distribution
        assert np.allclose(np.sum(pool.ws), 1.0)
        seen += 1
    # the generator must stop after exactly T iterations
    assert seen == n_iter
def sample(T, eps_val, eps_min):
    """Run T ABC-PMC iterations with an OLCM particle proposal.

    The acceptance threshold starts at ``eps_val`` and is tightened each
    iteration to a percentile of the accepted distances, never dropping
    below ``eps_min``.  Returns the list of pools, one per iteration.
    """
    sampler = abcpmc.Sampler(N=1000, Y=data, postfn=simz,
                             dist=distance, pool=mpi_pool)
    sampler.particle_proposal_cls = abcpmc.OLCMParticleProposal
    eps = abcpmc.ConstEps(T, eps_val)

    pools = []
    for pool in sampler.sample(prior, eps):
        print("T: {0}, eps: {1:>.4f}, ratio: {2:>.4f}".format(
            pool.t, eps(pool.t), pool.ratio))
        plot_thetas(pool.thetas, pool.ws, pool.t)

        # tighten the threshold: median for the first 5 iterations,
        # 75th percentile afterwards, floored at eps_min
        quantile = 50 if pool.t < 5 else 75
        eps.eps = max(np.percentile(pool.dists, quantile), eps_min)

        pools.append(pool)
    # sampler.close()  # intentionally not closed, as in the original
    return pools
def test_ConstEps():
    """ConstEps must yield the same threshold value exactly T times."""
    threshold = 0.5
    n_iter = 5
    eps = abcpmc.ConstEps(n_iter, threshold)
    count = 0
    for value in eps:
        assert value == threshold
        count += 1
    # iterator exhausts after exactly T steps
    assert count == n_iter
def abc(pewl, name=None, niter=None, npart=None, restart=None):
    """Run an ABC-PMC inference loop over the dust model.

    Parameters
    ----------
    pewl :
        Worker pool handed to abcpmc.Sampler (e.g. an MPI pool).
    name : str, optional
        Unused here; kept for the caller's interface.  # NOTE(review): confirm
    niter : int, optional
        Number of PMC iterations (passed to ConstEps).
    npart : int, optional
        Number of particles; overridden by the restart pool size if restarting.
    restart : int, optional
        Iteration index to resume from; reads theta/rho/w files from abc_dir.
    """
    if restart is not None:
        # read pool: rebuild the PoolSpec of iteration `restart` from the
        # theta/rho/w text files written by a previous run
        theta_init = np.loadtxt(os.path.join(abc_dir, 'theta.t%i.dat' % restart))
        rho_init = np.loadtxt(os.path.join(abc_dir, 'rho.t%i.dat' % restart))
        w_init = np.loadtxt(os.path.join(abc_dir, 'w.t%i.dat' % restart))
        init_pool = abcpmc.PoolSpec(restart, None, None, theta_init, rho_init, w_init)
        # particle count is dictated by the restart pool, not the argument
        npart = len(theta_init)
        print('%i particles' % npart)
    else:
        init_pool = None

    #--- inference with ABC-PMC below ---
    # prior
    prior = abcpmc.TophatPrior(prior_min, prior_max)
    # sampler
    abcpmc_sampler = abcpmc.Sampler(
        N=npart,  # N_particles
        Y=x_obs,  # data
        postfn=_sumstat_model_wrap,  # simulator
        dist=_distance_metric_wrap,  # distance metric
        pool=pewl,
        postfn_kwargs={'dem': dem}#, dist_kwargs={'method': 'L2', 'phi_err': phi_err}
        )
    # threshold schedule: constant eps, manually tightened each iteration below
    eps = abcpmc.ConstEps(niter, eps0)
    print('eps0', eps.eps)

    for pool in abcpmc_sampler.sample(prior, eps, pool=init_pool):
        # pool.eps is a vector here (one threshold per summary statistic)
        eps_str = ", ".join(["{0:>.4f}".format(e) for e in pool.eps])
        print("T: {0}, eps: [{1}], ratio: {2:>.4f}".format(pool.t, eps_str, pool.ratio))
        for i, (mean, std) in enumerate(zip(*abcpmc.weighted_avg_and_std(pool.thetas, pool.ws, axis=0))):
            print(u" theta[{0}]: {1:>.4f} \u00B1 {2:>.4f}".format(i, mean, std))
        print('dist', pool.dists)

        # write out theta, weights, and distances to file
        dustInfer.writeABC('eps', pool, abc_dir=abc_dir)
        dustInfer.writeABC('theta', pool, abc_dir=abc_dir)
        dustInfer.writeABC('w', pool, abc_dir=abc_dir)
        dustInfer.writeABC('rho', pool, abc_dir=abc_dir)

        # update epsilon based on median thresholding (per-statistic median)
        eps.eps = np.median(pool.dists, axis=0)
        print('eps%i' % pool.t, eps.eps)
        print('----------------------------------------')
        #if pool.ratio <0.2: break
    abcpmc_sampler.close()
    return None
def test_sample(self):
    """Minimal smoke test: sampling yields T pools of N particles each."""
    n_particles = 10
    n_iter = 2
    model = lambda theta: None
    metric = lambda X, Y: 0
    prior = abcpmc.TophatPrior([0], [100])
    sampler = abcpmc.Sampler(n_particles, 0, model, metric)
    schedule = abcpmc.ConstEps(n_iter, 10)

    count = 0
    for pool in sampler.sample(prior, schedule):
        assert pool is not None
        assert len(pool.thetas) == n_particles
        count += 1
    # exactly T iterations produced
    assert count == n_iter
def launch(threads):
    """Run the ABC-PMC loop, tightening eps to the alpha-th percentile
    of the accepted distances after every iteration.

    Parameters
    ----------
    threads :
        Unused inside this function; kept for the caller's interface.

    Returns
    -------
    list
        One pool (namedtuple of one iteration's results) per iteration.
    """
    eps = abcpmc.ConstEps(T, eps_start)

    pools = []
    # pool is a namedtuple representing the values of one iteration
    for pool in sampler.sample(prior, eps):
        # BUGFIX: the threshold schedule is queried with the iteration
        # index pool.t (as at every other call site in this file), not
        # with the eps value itself; eps(pool.eps) only appeared to work
        # because ConstEps ignores its argument.
        print("T: {0}, eps: {1:>.4f}, ratio: {2:>.4f}".format(
            pool.t, eps(pool.t), pool.ratio))
        for i, (mean, std) in enumerate(
                zip(*abcpmc.weighted_avg_and_std(pool.thetas, pool.ws, axis=0))):
            print(u" theta[{0}]: {1:>.4f} \u00B1 {2:>.4f}".format(
                i, mean, std))
        # reduce the eps value to the alpha-th percentile of the sorted distances
        eps.eps = np.percentile(pool.dists, alpha)
        pools.append(pool)
    sampler.close()
    return pools
def sample(T, eps_val, eps_min):
    """Run T ABC-PMC iterations over a 5-parameter HOD-style tophat prior.

    The per-statistic threshold vector is tightened each iteration using
    increasingly aggressive percentiles of the accepted distances
    (50th, then 75th, then 90th).  Returns the list of pools.

    Note: ``eps_min`` is currently unused (the clamp is commented out
    below because ``eps.eps`` is a vector here and the scalar comparison
    is ambiguous).
    """
    prior = abcpmc.TophatPrior(
        [10., np.log(.1), 11.02, .8, 13.],
        [13., np.log(.7), 13.02, 1.3, 14.])
    abcpmc_sampler = abcpmc.Sampler(N=1000, Y=data, postfn=simz,
                                    dist=distance, pool=mpi_pool)
    abcpmc_sampler.particle_proposal_cls = abcpmc.ParticleProposal
    #abcpmc.Sampler.particle_proposal_kwargs = {'k': 50}
    #abcpmc_sampler.particle_proposal_cls = abcpmc.KNNParticleProposal
    eps = abcpmc.ConstEps(T, [1.e13, 1.e13])

    pools = []
    for pool in abcpmc_sampler.sample(prior, eps):
        print("T:{0},ratio: {1:>.4f}".format(pool.t, pool.ratio))
        print(eps(pool.t))
        plot_thetas(pool.thetas, pool.ws, pool.t)
        # BUGFIX: the originals referenced undefined names `t`, `theta`
        # and `w` (NameError at runtime); the iteration index, particles
        # and weights live on the pool namedtuple.
        np.savetxt(
            "/home/mj/public_html/nbar_gmf5_Mr20_theta_t" + str(pool.t) + ".dat",
            pool.thetas)
        np.savetxt(
            "/home/mj/public_html/nbar_gmf5_Mr20_w_t" + str(pool.t) + ".dat",
            pool.ws)

        # tighten the per-statistic threshold with progressively higher
        # percentiles of the accepted distances
        if pool.t < 3:
            eps.eps = np.percentile(np.atleast_2d(pool.dists), 50, axis=0)
        elif (pool.t > 2) and (pool.t < 20):
            eps.eps = np.percentile(np.atleast_2d(pool.dists), 75, axis=0)
            abcpmc_sampler.particle_proposal_cls = abcpmc.ParticleProposal
        else:
            eps.eps = np.percentile(np.atleast_2d(pool.dists), 90, axis=0)
            abcpmc_sampler.particle_proposal_cls = abcpmc.ParticleProposal
        #if eps.eps < eps_min:
        #    eps.eps = eps_min
        pools.append(pool)
    #abcpmc_sampler.close()
    return pools
def config(self):
    """Read the ABC-PMC options from the ini file and build the prior,
    threshold schedule and abcpmc sampler.

    Raises
    ------
    ValueError
        If abcpmc is not installed, if a Gaussian prior covariance has
        the wrong number of columns, or if ``set_prior`` is neither
        'uniform' nor 'gaussian'.
    """
    try:
        import abcpmc
    except ImportError:
        raise ValueError("To use ABC PMC you need to install it with pip install abcpmc")

    global abc_pipeline
    abc_pipeline = self.pipeline

    self.threshold = self.read_ini("threshold", str, 'LinearEps')
    self.metric_kw = self.read_ini("metric", str, 'chi2')  # mean, chi2 or other
    if self.metric_kw == 'other':
        self.distance_func = self.read_ini("distance_func", str, None)  # only for other metric,
        self.metric = self.distance_func[1:-1]
    self.epimax = self.read_ini('epimax', float, 5.0)
    self.epimin = self.read_ini('epimin', float, 1.0)
    self.part_prop = self.read_ini("particle_prop", str, 'weighted_cov')
    self.set_prior = self.read_ini("set_prior", str, 'uniform')
    self.param_cov = self.read_ini("param_cov_file", str, 'None')
    self.knn = self.read_ini("num_nn", int, 10)
    self.npart = self.read_ini("npart", int, 100)
    self.niter = self.read_ini("niter", int, 2)
    self.ngauss = self.read_ini("ngauss", int, 4)
    self.run_multigauss = self.read_ini("run_multigauss", bool, False)
    self.diag_cov = self.read_ini("diag_cov", bool, False)
    self.ndim = len(self.pipeline.varied_params)

    # options for decreasing threshold
    if self.threshold == 'ConstEps':
        self.eps = abcpmc.ConstEps(self.niter, self.epimax)
    elif self.threshold == 'ExpEps':
        self.eps = abcpmc.ExponentialEps(self.niter, self.epimax, self.epimin)
    else:
        self.eps = abcpmc.LinearEps(self.niter, self.epimax, self.epimin)

    print("\nRunning ABC PMC")
    print("with %d particles, %s prior, %s threshold, %d iterations over (%f,%f), %s kernal \n" % (
        self.npart, self.set_prior, self.threshold, self.niter,
        self.epimax, self.epimin, self.part_prop))

    # Initial positions for all of the parameters
    self.p0 = np.array([param.start for param in self.pipeline.varied_params])

    # Data file is read for use in dist() for each step
    # parameter covariance used in the prior
    self.data, self.cov, self.invcov = self.load_data()

    # At the moment the same prior (with variable hyperparameters) is
    # used for all parameters - would be nice to change this to be more flexible
    self.pmin = np.zeros(self.ndim)
    self.pmax = np.zeros(self.ndim)
    for i, pi in enumerate(self.pipeline.varied_params):
        self.pmin[i] = pi.limits[0]
        self.pmax[i] = pi.limits[1]

    if self.set_prior.lower() == 'uniform':
        self.prior = abcpmc.TophatPrior(self.pmin, self.pmax)
    elif self.set_prior.lower() == 'gaussian':
        sigma2 = np.loadtxt(self.param_cov)
        if len(np.atleast_2d(sigma2)[0][:]) != self.ndim:
            # BUGFIX: the % operator previously bound only to len(...),
            # raising "not enough arguments for format string" and passing
            # self.ndim as a second ValueError argument; the format values
            # must be a tuple.
            raise ValueError("Cov matrix for Gaussian prior has %d columns for %d params"
                             % (len(np.atleast_2d(sigma2)[0][:]), self.ndim))
        else:
            self.prior = abcpmc.GaussianPrior(self.p0, np.atleast_2d(sigma2))
    else:
        raise ValueError("Please set the ABC option 'set_prior' to either 'uniform' or 'gaussian'. At the moment only 'uniform' works in the general case.")

    # create sampler
    self.sampler = abcpmc.Sampler(N=self.npart, Y=self.data, postfn=abc_model, dist=self.dist)

    # set particle proposal kernal
    abcpmc.Sampler.particle_proposal_kwargs = {}
    if self.part_prop == 'KNN':
        abcpmc.Sampler.particle_proposal_kwargs = {'k': self.knn}
        self.sampler.particle_proposal_cls = abcpmc.KNNParticleProposal
    elif self.part_prop == 'OLCM':
        self.sampler.particle_proposal_cls = abcpmc.OLCMParticleProposal

    self.converged = False
def std(x, y):
    # Distance metric: absolute difference of the two samples' standard deviations.
    return abs(np.std(x) - np.std(y))

''' Setup '''
# 'Best' guess about the distribution, uniform distribution
prior = abcpmc.TophatPrior([0.0, 1.0], [2.0, 3.0])

# As threshold for accepting draws from the prior we use the alpha-th percentile
# of the sorted distances of the particles of the current iteration
alpha = 75
T = 2  # sample for T iterations
eps_start = 20.0  # sufficiently high starting threshold (like 5x the variability or more)
eps = abcpmc.ConstEps(T, eps_start)

''' Sampling function '''
# NOTE(review): this definition is truncated in the source at the
# enumerate(...) header below — the loop body is missing; see the complete
# copy of launch() elsewhere in this file.
def launch(threads):
    eps = abcpmc.ConstEps(T, eps_start)
    pools = []
    # pool is a namedtuple representing the values of one iteration
    for pool in sampler.sample(prior, eps):
        print("T: {0}, eps: {1:>.4f}, ratio: {2:>.4f}".format(
            pool.t, eps(pool.eps), pool.ratio))
        for i, (mean, std) in enumerate(
                zip(*abcpmc.weighted_avg_and_std(pool.thetas, pool.ws, axis=0))):
def ABC(T, eps_input, Npart=1000, cen_tf=None, cen_prior_name=None, cen_abcrun=None):
    ''' ABC-PMC implementation.

    Parameters
    ----------
    T : (int) Number of iterations
    eps_input : (float) Starting epsilon threshold value
    N_part : (int) Number of particles
    prior_name : (string) String that specifies what prior to use.
    abcrun : (string) String that specifies abc run information
    '''
    abcinh = ABCInherit(cen_tf, abcrun=cen_abcrun, prior_name=cen_prior_name)

    # Data (Group Catalog Satellite fQ): quiescent fraction per mass bin
    grpcat = GroupCat(Mrcut=18, position='satellite')
    grpcat.Read()
    qfrac = Fq()
    m_bin = np.array([9.7, 10.1, 10.5, 10.9, 11.3])
    M_mid = 0.5 * (m_bin[:-1] + m_bin[1:])  # mass-bin centers
    sfq = qfrac.Classify(grpcat.mass, grpcat.sfr, np.median(grpcat.z),
                         sfms_prop=abcinh.sim_kwargs['sfr_prop']['sfms'])
    ngal, dum = np.histogram(grpcat.mass, bins=m_bin)
    ngal_q, dum = np.histogram(grpcat.mass[sfq == 'quiescent'], bins=m_bin)
    # summary statistic: [bin centers, quiescent fraction]
    data_sum = [M_mid, ngal_q.astype('float') / ngal.astype('float')]

    # Simulator: cache the central-SFR-assigned satellite catalog on disk
    cen_assigned_sat_file = ''.join([
        '/data1/hahn/pmc_abc/pickle/', 'satellite', '.cenassign',
        '.', cen_abcrun, '_ABC', '.', cen_prior_name, '_prior', '.p'
    ])
    if not os.path.isfile(cen_assigned_sat_file):
        sat_cen = AssignCenSFR(cen_tf, abcrun=cen_abcrun, prior_name=cen_prior_name)
        pickle.dump(sat_cen, open(cen_assigned_sat_file, 'wb'))
    else:
        sat_cen = pickle.load(open(cen_assigned_sat_file, 'rb'))

    def Simz(tt):  # Simulator (forward model)
        # tt = [tQdelay slope, tQdelay offset]
        tqdel_dict = {'name': 'explin', 'm': tt[0], 'b': tt[1]}
        sat_evol = EvolveSatSFR(sat_cen, tqdelay_dict=tqdel_dict)
        sfq_sim = qfrac.Classify(sat_evol.mass, sat_evol.sfr, sat_evol.zsnap,
                                 sfms_prop=sat_evol.sfms_prop)
        ngal_sim, dum = np.histogram(sat_evol.mass, bins=m_bin)
        ngal_q_sim, dum = np.histogram(sat_evol.mass[sfq_sim == 'quiescent'],
                                       bins=m_bin)
        # same summary-statistic shape as data_sum
        sim_sum = [
            M_mid, ngal_q_sim.astype('float') / ngal_sim.astype('float')
        ]
        return sim_sum

    # Priors
    prior_min = [-11.75, 2.]
    prior_max = [-10.25, 4.]
    prior = abcpmc.TophatPrior(prior_min, prior_max)  # ABCPMC prior object

    def rho(simum, datum):
        # distance: sum of squared differences of the quiescent fractions
        datum_dist = datum[1]
        simum_dist = simum[1]
        drho = np.sum((datum_dist - simum_dist)**2)
        return drho

    # output file paths, parameterized by iteration index
    abcrun_flag = cen_abcrun + '_central'
    theta_file = lambda pewl: ''.join([
        code_dir(), 'dat/pmc_abc/', 'Satellite.tQdelay.theta_t',
        str(pewl), '_', abcrun_flag, '.dat'
    ])
    w_file = lambda pewl: ''.join([
        code_dir(), 'dat/pmc_abc/', 'Satellite.tQdelay.w_t',
        str(pewl), '_', abcrun_flag, '.dat'
    ])
    dist_file = lambda pewl: ''.join([
        code_dir(), 'dat/pmc_abc/', 'Satellite.tQdelay.dist_t',
        str(pewl), '_', abcrun_flag, '.dat'
    ])
    eps_file = ''.join([
        code_dir(), 'dat/pmc_abc/Satellite.tQdelay.epsilon_',
        abcrun_flag, '.dat'
    ])

    eps = abcpmc.ConstEps(T, eps_input)
    # use the MPI pool when available, fall back to serial sampling
    try:
        mpi_pool = mpi_util.MpiPool()
        abcpmc_sampler = abcpmc.Sampler(
            N=Npart,  # N_particles
            Y=data_sum,  # data
            postfn=Simz,  # simulator
            dist=rho,  # distance function
            pool=mpi_pool)
    except AttributeError:
        abcpmc_sampler = abcpmc.Sampler(
            N=Npart,  # N_particles
            Y=data_sum,  # data
            postfn=Simz,  # simulator
            dist=rho)  # distance function
    abcpmc_sampler.particle_proposal_cls = abcpmc.ParticleProposal

    pools = []
    # truncate the epsilon log file
    f = open(eps_file, "w")
    f.close()
    eps_str = ''
    for pool in abcpmc_sampler.sample(prior, eps, pool=None):
        print '----------------------------------------'
        print 'eps ', pool.eps
        new_eps_str = '\t' + str(pool.eps) + '\n'
        if eps_str != new_eps_str:  # if eps is different, open file and append
            f = open(eps_file, "a")
            eps_str = new_eps_str
            f.write(eps_str)
            f.close()

        print("T:{0},ratio: {1:>.4f}".format(pool.t, pool.ratio))
        print eps(pool.t)

        # write theta, weights, and distances to file
        np.savetxt(theta_file(pool.t), pool.thetas,
                   header='tQdelay_slope, tQdelay_offset')
        np.savetxt(w_file(pool.t), pool.ws)
        np.savetxt(dist_file(pool.t), pool.dists)

        # update epsilon based on median thresholding
        eps.eps = np.median(pool.dists)
        pools.append(pool)

    return pools
def main_abcpmc_MUSIC2(conf, test=False):
    """Drive an abcpmc run for the MUSIC-2 survey comparison.

    Parameters
    ----------
    conf : configparser-like object
        Run configuration; sections used here: 'paths', 'metrics',
        'prior', 'pmc', 'mp', 'flavor', 'simulation'.
    test : bool
        If True, only evaluate the distance metrics on a pickled
        simulated survey against the real one and return.

    Notes (from the original author)
    --------------------------------
    eps_start matters: the next iteration only starts once Nw trials fall
    within the threshold, so far more particles than Nw may be computed.
    Regarding threads: 14-16 are fine; more would not be fully used and
    just sit in the task queue.
    """
    # Loads the real data to compare with (and if necessary also test data)
    data = iom.unpickleObject(conf['paths']['surveyreal'])
    if test:
        dataMUSIC2 = iom.unpickleObject(conf['paths']['surveysim'])
        print(type(dataMUSIC2.Rmodel), conf['paths']['surveysim'])
        surmet.abcpmc_dist_severalMetrices(
            dataMUSIC2, data,
            metrics=json.loads(conf['metrics']['used']),
            delal=False,
            stochdrop=conf['flavor']['stochdrop'],
            phoenixdrop=conf['flavor']['phoenixdrop'],
            outpath='/data/')
        return 0

    """ The abcpmc part starts: Define thetas i.e. parameter values to be inferred and priors"""
    if conf['prior']['type'] == 'tophat':
        bounds = json.loads(conf['prior']['bounds'])
        prior = abcpmc.TophatPrior(bounds[0], bounds[1])
    elif conf['prior']['type'] == 'gaussian':
        means = json.loads(conf['prior']['means'])
        COV = json.loads(conf['prior']['covariance'])
        prior = abcpmc.GaussianPrior(mu=means, sigma=COV)
    else:
        # unsupported prior type: report and bail out
        print('inference_abcpmc::main_abcpmc_MUSIC2: prior %s is unknown!'
              % (conf['prior']['type']))
        return 0

    # per-metric starting thresholds, constant schedule
    eps = abcpmc.ConstEps(conf.getint('pmc', 'T'),
                          json.loads(conf['metrics']['eps_startlimits']))

    if test:
        # NOTE(review): unreachable — the `test` branch above already returned.
        sampler = abcpmc.Sampler(N=conf.getint('pmc', 'Nw'), Y=data,
                                 postfn=testrand,
                                 dist=testmetric,
                                 threads=conf.getint('mp', 'Nthreads'),
                                 maxtasksperchild=conf.getint('mp', 'maxtasksperchild'))
    else:
        sampler = abcpmc.Sampler(N=conf.getint('pmc', 'Nw'), Y=data,
                                 postfn=partial(music2run.main_ABC,
                                                parfile=conf['simulation']['parfile']),
                                 dist=partial(surmet.abcpmc_dist_severalMetrices,
                                              metrics=json.loads(conf['metrics']['used']),
                                              outpath=conf['paths']['abcpmc'],
                                              stochdrop=conf['flavor']['stochdrop'],
                                              phoenixdrop=conf['flavor']['phoenixdrop']),
                                 threads=conf.getint('mp', 'Nthreads'),
                                 maxtasksperchild=conf.getint('mp', 'maxtasksperchild'))

    # Prepares the file for counting
    with open(conf['paths']['abcpmc'] + 'count.txt', 'w+') as f:
        f.write('0')

    sampler.particle_proposal_cls = abcpmc.OLCMParticleProposal

    """ compare with AstroABC
    sampler = astroabc.ABC_class(Ndim,walkers,data,tlevels,niter,priors,**prop)
    sampler.sample(music2run.main_astroABC)
    """
    # startfrom=iom.unpickleObject('/data/ClusterBuster-Output/MUSIC_NVSS02_Test01/launch_pools')
    pool = None  # startfrom[-1]
    launch(sampler, prior, conf.getfloat('pmc', 'alpha'), eps,
           surveypath=conf['paths']['abcpmc'], pool=pool)
def FixedTauABC(T, eps_input, fixtau='satellite', Npart=1000, prior_name='try0',
                observables=['fqz_multi'], abcrun=None,
                restart=False, t_restart=None, eps_restart=None, **sim_kwargs):
    ''' Run ABC-PMC analysis for central galaxy SFH model with *FIXED* quenching
    timescale

    Parameters
    ----------
    T : (int) Number of iterations
    eps_input : (float) Starting epsilon threshold value
    N_part : (int) Number of particles
    prior_name : (string) String that specifies what prior to use.
    abcrun : (string) String that specifies abc run information
    '''
    # eps_input and observables must agree in length/scalar-ness
    if isinstance(eps_input, list):
        if len(eps_input) != len(observables):
            raise ValueError
    if len(observables) > 1 and isinstance(eps_input, float):
        raise ValueError

    # output abc run details
    sfinherit_kwargs, abcrun_flag = MakeABCrun(
        abcrun=abcrun, Niter=T, Npart=Npart, prior_name=prior_name,
        eps_val=eps_input, restart=restart, **sim_kwargs)

    # Data
    data_sum = DataSummary(observables=observables)
    # Priors
    prior_min, prior_max = PriorRange(prior_name)
    prior = abcpmc.TophatPrior(prior_min, prior_max)  # ABCPMC prior object

    def Simz(tt):  # Simulator (forward model)
        # tt = [gv_slope, gv_offset, fudge_slope, fudge_offset]
        gv_slope = tt[0]
        gv_offset = tt[1]
        fudge_slope = tt[2]
        fudge_offset = tt[3]
        sim_kwargs = sfinherit_kwargs.copy()
        sim_kwargs['sfr_prop']['gv'] = {
            'slope': gv_slope, 'fidmass': 10.5, 'offset': gv_offset}
        sim_kwargs['evol_prop']['fudge'] = {
            'slope': fudge_slope, 'fidmass': 10.5, 'offset': fudge_offset}
        # quenching timescale is held fixed to `fixtau`
        sim_kwargs['evol_prop']['tau'] = {'name': fixtau}
        sim_output = SimSummary(observables=observables, **sim_kwargs)
        return sim_output

    # output file paths, parameterized by iteration index
    theta_file = lambda pewl: ''.join([code_dir(), 'dat/pmc_abc/', 'CenQue_theta_t',
                                       str(pewl), '_', abcrun_flag,
                                       '.fixedtau.', fixtau, '.dat'])
    w_file = lambda pewl: ''.join([code_dir(), 'dat/pmc_abc/', 'CenQue_w_t',
                                   str(pewl), '_', abcrun_flag,
                                   '.fixedtau.', fixtau, '.dat'])
    dist_file = lambda pewl: ''.join([code_dir(), 'dat/pmc_abc/', 'CenQue_dist_t',
                                      str(pewl), '_', abcrun_flag,
                                      '.fixedtau.', fixtau, '.dat'])
    eps_file = ''.join([code_dir(),
                        'dat/pmc_abc/epsilon_', abcrun_flag,
                        '.fixedtau.', fixtau, '.dat'])

    distfn = RhoFq
    if restart:
        # resume from the saved pool of iteration t_restart
        if t_restart is None:
            raise ValueError
        last_thetas = np.loadtxt(theta_file(t_restart))
        last_ws = np.loadtxt(w_file(t_restart))
        last_dist = np.loadtxt(dist_file(t_restart))
        init_pool = abcpmc.PoolSpec(t_restart, None, None,
                                    last_thetas, last_dist, last_ws)
    else:
        init_pool = None

    eps = abcpmc.ConstEps(T, eps_input)
    # use the MPI pool when available, fall back to serial sampling
    try:
        mpi_pool = mpi_util.MpiPool()
        abcpmc_sampler = abcpmc.Sampler(
            N=Npart,  # N_particles
            Y=data_sum,  # data
            postfn=Simz,  # simulator
            dist=distfn,  # distance function
            pool=mpi_pool)
    except AttributeError:
        abcpmc_sampler = abcpmc.Sampler(
            N=Npart,  # N_particles
            Y=data_sum,  # data
            postfn=Simz,  # simulator
            dist=distfn)  # distance function
    abcpmc_sampler.particle_proposal_cls = abcpmc.ParticleProposal

    pools = []
    if init_pool is None:
        # fresh run: truncate the epsilon log file
        f = open(eps_file, "w")
        f.close()
    eps_str = ''
    for pool in abcpmc_sampler.sample(prior, eps, pool=init_pool):
        print '----------------------------------------'
        print 'eps ', pool.eps
        new_eps_str = str(pool.eps)+'\t'+str(pool.ratio)+'\n'
        if eps_str != new_eps_str:  # if eps is different, open file and append
            f = open(eps_file, "a")
            eps_str = new_eps_str
            f.write(eps_str)
            f.close()

        print("T:{0},ratio: {1:>.4f}".format(pool.t, pool.ratio))
        print eps(pool.t)

        # write theta, weights, and distances to file
        np.savetxt(theta_file(pool.t), pool.thetas,
                   header='gv_slope, gv_offset, fudge_slope, fudge_offset')
        np.savetxt(w_file(pool.t), pool.ws)
        np.savetxt(dist_file(pool.t), pool.dists)

        # update epsilon based on median thresholding
        if len(observables) == 1:
            eps.eps = np.median(pool.dists)
        else:
            #print pool.dists
            print np.median(np.atleast_2d(pool.dists), axis=0)
            eps.eps = np.median(np.atleast_2d(pool.dists), axis=0)
        print '----------------------------------------'
        pools.append(pool)

    return pools