def test_normal(self):
    """Draw from a 2-D Gaussian prior and KS-test the first column against "norm".

    The prior is built with mean ``[0, 0]`` and an identity covariance and is
    called without arguments 10000 times; the stacked draws must form a 2-D
    array and the KS statistic of column 0 must stay below 0.015.
    """
    n_draws = 10000
    prior = abcpmc.GaussianPrior([0, 0], [[1, 0], [0, 1]])

    draws = np.array([prior() for _ in range(n_draws)])
    assert draws.ndim == 2

    # Only the KS statistic is checked; the p-value is not used.
    ks_statistic, _ = stats.kstest(draws[:, 0], "norm")
    assert ks_statistic < 0.015
def test_pdf(self):
    """Check the value returned when the Gaussian prior is called at ``[0, 0]``.

    The test is skipped when ``scipy.stats.multivariate_normal`` cannot be
    imported.
    """
    # The import is only an availability probe; the name itself is not used here.
    try:
        from scipy.stats import multivariate_normal  # noqa: F401
    except ImportError:
        pytest.skip("Scipy.stats.multivariate_normal is not available")

    prior = abcpmc.GaussianPrior([0, 0], [[1, 0], [0, 1]])

    # Calling the prior WITH an argument; presumably this evaluates the pdf
    # there (1 / (2*pi) ~= 0.15915 for a unit 2-D Gaussian) -- confirm against
    # abcpmc.GaussianPrior.__call__.
    value_at_origin = prior([0, 0])

    # The tolerance is a relative one (third positional argument of allclose).
    assert np.allclose(value_at_origin, 0.15915, rtol=1e-4)
def config(self):
    """Read the ABC PMC options and build the threshold, prior and sampler.

    Options are read through ``self.read_ini`` and stored as attributes.
    Start values and limits of the sampled parameters come from
    ``self.pipeline.varied_params``; data and covariances come from
    ``self.load_data()``.

    Raises:
        ValueError: if ``abcpmc`` cannot be imported, if the covariance file
            given for a Gaussian prior does not have one column per varied
            parameter, or if ``set_prior`` is neither 'uniform' nor
            'gaussian' (compared case-insensitively).
    """
    try:
        import abcpmc
    except ImportError:
        raise ValueError("To use ABC PMC you need to install it with pip install abcpmc")

    # The pipeline is published as a module-level global
    # (presumably read by abc_model -- confirm against its definition).
    global abc_pipeline
    abc_pipeline = self.pipeline

    self.threshold = self.read_ini("threshold", str, 'LinearEps')
    self.metric_kw = self.read_ini("metric", str, 'chi2')  # mean, chi2 or other
    if self.metric_kw == 'other':
        # distance_func is only read for the 'other' metric.
        self.distance_func = self.read_ini("distance_func", str, None)
        # Drop the first and last character of the option value
        # (presumably surrounding quotes/brackets -- confirm).
        self.metric = self.distance_func[1:-1]
    self.epimax = self.read_ini('epimax', float, 5.0)
    self.epimin = self.read_ini('epimin', float, 1.0)
    self.part_prop = self.read_ini("particle_prop", str, 'weighted_cov')
    self.set_prior = self.read_ini("set_prior", str, 'uniform')
    self.param_cov = self.read_ini("param_cov_file", str, 'None')
    self.knn = self.read_ini("num_nn", int, 10)
    self.npart = self.read_ini("npart", int, 100)
    self.niter = self.read_ini("niter", int, 2)
    self.ngauss = self.read_ini("ngauss", int, 4)
    self.run_multigauss = self.read_ini("run_multigauss", bool, False)
    self.diag_cov = self.read_ini("diag_cov", bool, False)
    self.ndim = len(self.pipeline.varied_params)

    # Options for the decreasing threshold; anything other than
    # 'ConstEps' / 'ExpEps' falls back to a linear schedule.
    if self.threshold == 'ConstEps':
        self.eps = abcpmc.ConstEps(self.niter, self.epimax)
    elif self.threshold == 'ExpEps':
        self.eps = abcpmc.ExponentialEps(self.niter, self.epimax, self.epimin)
    else:
        self.eps = abcpmc.LinearEps(self.niter, self.epimax, self.epimin)

    print("\nRunning ABC PMC")
    print("with %d particles, %s prior, %s threshold, %d iterations over (%f,%f), %s kernal \n"
          % (self.npart, self.set_prior, self.threshold, self.niter,
             self.epimax, self.epimin, self.part_prop))

    # Initial positions for all of the parameters.
    self.p0 = np.array([param.start for param in self.pipeline.varied_params])

    # Data file is read for use in dist() for each step;
    # parameter covariance used in the prior.
    self.data, self.cov, self.invcov = self.load_data()

    # At the moment the same prior (with variable hyperparameters) is used
    # for all parameters - would be nice to change this to be more flexible.
    self.pmin = np.zeros(self.ndim)
    self.pmax = np.zeros(self.ndim)
    for i, param in enumerate(self.pipeline.varied_params):
        self.pmin[i] = param.limits[0]
        self.pmax[i] = param.limits[1]

    prior_kind = self.set_prior.lower()
    if prior_kind == 'uniform':
        self.prior = abcpmc.TophatPrior(self.pmin, self.pmax)
    elif prior_kind == 'gaussian':
        # A single-row file loads as 1-D; promote it so columns can be counted.
        sigma2 = np.atleast_2d(np.loadtxt(self.param_cov))
        n_cols = len(sigma2[0])
        if n_cols != self.ndim:
            # Both values must be passed as ONE tuple: with
            # `"..." % n_cols, self.ndim` the `%` only sees n_cols and the
            # formatting itself fails with a TypeError.
            raise ValueError("Cov matrix for Gaussian prior has %d columns for %d params"
                             % (n_cols, self.ndim))
        self.prior = abcpmc.GaussianPrior(self.p0, sigma2)
    else:
        raise ValueError("Please set the ABC option 'set_prior' to either 'uniform' or 'gaussian'. At the moment only 'uniform' works in the general case.")

    # Create sampler.
    self.sampler = abcpmc.Sampler(N=self.npart, Y=self.data, postfn=abc_model, dist=self.dist)

    # Set particle proposal kernel.
    # NOTE: particle_proposal_kwargs is assigned on the abcpmc.Sampler CLASS,
    # not on self.sampler, so the value is visible to every Sampler instance.
    abcpmc.Sampler.particle_proposal_kwargs = {}
    if self.part_prop == 'KNN':
        abcpmc.Sampler.particle_proposal_kwargs = {'k': self.knn}
        self.sampler.particle_proposal_cls = abcpmc.KNNParticleProposal
    elif self.part_prop == 'OLCM':
        self.sampler.particle_proposal_cls = abcpmc.OLCMParticleProposal

    self.converged = False
def MSE(x, y):
    """Mean squared error distance measure: the mean of the squared differences."""
    residual = x - y
    return np.mean(np.square(residual))


def euclidian(x, y):
    """Distance measure: ``np.linalg.norm`` of the difference ``x - y``."""
    difference = x - y
    return np.linalg.norm(difference)


def std(x, y):
    """Distance measure: absolute difference of the two standard deviations."""
    spread_x = np.std(x)
    spread_y = np.std(y)
    return abs(spread_x - spread_y)


# --- Setup ---

# 'Best' guess about the distribution
prior = abcpmc.GaussianPrior(mu=[1.0, 1.0], sigma=0.5 * np.eye(2))

# 'Best' guess about the distribution, uniform distribution
# (this rebinds `prior`, so the Gaussian prior above is not the one left in scope)
prior = abcpmc.TophatPrior([0.0, 0.0], [5.0, 5.0])

# As threshold for accepting draws from the prior we use the alpha-th percentile
# of the sorted distances of the particles of the current iteration
alpha = 75
T = 10           # sample for T iterations
eps_start = 1.0  # sufficiently high starting threshold
eps = abcpmc.ConstEps(T, eps_start)


# --- Sampling function ---

def launch(threads):
    """Build a constant threshold schedule from the module-level T and eps_start.

    NOTE(review): as visible here the schedule is only bound to a local name
    and ``threads`` is unused; the function body may be truncated in this view.
    """
    eps = abcpmc.ConstEps(T, eps_start)
def main_abcpmc_MUSIC2(conf, test=False):
    """Run the abcpmc inference, or a single metric comparison when ``test`` is set.

    ``conf`` is indexed as ``conf[section][key]`` and also queried through
    ``conf.getint`` / ``conf.getfloat``; several of its values are JSON strings
    (metrics, prior bounds/means/covariance, eps start limits).

    eps_start is actually important, as the next iteration will only start once
    the number of computed trials within these boundaries reaches Nw. In one
    case twice as many particles as Nw had to be drawn and computed.

    About the threads: 14-16 threads are fine, as more threads won't be fully
    used and will just sit in the task queue.

    Returns 0 after the test comparison and when the prior type is unknown;
    otherwise nothing is returned once ``launch`` has been called.
    """
    # Load the real data to compare with (and, if necessary, also the test data).
    data = iom.unpickleObject(conf['paths']['surveyreal'])

    if test:
        dataMUSIC2 = iom.unpickleObject(conf['paths']['surveysim'])
        print(type(dataMUSIC2.Rmodel), conf['paths']['surveysim'])
        surmet.abcpmc_dist_severalMetrices(
            dataMUSIC2,
            data,
            metrics=json.loads(conf['metrics']['used']),
            delal=False,
            stochdrop=conf['flavor']['stochdrop'],
            phoenixdrop=conf['flavor']['phoenixdrop'],
            outpath='/data/',
        )
        return 0

    # The abcpmc part starts: define thetas, i.e. the parameter values to be
    # inferred, and their priors.
    prior_type = conf['prior']['type']
    if prior_type not in ('tophat', 'gaussian'):
        print('inference_abcpmc::main_abcpmc_MUSIC2: prior %s is unknown!' % (prior_type))
        return 0

    if prior_type == 'tophat':
        prior_bounds = json.loads(conf['prior']['bounds'])
        prior = abcpmc.TophatPrior(prior_bounds[0], prior_bounds[1])
    else:
        prior_means = json.loads(conf['prior']['means'])
        prior_cov = json.loads(conf['prior']['covariance'])
        prior = abcpmc.GaussianPrior(mu=prior_means, sigma=prior_cov)

    n_iterations = conf.getint('pmc', 'T')
    eps = abcpmc.ConstEps(n_iterations, json.loads(conf['metrics']['eps_startlimits']))

    if test:
        # NOTE(review): this branch cannot run -- `test` already returned above.
        sampler = abcpmc.Sampler(
            N=conf.getint('pmc', 'Nw'),
            Y=data,
            postfn=testrand,
            dist=testmetric,
            threads=conf.getint('mp', 'Nthreads'),
            maxtasksperchild=conf.getint('mp', 'maxtasksperchild'),
        )
    else:
        n_particles = conf.getint('pmc', 'Nw')
        run_simulation = partial(music2run.main_ABC, parfile=conf['simulation']['parfile'])
        survey_distance = partial(
            surmet.abcpmc_dist_severalMetrices,
            metrics=json.loads(conf['metrics']['used']),
            outpath=conf['paths']['abcpmc'],
            stochdrop=conf['flavor']['stochdrop'],
            phoenixdrop=conf['flavor']['phoenixdrop'],
        )
        sampler = abcpmc.Sampler(
            N=n_particles,
            Y=data,
            postfn=run_simulation,
            dist=survey_distance,
            threads=conf.getint('mp', 'Nthreads'),
            maxtasksperchild=conf.getint('mp', 'maxtasksperchild'),
        )

    # Prepare the file used for counting.
    with open(conf['paths']['abcpmc'] + 'count.txt', 'w+') as counter_file:
        counter_file.write('0')

    sampler.particle_proposal_cls = abcpmc.OLCMParticleProposal

    # Compare with AstroABC:
    #   sampler = astroabc.ABC_class(Ndim,walkers,data,tlevels,niter,priors,**prop)
    #   sampler.sample(music2run.main_astroABC)

    # startfrom=iom.unpickleObject('/data/ClusterBuster-Output/MUSIC_NVSS02_Test01/launch_pools')
    pool = None  # startfrom[-1]
    launch(
        sampler,
        prior,
        conf.getfloat('pmc', 'alpha'),
        eps,
        surveypath=conf['paths']['abcpmc'],
        pool=pool,
    )
# Synthetic "observed" data: `size` draws from a 4-D Gaussian.
size = 5000
sigma = 0.25 * np.eye(4)
means = np.array([1.1, 1.5, 1.1, 1.5])
data = np.random.multivariate_normal(means, sigma, size)
print(data)

# -------


def dist(x, y):
    """Distance function: sum of the absolute differences of the axis-0 means."""
    mean_gap = np.mean(x, axis=0) - np.mean(y, axis=0)
    return np.sum(np.abs(mean_gap))


def postfn(theta):
    """Our "model": `size` Gaussian draws with mean ``theta`` and covariance ``sigma``."""
    return np.random.multivariate_normal(theta, sigma, size)


eps = abcpmc.LinearEps(10, 5, 0.75)
prior = abcpmc.GaussianPrior(1.1 * means, 2 * sigma)  # our best guess

sampler = abcpmc.Sampler(N=10, Y=data, postfn=postfn, dist=dist)

# Report every pool yielded by the sampler, then mean and std of each theta column.
for pool in sampler.sample(prior, eps):
    print("T: {0}, eps: {1:>.4f}, ratio: {2:>.4f}".format(
        pool.t, pool.eps, pool.ratio))
    theta_means = np.mean(pool.thetas, axis=0)
    theta_stds = np.std(pool.thetas, axis=0)
    for i, (mean, std) in enumerate(zip(theta_means, theta_stds)):
        print(u" theta[{0}]: {1:>.4f} \u00B1 {2:>.4f}".format(i, mean, std))