def run_chi_mcmc_redux(
        obs,
        initial,
        group_class,
        mcmc_args,
        arch_path="/Users/MasterD/Google Drive/CCSLab/dev/arch_libs/interp/arch_spec_interp.pkl",
        nwalkers=25,
        nsteps=1500):
    """Run the chi-square MCMC without the C2 fitting.

    The group bounds are still important: they are looked up from the prefix
    of ``group_class`` (the text before the first underscore) and stored in
    ``mcmc_args['bounds']`` before sampling.

    Parameters
    ----------
    obs
        Observed spectrum; passed to ``get_beta_params`` and to the
        likelihood ``chi_likelihood_redux``.
    initial
        Initial parameter vector.  Walkers start in a Gaussian ball of
        width 1e-2 around it.
    group_class : str
        Key into the archetype library.  Its prefix must be one of
        ``'GI'``, ``'GII'`` or ``'GIII'``.
    mcmc_args : dict
        Extra arguments for the likelihood.  **Mutated in place**: the keys
        ``bounds``, ``CAII_alpha``, ``CAII_beta``, ``CH_alpha`` and
        ``CH_beta`` are added/overwritten.
    arch_path : str, optional
        Pickle file holding the interpolated archetype spectra.
    nwalkers : int, optional
        Number of emcee walkers (default 25).
    nsteps : int, optional
        Number of MCMC steps (default 1500).

    Returns
    -------
    tuple
        ``(sampler, ndim, mcmc_args)``.

    Raises
    ------
    KeyError
        If the prefix of ``group_class`` is not a known group or
        ``group_class`` is missing from the archetype library.
    """
    print("... loading arch libs")
    # NOTE: pickle.load can execute arbitrary code -- only point arch_path at
    # trusted files.  The context manager guarantees the handle is closed.
    with open(arch_path, 'rb') as arch_file:
        arch_spec_int = pkl.load(arch_file)

    group_bounds = {
        'GI': {
            "T": [4000, 5000],
            "FEH": [-3.5, -1.0],
            'CARBON': [7.0, 9.0],
        },
        # NOTE(review): the GII carbon range is on a different scale from the
        # other two groups -- confirm this is intended.
        'GII': {
            "T": [4000, 5000],
            "FEH": [-4.5, -2.0],
            'CARBON': [-1.0, 1.5],
        },
        'GIII': {
            "T": [4000, 5000],
            "FEH": [-4.5, -3.0],
            'CARBON': [6.0, 7.5],
        },
    }

    print("MCMC params")
    mcmc_args['bounds'] = group_bounds[group_class.split("_")[0]]

    print("Computing beta params")
    CAII_BETA = get_beta_params(obs, [3884, 3923])
    CH_BETA = get_beta_params(obs, [4222, 4322])
    mcmc_args['CAII_alpha'] = CAII_BETA['alpha']
    mcmc_args['CAII_beta'] = CAII_BETA['beta']
    mcmc_args['CH_alpha'] = CH_BETA['alpha']
    mcmc_args['CH_beta'] = CH_BETA['beta']

    # Small Gaussian ball around the initial guess, one row per walker.
    pos = initial + 1e-2 * np.random.randn(nwalkers, len(initial))
    nwalkers, ndim = pos.shape

    sampler = emcee.EnsembleSampler(
        nwalkers, ndim, chi_likelihood_redux,
        args=(obs, arch_spec_int[group_class], mcmc_args))
    _ = sampler.run_mcmc(pos, nsteps)

    return sampler, ndim, mcmc_args
def preform_emcee(time, flux, sigma_sq, ROW):
    """Fit one light curve with emcee and write diagnostic plots/results.

    The walker initialisation strategy is taken from ``sys.argv[5]``
    (``'normal'``, ``'grid'`` or ``'optimal'``, case-insensitive).  Figures
    are written under ``figure/`` and a one-line summary under
    ``scratch_new/``; finally ``sausageplot`` is called with the
    maximum-probability parameters.

    NOTE(review): ``err``, ``V``, ``Tau``, ``logprobs`` and ``logvals`` are
    not defined in this function -- presumably module-level globals (the
    last two look like they are filled by ``lnprob``); confirm.  The
    ``sigma_sq`` parameter is not used in the body.

    Parameters
    ----------
    time, flux
        Light-curve samples.
    sigma_sq
        Unused here (see note above).
    ROW
        Object identifier; only used (via ``str``) in file names and prints.
    """
    row_tag = str(ROW)

    # Smallest spacing between consecutive time samples.
    diff_time = [t - time[n - 1] for n, t in enumerate(time)][1:]
    print(min(diff_time))

    # Raw light curve.
    plt.figure()
    plt.errorbar(time, flux, err)
    plt.savefig('figure/' + row_tag + 'LC' + '.pdf')
    #plt.show()

    # Log-probability density on a (tau, variance) grid.
    tau_axis = np.arange(-1, 5, .1)  #tau
    var_axis = np.arange(-2.5, 1.5, .1)  #variance
    X, Y = np.meshgrid(tau_axis, var_axis)
    lprob_dens = np.array(lnprob_dens((Y, X), time, flux, err))
    plt.figure()
    plt.pcolormesh(X, Y, lprob_dens.reshape(X.shape),
                   shading='gouraud', cmap=cm.rainbow)
    cbar = plt.colorbar()
    cbar.set_label('log(probability)')
    plt.xlabel("Tau")
    plt.ylabel("Variance")
    plt.savefig('figure/' + row_tag + 'logprob_density_norm' + '.pdf')
    #plt.show()

    def nll(*args):
        # Negative log-likelihood, for the optimiser.
        return -lnlike(*args)

    ndim = 2
    nwalkers = 100
    mode = sys.argv[5].lower()

    if mode == 'normal':
        result = [np.log10(V), np.log10(Tau)]
        pos = [result + (-0.5 + np.random.randn(ndim))
               for _ in range(nwalkers)]
    elif mode == 'grid':
        v_grid = np.arange(-1, 0, 0.1)
        t_grid = np.arange(1, 2, 0.1)
        VG, TG = np.meshgrid(v_grid, t_grid)
        # for python 2.7
        result = [np.array(pair) for pair in zip(VG.flatten(), TG.flatten())]
        pos = [result[w] + 1e-7 * np.random.randn(ndim)
               for w in range(nwalkers)]
    elif mode == 'optimal':
        result = op.minimize(nll, [np.log10(V), np.log10(Tau)],
                             args=(time, flux, err**2))
        pos = [result['x'] + 1e-4 * np.random.randn(ndim)
               for _ in range(nwalkers)]
    else:
        print("What the hell do you want to do?")
        print("'grid', 'optimal', or 'normal' search through MCMC?")
        exit()

    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob,
                                    args=(time, flux, err**2))
    print(np.array(pos).shape)
    sampler.run_mcmc(pos, 100)
    # Drop the first 20 steps of every walker, then flatten.
    samples = sampler.chain[:, 20:, :].reshape((-1, ndim))

    plt.figure()
    plt.plot(logprobs)
    plt.savefig('figure/' + row_tag + sys.argv[5] + 'logprob' + '.pdf')
    #plt.show()

    # Parameters recorded alongside the highest log-probability.
    best_index = logprobs.index(max(logprobs))
    max_theta = logvals[best_index]
    best_v = max_theta[0]
    best_tau = max_theta[1]

    fig = corner.corner(samples,
                        labels=[r"log$_{10}V$", r"log$_{10}\tau$"],
                        truths=[best_v, best_tau])
    fig.savefig("figure/" + row_tag + sys.argv[5] + "triangle_np.pdf")

    # (median, +error, -error) from the 16th/50th/84th percentiles.
    p16, p50, p84 = np.percentile(samples, [16, 50, 84], axis=0)
    V_mcmc, Tau_mcmc = [(mid, hi - mid, mid - lo)
                        for lo, mid, hi in zip(p16, p50, p84)]
    print('V_mcmc:', V_mcmc, 'Tau_mcmc:', Tau_mcmc, best_v, best_tau)
    print('ROW:', ROW, 'Tau:', str(best_tau), 'V:', str(best_v))

    filename = 'scratch_new/' + row_tag + sys.argv[5] + 'object' + '.txt'
    summary = ('Object: ' + row_tag + ' ' + 'Tau: ' + str(best_tau) + ' '
               + 'V: ' + str(best_v) + '\n')
    with open(filename, 'w+') as fout:
        fout.write(summary)

    sausageplot(best_v, time, flux, best_tau, 5, err**2, ROW)
# NOTE(review): the next two statements are the tail of a function whose
# header lies before this chunk (presumably the log-posterior used below).
    # Reject parameters with a non-finite log-prior.
    if not np.isfinite(log_pr):
        return -np.inf
    # log_likelihood is used as the minimisation objective below (-ln L),
    # so it is subtracted here to form the log-posterior.
    return log_pr - log_likelihood(parameter, x, y, sigma_y)

# Minimizing -ln L, i.e. maximizing L, which is the objective function here.
from scipy.optimize import minimize
guess = (1. , 1. ,1.)
soln = minimize(log_likelihood , guess , args=(x, y, sigma_y))

# Initializing the Markov chains of parameters: a tight Gaussian ball
# around the optimiser solution, one row per walker.
nwalkers, ndim = 50, 3
pos = soln.x + 1e-5 * np.random.randn(nwalkers, ndim)

# MCMC through the emcee library.
import emcee
sampling_tool = emcee.EnsembleSampler(nwalkers , ndim , log_posterior , args=(x, y, sigma_y))
sampling_tool.run_mcmc(pos, 4000)
# NOTE(review): get_chain() is called without a discard argument, so the
# statistics below are taken over all 4000 steps -- confirm whether burn-in
# should be dropped.
samples = sampling_tool.get_chain()

# Calculating the best-fit values, i.e. the median of the posterior samples
# of each parameter (last axis indexes the parameter).
a_best=np.median(samples[:,:,0])
b_best=np.median(samples[:,:,1])
c_best=np.median(samples[:,:,2])

# Calculating the one-sigma uncertainties, i.e. the standard deviation
# of the posterior samples.
one_sigma_a=np.std(samples[:,:,0])
one_sigma_b=np.std(samples[:,:,1])
one_sigma_c=np.std(samples[:,:,2])
def lnprob(params):
    """Return the log-posterior of the GP for the parameter vector ``params``.

    The GP is updated with ``params`` first; if the log-prior is not finite
    the likelihood is not evaluated and ``-inf`` is returned.
    """
    gp.set_parameter_vector(params)
    log_prior = lnprior(params)
    # log_prior = gp.log_prior()
    if np.isfinite(log_prior):
        return gp.log_likelihood(y) + log_prior
    return -np.inf


import emcee

# Start from the GP's current parameter vector.
initial = gp.get_parameter_vector()
# initial = np.array(initial_params)
# initial = np.array(soln.x)
ndim = len(initial)
nwalkers = 32
sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, threads=14)

import time

time_start = time.time()

# First burn-in: walkers in a tight ball around the initial vector.
print("Running first burn-in...")
pos, prob, _ = sampler.run_mcmc(
    initial + 1e-4 * np.random.randn(nwalkers, ndim), 3000)

# Second burn-in: re-centre every walker on the best position found so far.
print("Running second burn-in...")
pos, prob, _ = sampler.run_mcmc(
    pos[np.argmax(prob)] + 1e-4 * np.random.randn(nwalkers, ndim), 2000)

# print("Running third burn-in...")
# pos = pos[np.argmax(prob)] + 1e-8 * np.random.randn(nwalkers, ndim)
# pos, prob, _ = sampler.run_mcmc(pos, 2000)
# NOTE(review): the next statement is the tail of a function whose header
# lies before this chunk (presumably the log-likelihood called below).
    # Gaussian log-likelihood from the chi-square.
    return -0.5 * chisq_sn_cmb([omgM, h, gamma0, gamma1, sigma8])


def lnprob_sn_bao(pars):
    """Return log-prior + log-likelihood for ``pars`` (``-inf`` if the prior is not finite)."""
    lp = lnprior(pars)
    if not np.isfinite(lp):
        return -np.inf
    return lp + lnlike_sn_bao(pars)


# NOTE(review): the sampler uses the *_sn_bao posterior while every variable
# below is named *_sn_cmb -- confirm the naming is intentional.
ndim, nwalkers, nsteps = 5, 50, 1000
# One starting point per walker: a tight Gaussian ball around the
# previously obtained values (list + ndarray adds element-wise).
pos = [[omgM_sn_cmb, h_sn_cmb, gamma0_sn_cmb, gamma1_sn_cmb, sigma8_sn_cmb] + 1e-4 * np.random.randn(ndim) for i in range(nwalkers)]

# MCMC chain with 50 walkers and 1000 steps
sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob_sn_bao, threads=4)
sampler.run_mcmc(pos, nsteps)

# Getting chains (one slice of the last axis per parameter)
omgM_sn_cmb_chain = sampler.chain[:, :, 0]
h_sn_cmb_chain = sampler.chain[:, :, 1]
gamma0_sn_cmb_chain = sampler.chain[:, :, 2]
gamma1_sn_cmb_chain = sampler.chain[:, :, 3]
sigma8_sn_cmb_chain = sampler.chain[:, :, 4]

# Average and standard deviation between chains (taken over axis 0; the
# standard deviation is divided by sqrt(nwalkers))
h_sn_cmb_chain_mean = np.mean(h_sn_cmb_chain, axis=0)
h_sn_cmb_chain_std = np.std(h_sn_cmb_chain, axis=0) / np.sqrt(nwalkers)

# Reshaping into a flat vector of nwalkers * nsteps samples
omgM_sn_cmb_chain_flat = np.reshape(omgM_sn_cmb_chain, (nwalkers * nsteps, ))
def main(argv=None):
    """Command-line entry point: MCMC optimisation of a timing model on event data.

    Reads an event file, a par file and a gaussian template, sets priors on
    the fitted parameters, optionally runs a scipy optimisation, then samples
    the posterior with emcee.  Output files are all prefixed with the pulsar
    name taken from the model: ``_pre.png``, ``_prof_pre.txt``,
    ``_chains.png``, ``_triangle.png`` (only if ``corner`` is importable),
    ``_post.png``, ``_prof_post.txt``, ``_post.par``, ``_results.txt`` and
    ``_samples.pickle``.

    Parameters
    ----------
    argv : list of str, optional
        Command-line arguments handed to ``argparse``; ``None`` lets
        argparse use the process arguments.

    Notes
    -----
    Sets the module-level globals ``nwalkers``, ``nsteps`` and ``ftr``.
    Calls ``sys.exit(1)`` when ``burnin >= nsteps`` and ``sys.exit()`` after
    the ``--testWeights`` diagnostics.
    """
    parser = argparse.ArgumentParser(
        description=
        "PINT tool for MCMC optimization of timing models using event data.")
    parser.add_argument("eventfile", help="event file to use")
    parser.add_argument("parfile", help="par file to read model from")
    parser.add_argument("gaussianfile",
                        help="gaussian file that defines template")
    parser.add_argument("--ft2", help="Path to FT2 file.", default=None)
    parser.add_argument(
        "--weightcol",
        help="name of weight column (or 'CALC' to have them computed",
        default=None,
    )
    parser.add_argument("--nwalkers",
                        help="Number of MCMC walkers (def 200)",
                        type=int,
                        default=200)
    parser.add_argument(
        "--burnin",
        help="Number of MCMC steps for burn in (def 100)",
        type=int,
        default=100,
    )
    parser.add_argument(
        "--nsteps",
        help="Number of MCMC steps to compute (def 1000)",
        type=int,
        default=1000,
    )
    parser.add_argument("--minMJD",
                        help="Earliest MJD to use (def 54680)",
                        type=float,
                        default=54680.0)
    parser.add_argument("--maxMJD",
                        help="Latest MJD to use (def 57250)",
                        type=float,
                        default=57250.0)
    parser.add_argument("--phs",
                        help="Starting phase offset [0-1] (def is to measure)",
                        type=float)
    parser.add_argument("--phserr",
                        help="Error on starting phase",
                        type=float,
                        default=0.03)
    parser.add_argument(
        "--minWeight",
        help="Minimum weight to include (def 0.05)",
        type=float,
        default=0.05,
    )
    parser.add_argument(
        "--wgtexp",
        help=
        "Raise computed weights to this power (or 0.0 to disable any rescaling of weights)",
        type=float,
        default=0.0,
    )
    parser.add_argument(
        "--testWeights",
        help="Make plots to evalute weight cuts?",
        default=False,
        action="store_true",
    )
    parser.add_argument(
        "--doOpt",
        help="Run initial scipy opt before MCMC?",
        default=False,
        action="store_true",
    )
    parser.add_argument(
        "--initerrfact",
        help=
        "Multiply par file errors by this factor when initializing walker starting values",
        type=float,
        default=0.1,
    )
    parser.add_argument(
        "--priorerrfact",
        help=
        "Multiple par file errors by this factor when setting gaussian prior widths",
        type=float,
        default=10.0,
    )
    parser.add_argument(
        "--usepickle",
        help="Read events from pickle file, if available?",
        default=False,
        action="store_true",
    )

    global nwalkers, nsteps, ftr

    args = parser.parse_args(argv)

    eventfile = args.eventfile
    parfile = args.parfile
    gaussianfile = args.gaussianfile
    weightcol = args.weightcol

    if args.ft2 is not None:
        # Instantiate Fermi observatory once so it gets added to the
        # observatory registry
        get_satellite_observatory("Fermi", args.ft2)

    nwalkers = args.nwalkers
    burnin = args.burnin
    nsteps = args.nsteps
    if burnin >= nsteps:
        log.error("burnin must be < nsteps")
        sys.exit(1)
    nbins = 256  # For likelihood calculation based on gaussians file
    outprof_nbins = 256  # in the text file, for pygaussfit.py, for instance
    minMJD = args.minMJD
    maxMJD = args.maxMJD  # Usually set by coverage of IERS file

    minWeight = args.minWeight
    do_opt_first = args.doOpt
    wgtexp = args.wgtexp

    # Read in initial model
    modelin = pint.models.get_model(parfile)

    # The custom_timing version below is to manually construct the TimingModel
    # class, which allows it to be pickled. This is needed for parallelizing
    # the emcee call over a number of threads. So far, it isn't quite working
    # so it is disabled. The code above constructs the TimingModel class
    # dynamically, as usual.
    # modelin = custom_timing(parfile)

    # Remove the dispersion delay as it is unnecessary
    # modelin.delay_funcs['L1'].remove(modelin.dispersion_delay)
    # Set the target coords for automatic weighting if necessary
    if "ELONG" in modelin.params:
        tc = SkyCoord(
            modelin.ELONG.quantity,
            modelin.ELAT.quantity,
            frame="barycentrictrueecliptic",
        )
    else:
        tc = SkyCoord(modelin.RAJ.quantity,
                      modelin.DECJ.quantity,
                      frame="icrs")

    target = tc if weightcol == "CALC" else None

    # TODO: make this properly handle long double
    ts = None
    if args.usepickle:
        try:
            ts = toa.load_pickle(eventfile)
        except IOError:
            # No usable pickle: fall through and read the event file.
            pass
    if ts is None:
        # Read event file and return list of TOA objects
        tl = fermi.load_Fermi_TOAs(eventfile,
                                   weightcolumn=weightcol,
                                   targetcoord=target,
                                   minweight=minWeight)
        # Limit the TOAs to ones in selected MJD range and above minWeight
        tl = [
            t for t in tl
            if (t.mjd.value > minMJD and t.mjd.value < maxMJD and
                (weightcol is None or t.flags["weight"] > minWeight))
        ]
        log.info("There are %d events we will use" % len(tl))
        # Now convert to TOAs object and compute TDBs and posvels
        ts = toa.TOAs(toalist=tl)
        ts.filename = eventfile
        ts.compute_TDBs()
        ts.compute_posvels(ephem="DE421", planets=False)
        toa.save_pickle(ts)

    if weightcol is not None:
        if weightcol == "CALC":
            weights = np.asarray([x["weight"] for x in ts.table["flags"]])
            log.info("Original weights have min / max weights %.3f / %.3f" %
                     (weights.min(), weights.max()))
            # Rescale the weights, if requested (by having wgtexp != 0.0)
            if wgtexp != 0.0:
                weights **= wgtexp
                wmx, wmn = weights.max(), weights.min()
                # make the highest weight = 1, but keep min weight the same
                weights = wmn + ((weights - wmn) * (1.0 - wmn) / (wmx - wmn))
            for ii, x in enumerate(ts.table["flags"]):
                x["weight"] = weights[ii]
        weights = np.asarray([x["weight"] for x in ts.table["flags"]])
        log.info("There are %d events, with min / max weights %.3f / %.3f" %
                 (len(weights), weights.min(), weights.max()))
    else:
        weights = None
        log.info("There are %d events, no weights are being used." % ts.ntoas)

    # Now load in the gaussian template and normalize it
    gtemplate = read_gaussfitfile(gaussianfile, nbins)
    gtemplate /= gtemplate.mean()

    # Set the priors on the parameters in the model, before
    # instantiating the emcee_fitter
    # Currently, this adds a gaussian prior on each parameter
    # with width equal to the par file uncertainty * priorerrfact,
    # and then puts in some special cases.
    # *** This should be replaced/supplemented with a way to specify
    # more general priors on parameters that need certain bounds
    phs = 0.0 if args.phs is None else args.phs
    fitkeys, fitvals, fiterrs = get_fit_keyvals(modelin,
                                                phs=phs,
                                                phserr=args.phserr)

    # The last entry is skipped here; it is the phase term that is set
    # further down via fitvals[-1].
    for key, v, e in zip(fitkeys[:-1], fitvals[:-1], fiterrs[:-1]):
        if key == "SINI" or key == "E" or key == "ECC":
            getattr(modelin, key).prior = Prior(uniform(0.0, 1.0))
        elif key == "PX":
            getattr(modelin, key).prior = Prior(uniform(0.0, 10.0))
        elif key.startswith("GLPH"):
            getattr(modelin, key).prior = Prior(uniform(-0.5, 1.0))
        else:
            getattr(modelin, key).prior = Prior(
                norm(loc=float(v), scale=float(e * args.priorerrfact)))

    # Now define the requirements for emcee
    ftr = emcee_fitter(ts, modelin, gtemplate, weights, phs, args.phserr)

    # Use this if you want to see the effect of setting minWeight
    if args.testWeights:
        log.info("Checking H-test vs weights")
        ftr.prof_vs_weights(use_weights=True)
        ftr.prof_vs_weights(use_weights=False)
        sys.exit()

    # Now compute the photon phases and see if we see a pulse
    phss = ftr.get_event_phases()
    maxbin, like_start = marginalize_over_phase(phss,
                                                gtemplate,
                                                weights=ftr.weights,
                                                minimize=True,
                                                showplot=False)
    log.info("Starting pulse likelihood: %f" % like_start)
    measured_phase = 1.0 - maxbin[0] / float(len(gtemplate))
    if args.phs is None:
        fitvals[-1] = measured_phase
        if fitvals[-1] > 1.0:
            fitvals[-1] -= 1.0
        if fitvals[-1] < 0.0:
            fitvals[-1] += 1.0
        log.info("Starting pulse phase: %f" % fitvals[-1])
    else:
        # Use maxbin[0], as in the branch above: formatting the whole array
        # with %f relies on implicit array-to-scalar conversion.
        log.warning("Measured starting pulse phase is %f, but using %f" %
                    (measured_phase, args.phs))
        fitvals[-1] = args.phs
    ftr.fitvals[-1] = fitvals[-1]
    ftr.phaseogram(plotfile=ftr.model.PSR.value + "_pre.png")
    plt.close()
    # ftr.phaseogram()

    # Write out the starting pulse profile
    vs, xs = np.histogram(ftr.get_event_phases(),
                          outprof_nbins,
                          range=[0, 1],
                          weights=ftr.weights)
    with open(ftr.model.PSR.value + "_prof_pre.txt", "w") as f:
        # zip stops at the shorter sequence, so the final bin edge is dropped.
        for x, v in zip(xs, vs):
            f.write("%.5f  %12.5f\n" % (x, v))

    # Try normal optimization first to see how it goes
    newfitvals = None
    if do_opt_first:
        result = op.minimize(ftr.minimize_func, np.zeros_like(ftr.fitvals))
        newfitvals = np.asarray(result["x"]) * ftr.fiterrs + ftr.fitvals
        like_optmin = -result["fun"]
        log.info("Optimization likelihood: %f" % like_optmin)
        ftr.set_params(dict(zip(ftr.fitkeys, newfitvals)))
        ftr.phaseogram()
    else:
        like_optmin = -np.inf

    # Set up the initial conditions for the emcee walkers.  Use the
    # scipy.optimize newfitvals instead if they are better
    ndim = ftr.n_fit_params
    # The `newfitvals is None` guard keeps the pre-fit values whenever no
    # optimisation was run, even if like_start is not finite.
    if newfitvals is None or like_start > like_optmin:
        # Keep the starting deviations small...
        pos = [
            ftr.fitvals +
            ftr.fiterrs * args.initerrfact * np.random.randn(ndim)
            for ii in range(nwalkers)
        ]
        # Set starting params
        for param in [
                "GLPH_1", "GLEP_1", "SINI", "M2", "E", "ECC", "PX", "A1"
        ]:
            if param in ftr.fitkeys:
                idx = ftr.fitkeys.index(param)
                if param == "GLPH_1":
                    svals = np.random.uniform(-0.5, 0.5, nwalkers)
                elif param == "GLEP_1":
                    svals = np.random.uniform(minMJD + 100, maxMJD - 100,
                                              nwalkers)
                    # svals = 55422.0 + np.random.randn(nwalkers)
                elif param == "SINI":
                    svals = np.random.uniform(0.0, 1.0, nwalkers)
                elif param == "M2":
                    svals = np.random.uniform(0.1, 0.6, nwalkers)
                elif param in ["E", "ECC", "PX", "A1"]:
                    # Ensure all positive
                    svals = np.fabs(ftr.fitvals[idx] +
                                    ftr.fiterrs[idx] *
                                    np.random.randn(nwalkers))
                    if param in ["E", "ECC"]:
                        # Reflect values above 1 back below 1.
                        svals[svals > 1.0] = 1.0 - (svals[svals > 1.0] - 1.0)
                for ii in range(nwalkers):
                    pos[ii][idx] = svals[ii]
    else:
        pos = [
            newfitvals +
            ftr.fiterrs * args.initerrfact * np.random.randn(ndim)
            for i in range(nwalkers)
        ]
    # Set the 0th walker to have the initial pre-fit solution
    # This way, one walker should always be in a good position
    pos[0] = ftr.fitvals

    import emcee

    # Following are for parallel processing tests...
    if 0:

        def unwrapped_lnpost(theta, ftr=ftr):
            return ftr.lnposterior(theta)

        import pathos.multiprocessing as mp

        pool = mp.ProcessPool(nodes=8)
        sampler = emcee.EnsembleSampler(nwalkers,
                                        ndim,
                                        unwrapped_lnpost,
                                        pool=pool,
                                        args=[ftr])
    else:
        sampler = emcee.EnsembleSampler(nwalkers, ndim, ftr.lnposterior)
    # The number is the number of points in the chain
    sampler.run_mcmc(pos, nsteps)

    def chains_to_dict(names, sampler):
        """Map each parameter name to its transposed chain slice."""
        chains = [sampler.chain[:, :, ii].T for ii in range(len(names))]
        return dict(zip(names, chains))

    def plot_chains(chain_dict, file=False):
        """Plot every chain on its own axis; save to `file` or show."""
        npts = len(chain_dict)
        fig, axes = plt.subplots(npts, 1, sharex=True, figsize=(8, 9))
        for ii, name in enumerate(chain_dict.keys()):
            axes[ii].plot(chain_dict[name], color="k", alpha=0.3)
            axes[ii].set_ylabel(name)
        axes[npts - 1].set_xlabel("Step Number")
        fig.tight_layout()
        if file:
            fig.savefig(file)
            plt.close()
        else:
            plt.show()
            plt.close()

    chains = chains_to_dict(ftr.fitkeys, sampler)
    plot_chains(chains, file=ftr.model.PSR.value + "_chains.png")

    # Make the triangle plot.
    samples = sampler.chain[:, burnin:, :].reshape((-1, ndim))
    try:
        import corner

        fig = corner.corner(
            samples,
            labels=ftr.fitkeys,
            bins=50,
            truths=ftr.maxpost_fitvals,
            plot_contours=True,
        )
        fig.savefig(ftr.model.PSR.value + "_triangle.png")
        plt.close()
    except ImportError:
        # corner is optional; skip the triangle plot without it.
        pass

    # Make a phaseogram with the 50th percentile values
    # ftr.set_params(dict(zip(ftr.fitkeys, np.percentile(samples, 50, axis=0))))
    # Make a phaseogram with the best MCMC result
    ftr.set_params(dict(zip(ftr.fitkeys[:-1], ftr.maxpost_fitvals[:-1])))
    ftr.phaseogram(plotfile=ftr.model.PSR.value + "_post.png")
    plt.close()

    # Write out the output pulse profile
    vs, xs = np.histogram(ftr.get_event_phases(),
                          outprof_nbins,
                          range=[0, 1],
                          weights=ftr.weights)
    with open(ftr.model.PSR.value + "_prof_post.txt", "w") as f:
        for x, v in zip(xs, vs):
            f.write("%.5f  %12.5f\n" % (x, v))

    # Write out the par file for the best MCMC parameter est
    with open(ftr.model.PSR.value + "_post.par", "w") as f:
        f.write(ftr.model.as_parfile())

    # Print the best MCMC values and ranges.
    # This must be a list: it is iterated twice below, and a bare map()
    # is exhausted after the first pass on Python 3.
    ranges = [
        (v[1], v[2] - v[1], v[1] - v[0])
        for v in zip(*np.percentile(samples, [16, 50, 84], axis=0))
    ]
    log.info("Post-MCMC values (50th percentile +/- (16th/84th percentile):")
    for name, vals in zip(ftr.fitkeys, ranges):
        log.info("%8s:" % name + "%25.15g (+ %12.5g  / - %12.5g)" % vals)

    # Put the same stuff in a file
    with open(ftr.model.PSR.value + "_results.txt", "w") as f:
        f.write(
            "Post-MCMC values (50th percentile +/- (16th/84th percentile):\n")
        for name, vals in zip(ftr.fitkeys, ranges):
            f.write("%8s:" % name +
                    " %25.15g (+ %12.5g  / - %12.5g)\n" % vals)
        f.write("\nMaximum likelihood par file:\n")
        f.write(ftr.model.as_parfile())

    from six.moves import cPickle as pickle

    with open(ftr.model.PSR.value + "_samples.pickle", "wb") as f:
        pickle.dump(samples, f)
def _run_chunk(backend, nwalkers, ndim, lnprob_args, nthreads, initial_state,
               nsteps):
    """Run ``nsteps`` MCMC steps in a fresh worker pool and always release it."""
    pool = Pool(processes=nthreads)
    try:
        sampler = emcee.EnsembleSampler(nwalkers,
                                        ndim,
                                        lnprob,
                                        backend=backend,
                                        args=lnprob_args,
                                        pool=pool)
        sampler.run_mcmc(initial_state, nsteps, progress=True)
    finally:
        # Close and join even if sampling raised, so worker processes are
        # not left behind.
        print('closing pool...')
        pool.close()
        pool.join()


def main():
    """Run (or continue) an emcee chain in chunks, with a new pool per chunk.

    The galaxy sample name is the first positional command-line argument;
    run parameters are read from the ``params`` dict of the module
    ``<sample>_fitting_params``.  The chain is stored in
    ``./chains/<sample>_chain.hdf5``.  When ``params['continue_chain']`` is
    ``False`` the backend is reset; otherwise sampling resumes from the last
    stored walker positions.

    Exits with status 1 if the sample argument is missing.
    """
    import importlib

    nchunk = 25  # number of steps to take before reinitializing pool

    if len(sys.argv) > 1:
        sample = sys.argv[1]
    else:
        print(
            "The first positional argument must be the galaxy sample, e.g. 'sample_1'."
        )
        # A usage error must not report success to the calling shell.
        sys.exit(1)

    chain_dir = './chains/'

    # load parameters for sample
    params = importlib.import_module(sample + '_fitting_params').params

    # retrieve parameters for the galaxy sample
    mag_lim = params['mag_lim'][0]
    ndim = params['ndim']
    nwalkers = params['nwalkers']
    nthreads = params['nthreads']
    nsteps = params['nsteps']

    # initialize walkers
    pos0 = [
        params['theta0'] + params['dtheta'] * np.random.randn(params['ndim'])
        for i in range(params['nwalkers'])
    ]

    # check multiprocessing arguments
    ncpu = cpu_count()
    print("Using {0} CPU cores out of a possible {1}.".format(nthreads, ncpu))

    # load sdss measurements
    t = Table.read(params['comparison_fname'], format='ascii')
    y = t['frequency']
    yerr = t['err']
    lnprob_args = (y, yerr, mag_lim)

    # Set up the backend
    # Don't forget to clear it in case the file already exists
    filename = chain_dir + sample + '_chain.hdf5'
    backend = emcee.backends.HDFBackend(filename)
    if params['continue_chain'] == False:
        backend.reset(nwalkers, ndim)
    else:
        print("Initial number of steps: {0}".format(backend.iteration))
        # retrieve final position of chains: the last entry along the first
        # axis of the stored chain
        samples = backend.get_chain()
        pos0 = samples[-1]

    # run first batch of steps
    print('starting initial pool...')
    _run_chunk(backend, nwalkers, ndim, lnprob_args, nthreads, pos0,
               min(nchunk, nsteps))

    # loop through the remaining full chunks; passing None resumes from the
    # state stored in the backend
    for i in range(1, nsteps // nchunk):
        print('starting new pool...')
        _run_chunk(backend, nwalkers, ndim, lnprob_args, nthreads, None,
                   nchunk)

    # take remaining steps (none if the first batch already covered nsteps)
    nremainder = 0 if nchunk > nsteps else nsteps % nchunk
    if nremainder > 0:
        print('starting new pool...')
        _run_chunk(backend, nwalkers, ndim, lnprob_args, nthreads, None,
                   nremainder)

    print("Final number of steps: {0}".format(backend.iteration))
def mcmc_negfc_sampling(cube, angs, psfn, ncomp, plsc, initial_state, fwhm=4,
                        annulus_width=8, aperture_radius=1, cube_ref=None,
                        svd_mode='lapack', scaling='temp-mean',
                        algo=pca_annulus, delta_rot=1, fmerit='sum',
                        imlib='opencv', interpolation='lanczos4',
                        collapse='median', nwalkers=1000, bounds=None, a=2.0,
                        burnin=0.3, rhat_threshold=1.01,
                        rhat_count_threshold=1, niteration_min=0,
                        niteration_limit=1e2, niteration_supp=0,
                        check_maxgap=1e4, nproc=1, output_dir='results/',
                        output_file=None, display=False, verbosity=0,
                        save=False):
    r""" Runs an affine invariant mcmc sampling algorithm in order to determine
    the position and the flux of the planet using the 'Negative Fake Companion'
    technique. The result of this procedure is a chain with the samples from
    the posterior distributions of each of the 3 parameters.

    This technique can be summarized as follows:
    1) We inject a negative fake companion (one candidate) at a given position
    and characterized by a given flux, both close to the expected values.
    2) We run PCA on an full annulus which pass through the initial guess,
    regardless of the position of the candidate.
    3) We extract the intensity values of all the pixels contained in a
    circular aperture centered on the initial guess.
    4) We calculate the function of merit. The associated chi^2 is given by
    chi^2 = sum(\|I_j\|) where j \in {1,...,N} with N the total number of
    pixels contained in the circular aperture.
    The steps 1) to 4) are looped. At each iteration, the candidate model
    parameters are defined by the emcee Affine Invariant algorithm.

    Parameters
    ----------
    cube: numpy.array
        ADI fits cube. Must be a 3D numpy array.
    angs: numpy.array
        The parallactic angle vector.
    psfn: numpy.array
        PSF array. The PSF must be centered and the flux in a 1*FWHM aperture
        must equal 1 (use ``vip_hci.phot.psf_norm``).
    ncomp: int
        The number of principal components.
    plsc: float
        The platescale, in arcsec per pixel.
    initial_state: numpy.array
        The first guess for the position and flux of the planet, respectively.
        Each walker will start in a small ball around this preferred position.
    fwhm: float, optional
        Forwarded unchanged to ``lnprob``. Presumably the FWHM of the PSF in
        pixels -- TODO confirm.
    annulus_width: float, optional
        The width in pixels of the annulus on which the PCA is performed.
    aperture_radius: float, optional
        The radius in FWHM of the circular aperture.
    cube_ref : numpy ndarray, 3d, optional
        Reference library cube. For Reference Star Differential Imaging.
    svd_mode : {'lapack', 'randsvd', 'eigen', 'arpack'}, str optional
        Switch for different ways of computing the SVD and selected PCs.
        'randsvd' is not recommended for the negative fake companion technique.
    scaling : {'temp-mean', 'temp-standard'} or None, optional
        With None, no scaling is performed on the input data before SVD. With
        "temp-mean" then temporal px-wise mean subtraction is done and with
        "temp-standard" temporal mean centering plus scaling to unit variance
        is done.
    algo : callable, optional
        Forwarded unchanged to ``lnprob``; defaults to ``pca_annulus``.
    delta_rot : optional
        Forwarded unchanged to ``lnprob``.
    fmerit : {'sum', 'stddev'}, string optional
        Chooses the figure of merit to be used. stddev works better for close
        in companions sitting on top of speckle noise.
    imlib : str, optional
        See the documentation of the ``vip_hci.preproc.frame_rotate`` function.
    interpolation : str, optional
        See the documentation of the ``vip_hci.preproc.frame_rotate`` function.
    collapse : {'median', 'mean', 'sum', 'trimmean', None}, str or None, optional
        Sets the way of collapsing the frames for producing a final image. If
        None then the cube of residuals is used when measuring the function of
        merit (instead of a single final frame).
    nwalkers: int optional
        The number of Goodman & Weare 'walkers'.
    bounds: numpy.array or list, default=None, optional
        The prior knowledge on the model parameters. If None, bounds are
        derived from the initial state: radius within +/- annulus_width/2,
        angle within +/- 10, flux between 0 and twice the initial flux.
    a: float, default=2.0
        The proposal scale parameter. See notes.
    burnin: float, default=0.3
        The fraction of a walker which is discarded.
    rhat_threshold: float, default=1.01
        The Gelman-Rubin threshold used for the test for nonconvergence.
    rhat_count_threshold: int, optional
        The Gelman-Rubin test must be satisfied 'rhat_count_threshold' times in
        a row before claiming that the chain has converged.
    niteration_min: int, optional
        Steps per walker lower bound. The simulation will run at least this
        number of steps per walker. Reset to 0 if larger than
        'niteration_limit'.
    niteration_limit: int, optional
        Steps per walker upper bound. If the simulation runs up to
        'niteration_limit' steps without having reached the convergence
        criterion, the run is stopped.
    niteration_supp: int, optional
        Number of iterations to run after having "reached the convergence".
    check_maxgap: int, optional
        Maximum number of steps per walker between two Gelman-Rubin test.
    nproc: int, optional
        The number of processes to use for parallelization.
    output_dir: str, optional
        The name of the output directory which contains the output files in the
        case  ``save`` is True.
    output_file: str, optional
        The name of the output file which contains the MCMC results in the case
        ``save`` is True. Defaults to 'MCMC_results' when None.
    display: bool, optional
        If True, the walk plot is displayed at each evaluation of the Gelman-
        Rubin test.
    verbosity: 0, 1 or 2, optional
        Verbosity level. 0 for no output and 2 for full information.
    save: bool, optional
        If True, the MCMC results are pickled.

    Returns
    -------
    out : numpy.array
        The MCMC chain, as returned by ``chain_zero_truncated``.

    Raises
    ------
    ValueError
        If ``cube`` (or a given ``cube_ref``) is not a 3D numpy array.
    OSError
        If ``save`` is True and the output directory cannot be created.

    Notes
    -----
    The parameter ``a`` must be > 1. For more theoretical information
    concerning this parameter, see Goodman & Weare, 2010, Comm. App. Math.
    Comp. Sci., 5, 65, Eq. [9] p70.

    The parameter 'rhat_threshold' can be a numpy.array with individual
    threshold value for each model parameter.
    """
    if verbosity == 1 or verbosity == 2:
        start_time = time_ini()
        print(" MCMC sampler for the NEGFC technique ")
        print(sep)

    # If required, one create the output folder.
    if save:
        # Timestamp used to name the intermediate pickles written during
        # the run.
        output_file_tmp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

        # Strip a trailing slash so paths can be joined with '/' below.
        if output_dir[-1] == '/':
            output_dir = output_dir[:-1]
        try:
            os.makedirs(output_dir)
        except OSError as exc:
            if exc.errno == 17 and os.path.isdir(output_dir):
                # errno.EEXIST == 17 -> File exists
                pass
            else:
                raise

    if not isinstance(cube, np.ndarray) or cube.ndim != 3:
        raise ValueError('`cube` must be a 3D numpy array')

    if cube_ref is not None:
        if not isinstance(cube_ref, np.ndarray) or cube_ref.ndim != 3:
            raise ValueError('`cube_ref` must be a 3D numpy array')

    # #########################################################################
    # Initialization of the variables
    # #########################################################################
    dim = 3  # There are 3 model parameters: rad, theta, flux
    itermin = niteration_min
    limit = niteration_limit
    supp = niteration_supp
    maxgap = check_maxgap
    initial_state = np.array(initial_state)

    if itermin > limit:
        itermin = 0

    fraction = 0.3      # growth rate of the gap between convergence checks
    geom = 0            # number of convergence checks performed so far
    lastcheck = 0       # step index of the latest convergence check
    konvergence = np.inf  # step at which convergence was reached (inf: not yet)
    rhat_count = 0      # consecutive successful Gelman-Rubin tests
    # Manually managed chain; it is grown on demand inside the loop.
    chain = np.empty([nwalkers, 1, dim])
    isamples = np.empty(0)
    # Walkers start in a small Gaussian ball around the initial state.
    pos = initial_state + np.random.normal(0, 1e-1, (nwalkers, 3))
    nIterations = limit + supp
    rhat = np.zeros(dim)
    stop = np.inf

    if bounds is None:
        bounds = [
            (initial_state[0] - annulus_width / 2.,
             initial_state[0] + annulus_width / 2.),  # radius
            (initial_state[1] - 10, initial_state[1] + 10),  # angle
            (0, 2 * initial_state[2])
        ]  # flux

    sampler = emcee.EnsembleSampler(nwalkers, dim, lnprob, a, args=([
        bounds, cube, angs, plsc, psfn, fwhm, annulus_width, ncomp,
        aperture_radius, initial_state, cube_ref, svd_mode, scaling, algo,
        delta_rot, fmerit, imlib, interpolation, collapse
    ]), threads=nproc)
    start = datetime.datetime.now()

    # #########################################################################
    # Affine Invariant MCMC run
    # #########################################################################
    if verbosity == 2:
        print('\nStart of the MCMC run ...')
        print(
            'Step | Duration/step (sec) | Remaining Estimated Time (sec)')

    for k, res in enumerate(
            sampler.sample(pos, iterations=nIterations, storechain=True)):
        elapsed = (datetime.datetime.now() - start).total_seconds()
        if verbosity == 2:
            # The duration of the very first step is scaled by 0.5.
            if k == 0:
                q = 0.5
            else:
                q = 1
            print('{}\t\t{:.5f}\t\t\t{:.5f}'.format(
                k, elapsed * q, elapsed * (limit - k - 1) * q))

        start = datetime.datetime.now()

        # ---------------------------------------------------------------------
        # Store the state manually in order to handle with dynamical sized chain
        # ---------------------------------------------------------------------
        # Check if the size of the chain is long enough.
        s = chain.shape[1]
        if k + 1 > s:  # if not, one doubles the chain length
            # The appended part is zero-filled; the zeros are later used to
            # locate the end of the filled part of the chain.
            empty = np.zeros([nwalkers, 2 * s, dim])
            chain = np.concatenate((chain, empty), axis=1)
        # Store the state of the chain
        chain[:, k] = res[0]

        # ---------------------------------------------------------------------
        # If k meets the criterion, one tests the non-convergence.
        # ---------------------------------------------------------------------
        # Next check: geometrically growing schedule, but never more than
        # `maxgap` steps after the previous check.
        criterion = np.amin([
            np.ceil(itermin * (1 + fraction)**geom),
            lastcheck + np.floor(maxgap)
        ])

        if k == criterion:
            if verbosity == 2:
                print('\n Gelman-Rubin statistic test in progress ...')

            geom += 1
            lastcheck = k
            if display:
                show_walk_plot(chain)

            if save:
                # Intermediate snapshot of the sampler state.
                import pickle
                fname = '{d}/{f}_temp_k{k}'.format(d=output_dir,
                                                   f=output_file_tmp, k=k)
                data = {
                    'chain': sampler.chain,
                    'lnprob': sampler.lnprobability,
                    'AR': sampler.acceptance_fraction
                }
                with open(fname, 'wb') as fileSave:
                    pickle.dump(data, fileSave)

            # We only test the rhat if we have reached the min # of steps
            if (k + 1) >= itermin and konvergence == np.inf:
                # thr0: number of burn-in steps skipped; thr1: a quarter of
                # the remaining steps.
                thr0 = int(np.floor(burnin * k))
                thr1 = int(np.floor((1 - burnin) * k * 0.25))

                # We calculate the rhat for each model parameter.
                for j in range(dim):
                    # Compare the first and the last quarter of the
                    # post-burn-in chain.
                    part1 = chain[:, thr0:thr0 + thr1, j].reshape(-1)
                    part2 = chain[:, thr0 + 3 * thr1:thr0 + 4 * thr1,
                                  j].reshape(-1)
                    series = np.vstack((part1, part2))
                    rhat[j] = gelman_rubin(series)
                if verbosity == 1 or verbosity == 2:
                    print(' r_hat = {}'.format(rhat))
                    cond = rhat <= rhat_threshold
                    print(' r_hat <= threshold = {} \n'.format(cond))
                # We test the rhat.
                if (rhat <= rhat_threshold).all():
                    rhat_count += 1
                    if rhat_count < rhat_count_threshold:
                        if verbosity == 1 or verbosity == 2:
                            msg = "Gelman-Rubin test OK {}/{}"
                            print(msg.format(rhat_count,
                                             rhat_count_threshold))
                    elif rhat_count >= rhat_count_threshold:
                        if verbosity == 1 or verbosity == 2:
                            print('... ==> convergence reached')
                        konvergence = k
                        # Keep sampling for `supp` more steps.
                        stop = konvergence + supp
                else:
                    # A failed test resets the streak.
                    rhat_count = 0

        # We have reached the maximum number of steps for our Markov chain.
        if k + 1 >= stop:
            if verbosity == 1 or verbosity == 2:
                print('We break the loop because we have reached convergence')
            break

    if k == nIterations - 1:
        if verbosity == 1 or verbosity == 2:
            print("We have reached the limit # of steps without convergence")

    # #########################################################################
    # Construction of the independent samples
    # #########################################################################
    # First step index where walker 0 / parameter 0 is exactly 0.0, taken as
    # the end of the filled part of the chain.
    temp = np.where(chain[0, :, 0] == 0.0)[0]
    if len(temp) != 0:
        idxzero = temp[0]
    else:
        idxzero = chain.shape[1]

    # Keep at most the last 10% of the steps, capped at 2e5/nwalkers steps.
    idx = int(np.amin([np.floor(2e5 / nwalkers), np.floor(0.1 * idxzero)]))
    if idx == 0:
        isamples = chain[:, 0:idxzero, :]
    else:
        isamples = chain[:, idxzero - idx:idxzero, :]

    if save:
        import pickle

        # Record the call arguments (all except the first one, `cube`) next
        # to the results.
        frame = inspect.currentframe()
        args, _, _, values = inspect.getargvalues(frame)
        input_parameters = {j: values[j] for j in args[1:]}

        output = {
            'isamples': isamples,
            'chain': chain_zero_truncated(chain),
            'input_parameters': input_parameters,
            'AR': sampler.acceptance_fraction,
            'lnprobability': sampler.lnprobability
        }

        if output_file is None:
            output_file = 'MCMC_results'
        with open(output_dir + '/' + output_file, 'wb') as fileSave:
            pickle.dump(output, fileSave)

        # NOTE(review): the message always says "MCMC_results", even when a
        # custom `output_file` was given.
        msg = "\nThe file MCMC_results has been stored in the folder {}"
        print(msg.format(output_dir + '/'))

    if verbosity == 1 or verbosity == 2:
        timing(start_time)

    return chain_zero_truncated(chain)
#plot.show() #quit() # nDim = 3 + nTemplates_eD nWalkers = 500 p0 = [coefficients + 5e-4 * np.random.randn(nDim) for i in range(nWalkers)] for walker in p0: for idx, par in enumerate(walker): if par <= 0: walker[idx] = 1 sampler = emcee.EnsembleSampler( nWalkers, nDim, lnprob, kwargs={'observables': observedTOF, 'standoffs': standoffs, 'tofbinnings': tofRunBins, 'tofranges': tof_range, 'templates': shapeTemplates}, threads=8) fout = open('burninchain.dat','w') burninSteps = 10000 for i,samplerOut in enumerate(sampler.sample(p0, iterations=burninSteps)): burninPos, burninProb, burninRstate = samplerOut if i%50 == 0: print('burn-in step {} of {}'.format(i, burninSteps)) if i%10 == 0: # only save every 10th step fout = open('burninchain.dat','a') for k in range(burninPos.shape[0]): fout.write('{} {} {}\n'.format(k, burninPos[k], burninProb[k]))
if ifmcmc: print("enabling Ensemble sampler.") # pos0=[para_guess + 1.0e-7*np.random.randn(ndim) for j in range(nwalkers)] pos0 = [ np.array([ np.random.uniform(low=para_limits[idim][0], high=para_limits[idim][1]) for idim in range(ndim) ]) for iwalker in range(nwalkers) ] with Pool() as pool: sampler = emcee.EnsembleSampler( nwalkers, ndim, lnpost, pool=pool) #, args=(para_limits, obj_obs, xpdv, ypdv)) # # burn-in print("start burning in. nburn:", nburn) for j, result in enumerate( sampler.sample(pos0, iterations=nburn, thin=10)): display_bar(j, nburn) pass sys.stdout.write("\n") pos, _, _ = result sampler.reset() # actual iteration print("start iterating. nsteps:", nsteps) for j, result in enumerate(sampler.sample(pos, iterations=nsteps)):
def run_emcee_sampler(lnprobf, initial_center, model, verbose=True,
                      postargs=None, postkwargs=None, prob0=None,
                      nwalkers=None, nburn=None, niter=32, walker_factor=4,
                      nthreads=1, pool=None, hdf5=None, interval=1, **kwargs):
    """Run an emcee sampler, including iterations of burn-in and
    re-initialization.  Returns the production sampler.

    :param lnprobf:
        The posterior probability function.

    :param initial_center:
        The initial center for the sampler ball.

    :param model:
        An instance of a models.ProspectorParams object.  Only ``model.ndim``
        is read here; the object is also forwarded to ``emcee_burn``.

    :param verbose:
        If true, print progress messages.

    :param postargs:
        Positional arguments for ``lnprobf``.  ``None`` (default) means no
        extra positional arguments.

    :param postkwargs:
        Keyword arguments for ``lnprobf``.  ``None`` (default) means no extra
        keyword arguments.

    :param prob0:
        Forwarded unchanged to ``emcee_burn``.

    :param nwalkers:
        The number of walkers to use.  If None, use the nearest power of two
        to ``ndim * walker_factor``.

    :param nburn:
        List of the number of iterations to run in each round of burn-in
        (for removing stuck walkers).  ``None`` (default) means ``[16]``.

    :param niter:
        Number of iterations for the production run.

    :param nthreads:
        Passed to ``emcee.EnsembleSampler`` as ``threads``.

    :param pool: (optional)
        A ``Pool`` object, either from ``multiprocessing`` or from
        ``emcee.mpi_pool``.

    :param hdf5: (optional)
        H5py.File object that will be used to store the chain in the datasets
        ``"chain"`` and ``"lnprobability"`` (inside a ``"sampling"`` group).
        If not set, the chain will instead be stored as a numpy array in the
        returned sampler object.

    :param interval:
        Fraction of the full run at which to flush to disk, if using hdf5 for
        output.

    :param kwargs:
        Extra keyword arguments forwarded to ``emcee_burn``.

    :returns:
        ``(esampler, in_cent, in_prob)``: the production sampler followed by
        the second and third values returned by ``emcee_burn``.
    """
    # Fresh containers per call: mutable objects in the signature would be
    # shared between every invocation of this function.
    if postargs is None:
        postargs = []
    if postkwargs is None:
        postkwargs = {}
    if nburn is None:
        nburn = [16]

    # Get dimensions
    ndim = model.ndim
    if nwalkers is None:
        nwalkers = int(2 ** np.round(np.log2(ndim * walker_factor)))
    if verbose:
        print('number of walkers={}'.format(nwalkers))

    # Initialize sampler
    esampler = emcee.EnsembleSampler(nwalkers, ndim, lnprobf,
                                     args=postargs, kwargs=postkwargs,
                                     threads=nthreads, pool=pool)

    # Burn in sampler
    initial, in_cent, in_prob = emcee_burn(esampler, initial_center, nburn,
                                           model, prob0=prob0,
                                           verbose=verbose, **kwargs)

    # Production run
    esampler.reset()
    if hdf5 is not None:
        # Set up hdf5 backend; the sampler then does not keep its own chain.
        sdat = hdf5.create_group('sampling')
        chain = sdat.create_dataset("chain", (nwalkers, niter, ndim))
        lnpout = sdat.create_dataset("lnprobability", (nwalkers, niter))
        # blob = hdf5.create_dataset("blob")
        storechain = False
    else:
        storechain = True

    # ``interval * niter`` truncates to 0 for small intervals; clamp to 1 so
    # the modulus below is well defined (this flushes on every iteration,
    # which is what the zero-modulus case evaluated to, without the warning).
    flush_every = max(1, int(interval * niter))

    # Main loop over iterations of the MCMC sampler
    if verbose:
        print('starting production')
    production = esampler.sample(initial, iterations=niter,
                                 storechain=storechain)
    for i, result in enumerate(production):
        if hdf5 is None:
            continue
        chain[:, i, :] = result[0]
        lnpout[:, i] = result[1]
        if (i + 1) % flush_every == 0 or (i + 1) == niter:
            # do stuff every once in awhile
            # this would be the place to put some callback functions
            # e.g. [do(result, i, esampler) for do in things_to_do]
            # like, should probably store the random state too.
            hdf5.flush()
    if verbose:
        print('done production')

    return esampler, in_cent, in_prob
def fit_mcmc(ne_data, tspec_data, nemodel, clustermeta, ml_results,
             Ncores=params.Ncores, Nwalkers=params.Nwalkers,
             Nsteps=params.Nsteps, Nburnin=params.Nburnin):
    '''
    Perform a MCMC analysis on the free parameters of the cluster total
    gravitating mass model, utilizing the ensemble sampler of emcee.

    Args:
    -----
    ne_data (astropy table): observed gas density profile
        in the form established by set_prof_data()
    tspec_data (astropy table): observed temperature profile
        in the form established by set_prof_data()
    nemodel (dictionary): dictionary storing the gas density profile model
        as output in fit_density()
    clustermeta (dictionary): dictionary of cluster and analysis info
        produced by set_prof_data(); clustermeta['incl_mstar'] must be 1
        (three free parameters) or 0 (two free parameters)
    ml_results (array): maximum-likelihood parameter estimation for mass
        model free params of the form [c_ml, rs_ml, normsersic_ml]
    Ncores (int): number of cores over which to run MCMC analysis
    Nwalkers (int): number of MCMC ensemble walkers
    Nsteps (int): number of steps each walker takes
    Nburnin (int): number of steps considered to be a part of the burn-in
        period of the chain; these burn-in steps will be excluded from the
        final MCMC parameter estimation

    Returns:
    --------
    samples (array): MCMC samples of posterior distribution; of the form:
        col 1: c
        col 2: rs
        col 3: log(normsersic)
        NB: length of samples array is Nwalkers * (Nsteps - Nburnin)
    sampler: the emcee EnsembleSampler used for the run

    Raises:
    -------
    ValueError: if clustermeta['incl_mstar'] is neither 0 nor 1

    References:
    -----------
    emcee: https://github.com/dfm/emcee
    + general setup for using emcee to fit a model to data:
        http://dfm.io/emcee/current/user/line/
    '''

    # initialize walkers - result comes from ML fit before
    incl_mstar = clustermeta['incl_mstar']
    if incl_mstar == 1:
        ndim = 3
    elif incl_mstar == 0:
        ndim = 2
    else:
        # Previously this fell through and failed later on an unbound `ndim`.
        raise ValueError(
            "clustermeta['incl_mstar'] must be 0 or 1, got {!r}".format(
                incl_mstar))
    nwalkers = Nwalkers

    pos = [ml_results + 1e-4 * np.random.randn(ndim) for i in range(nwalkers)]

    # sampler
    # NOTE(review): the first three args repeat columns of tspec_data, which
    # is itself passed again further down; this mirrors lnprob's signature.
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob,
                                    args=(tspec_data['radius'],
                                          tspec_data['tspec'],
                                          tspec_data['tspec_err'],
                                          ne_data, tspec_data, nemodel,
                                          clustermeta),
                                    threads=Ncores)

    # Step the sampler manually so progress can be reported.
    for i, result in enumerate(sampler.sample(pos, iterations=Nsteps)):
        # Report at every 10% of the run.  Integer arithmetic is used because
        # the float form `100. * frac % 10 == 0` misses marks through
        # round-off (e.g. 100 * 0.7 == 70.00000000000001).
        if (i + 1) * 10 % Nsteps == 0:
            print('MCMC progress: '
                  + "{0:5.1%}".format(float(i + 1.) / Nsteps))

    # length of samples = walkers * (steps - burn-in)
    samples = sampler.chain[:, Nburnin:, :].reshape((-1, ndim))

    # check acceptance rate: goal between 0.2-0.5
    # print('acceptance rate of walkers:')
    # print(sampler.acceptance_fraction)

    # check autocorrelation time
    try:
        print('autocorrelation time: {}'.format(sampler.acor))
    except Exception:
        # Best-effort diagnostic only; a failed estimate must not abort the
        # fit, but KeyboardInterrupt/SystemExit are no longer swallowed.
        print('autocorrelation time cannot be calculated')
    print('')

    return samples, sampler
def train(self, X, y, do_optimize=True, **kwargs):
    """
    Fit one Gaussian process per hyperparameter sample.

    When ``do_optimize`` is true the hyperparameter samples come from an
    MCMC run over ``self.loglikelihood``; otherwise the single configuration
    currently stored in the kernel (plus ``self.noise``) is used.

    Parameters
    ----------
    X: np.ndarray (N, D)
        Input data points. The dimensionality of X is (N, D),
        with N as the number of points and D is the number of features.
    y: np.ndarray (N,)
        The corresponding target values.
    do_optimize: boolean
        If set to true we perform MCMC sampling otherwise we just use the
        hyperparameter specified in the kernel.
    """
    # --- inputs: optionally rescale to [0, 1] ---
    if not self.normalize_input:
        self.X = X
    else:
        scaled = normalization.zero_one_normalization(X, self.lower, self.upper)
        self.X, self.lower, self.upper = scaled

    # --- targets: optionally standardise to zero mean / unit std ---
    if not self.normalize_output:
        self.y = y
    else:
        standardised = normalization.zero_mean_unit_var_normalization(y)
        self.y, self.y_mean, self.y_std = standardised
        if self.y_std == 0:
            raise ValueError("Cannot normalize output. All targets have the same value")

    # The GP mean is the mean of the (possibly normalised) targets.
    self.mean = np.mean(self.y, axis=0)
    self.gp = george.GP(self.kernel, mean=self.mean)

    if not do_optimize:
        # Single configuration: current kernel vector with the noise appended.
        current = self.gp.kernel[:].tolist()
        current.append(self.noise)
        self.hypers = [current]
    else:
        # One walker per hyperparameter configuration; one extra dimension
        # on top of the kernel parameters.
        n_dims = len(self.kernel.pars) + 1
        ensemble = emcee.EnsembleSampler(self.n_hypers, n_dims,
                                         self.loglikelihood)
        ensemble.random_state = self.rng.get_state()

        if not self.burned:
            # First call only: draw starting points, then burn in.
            if self.prior is not None:
                self.p0 = self.prior.sample_from_prior(self.n_hypers)
            else:
                self.p0 = self.rng.rand(self.n_hypers, n_dims)
            self.p0, _, _ = ensemble.run_mcmc(self.p0,
                                              self.burnin_steps,
                                              rstate0=self.rng)
            self.burned = True

        # Production run; the end position seeds the next call to train().
        self.p0, _, _ = ensemble.run_mcmc(self.p0,
                                          self.chain_length,
                                          rstate0=self.rng)

        # Keep the final sample of every walker.
        self.hypers = ensemble.chain[:, -1]

    self.models = []
    for theta in self.hypers:
        # Each sample is log-scaled: kernel parameters first, noise last.
        gp_kernel = deepcopy(self.kernel)
        gp_kernel.pars = np.exp(theta[:-1])
        gp_noise = np.exp(theta[-1])
        fitted = GaussianProcess(gp_kernel,
                                 normalize_output=self.normalize_output,
                                 normalize_input=self.normalize_input,
                                 noise=gp_noise,
                                 lower=self.lower,
                                 upper=self.upper,
                                 rng=self.rng)
        fitted.train(X, y, do_optimize=False)
        self.models.append(fitted)

    self.is_trained = True
def __init__(self, X, Y, Sigma, theta0, Niter=100,
             covfunction=covariance.SquaredExponential,
             Xstar=None, cXstar=None, mu=None, muargs=(), prior=None,
             priorargs=(), scale0=None, a=2.0, threads=1, nacor=10,
             nsample=50, sampling='True'):
    """Set up the Gaussian process and, optionally, its emcee sampler.

    Parameters read directly by this constructor:

    theta0 : 2-D array; its shape (or, when ``scale0`` is given, the shape
        of ``theta0`` with one extra column) defines ``nwalkers x ndim``.
        The first row is handed to the base class as its initial theta.
    scale0 : optional sequence with ``len(scale0) == len(theta0)``.  When
        given it is appended to ``theta0`` as an extra column and its first
        element is passed to the base class as ``scale``.
    sampling : the *string* ``'True'`` enables creation of the emcee
        sampler; any other value skips it.
    a, threads : forwarded to ``emcee.EnsembleSampler``.
    Niter, nacor, nsample : stored on the instance for later use.

    The remaining arguments (X, Y, Sigma, covfunction, Xstar, cXstar, mu,
    muargs, prior, priorargs) are forwarded to
    ``gp.GaussianProcess.__init__``.

    Raises
    ------
    AssertionError
        If ``scale0`` is given and its length differs from ``theta0``'s.
    SystemExit
        If ``sampling == 'True'`` and ``emcee`` or ``acor`` cannot be
        imported.
    """
    # `is not None` rather than `!= None`: for array input the latter is an
    # element-wise comparison whose truth value is ambiguous.
    if scale0 is not None:
        assert (len(theta0) == len(scale0)),\
            "Lengths of theta0 and scale0 must be identical."
        self.pos = concatenate((theta0,
                                reshape(scale0, (len(scale0), 1))), axis=1)
        self.sc0 = True
        scale = scale0[0]
    else:
        self.pos = theta0
        self.sc0 = False
        scale = None

    # Hyperparameter training is switched off in the base class; the
    # sampler created below is what explores theta (and scale).
    gp.GaussianProcess.__init__(self, X, Y, Sigma, covfunction,
                                theta0[0, :], Xstar, cXstar, mu, muargs,
                                prior, gradprior=None, priorargs=priorargs,
                                thetatrain='False', scale=scale,
                                scaletrain='False')
    self.theta0 = theta0
    self.scale0 = scale0
    self.covfunction = covfunction
    self.Niter = Niter
    self.a = a
    self.threads = threads
    self.nacor = nacor
    self.nsample = nsample
    self.sampling = sampling
    (self.nwalkers, self.ndim) = shape(self.pos)

    if (sampling == 'True'):
        try:
            import emcee
        except ImportError:
            print(
                "Error: MCMCGaussianProcess requires the python package emcee."
            )
            print(
                "emcee can be installed from http://github.com/dfm/emcee")
            raise SystemExit
        try:
            # Imported only to fail early when the package is missing.
            import acor
        except ImportError:
            print(
                "Error: MCMCGaussianProcess requires the python package acor."
            )
            print("acor can be installed from http://github.com/dfm/acor")
            raise SystemExit
        self.sampler = emcee.EnsembleSampler(
            self.nwalkers, self.ndim, mcmc_log_likelihood,
            args=(self.sc0, self.X, self.Y_mu, self.Sigma, covfunction,
                  prior, priorargs),
            a=a, threads=threads)
def sample_emcee(self, nwalkers=500, samples=10, dispersion=.1, burn=5,
                 thin=1, stretch_width=2., anneal_stretch=True, pool=None):
    """Sample the model with emcee and copy the chain into the pymc traces.

    :Arguments:
        nwalkers : number of emcee walkers.
        samples : iterations of the production run.
        dispersion : multiplies the per-node scale of the Gaussian ball the
            walkers are started in, around the current node values.
        burn : iterations of the burn-in run (may be 0).
        thin : passed to ``sampler.sample`` for the production run.
        stretch_width : initial value of the sampler's ``a`` parameter.
        anneal_stretch : if true, ``a`` is moved linearly from
            ``stretch_width`` to 2 over the burn-in iterations.
        pool : passed to ``emcee.EnsembleSampler``.

    :Returns:
        The emcee sampler after the production run.

    A KeyboardInterrupt during the production run stops sampling early;
    whatever has been sampled so far is still stored.
    """
    import emcee
    import pymc.progressbar as pbar

    # This is the likelihood function for emcee
    lnprob = LnProb(self)

    # init
    self.mcmc()

    # get current values
    stochs = self.get_stochastics()
    start = [node_descr['node'].value
             for name, node_descr in stochs.iterrows()]
    ndim = len(start)

    def init_from_priors():
        # Alternative initialisation (currently unused, see below): draw
        # every walker from the prior, retrying on zero-probability draws.
        p0 = np.empty((nwalkers, ndim))
        i = 0
        while i != nwalkers:
            self.mc.draw_from_prior()
            try:
                self.mc.logp
                p0[i, :] = [node_descr['node'].value
                            for name, node_descr in stochs.iterrows()]
                i += 1
            except pm.ZeroProbability:
                continue
        return p0

    # Per-node start scale: looked up by knode name (with any '_subj'
    # removed) in self.emcee_dispersions when present, 0.1 otherwise.
    if hasattr(self, 'emcee_dispersions'):
        scale = np.empty_like(start)
        for i, (name, node_descr) in enumerate(stochs.iterrows()):
            knode_name = node_descr['knode_name'].replace('_subj', '')
            scale[i] = self.emcee_dispersions.get(knode_name, 0.1)
    else:
        scale = 0.1

    p0 = (np.random.randn(ndim * nwalkers).reshape((nwalkers, ndim))
          * scale * dispersion + start)
    #p0 = init_from_priors()

    # instantiate sampler passing in the pymc likelihood function
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob,
                                    a=stretch_width, pool=pool)

    bar = pbar.progress_bar(burn + samples)
    i = 0

    annealing = np.linspace(stretch_width, 2, burn)
    sys.stdout.flush()

    # With burn == 0 the loop below never runs; start production from the
    # initial ball instead of failing on an unbound name.
    burned_pos = p0
    for burned_pos, _prob, _state in sampler.sample(p0, iterations=burn):
        if anneal_stretch:
            sampler.a = annealing[i]
        i += 1
        bar.update(i)

    #print("\nMean acceptance fraction during burn-in: {}".format(np.mean(sampler.acceptance_fraction)))
    sampler.reset()

    # sample
    try:
        # The yielded values are not needed; the sampler stores the chain.
        for _ in sampler.sample(burned_pos, iterations=samples, thin=thin):
            i += 1
            bar.update(i)
    except KeyboardInterrupt:
        pass
    finally:
        print(("\nMean acceptance fraction during sampling: {}".format(np.mean(sampler.acceptance_fraction))))

    # restore state
    for val, (name, node_descr) in zip(start, stochs.iterrows()):
        node_descr['node'].set_value(val)

    # Save samples back to pymc model
    self.mc.sample(1, progress_bar=False)  # This call is to set up the chains
    for column, (name, node) in enumerate(stochs.iterrows()):
        node['node'].trace._trace[0] = sampler.flatchain[:, column]

    return sampler
def bayesian_odds_ratio(airmasses, filters, astrometric_error=0.020, zshift=2.1):
    """Compare a through-the-origin slope model with a flat model for R vs tan(Z).

    ``calcR(airmasses, filters, zshift=zshift)`` supplies the (tan Z, R)
    points.  Gaussian noise of width ``sqrt(2) * astrometric_error`` is added
    to R (the global NumPy RNG is re-seeded with 0 first), both models are
    fitted with Nelder-Mead followed by an emcee run, and the likelihoods
    evaluated at the posterior medians are compared.

    Parameters
    ----------
    airmasses, filters : forwarded to ``calcR``; ``filters[0]`` and the
        airmass range are also used in output file names.
    astrometric_error : per-measurement error; the error on each point is
        ``sqrt(astrometric_error**2 + astrometric_error**2)``.
    zshift : forwarded to ``calcR`` and used in output file names.

    Returns
    -------
    float
        ``exp(lnL_slope - lnL_flat)`` with both log-likelihoods evaluated at
        the median posterior parameters.

    Side effects: re-seeds ``np.random``, draws on matplotlib figure 1 and
    saves it as a PNG in the working directory; prints diagnostics when the
    ratio is not finite.
    """
    plot_points = True
    plot_walkers = False
    intercept_fixed = True

    def emit(*items):
        # Single-string print: same text under Python 2 and Python 3.
        print(' '.join(str(item) for item in items))

    def posterior_medians(chain_samples):
        # 50th percentile of every parameter, as a list (not a lazy map, so
        # the result can be multiplied with arrays under Python 3 as well).
        percentiles = np.percentile(chain_samples, [16, 50, 84], axis=0)
        return [v[1] for v in zip(*percentiles)]

    np.random.seed(0)
    tanZList, RList = calcR(airmasses, filters, zshift=zshift)
    n = len(tanZList)

    def lnlike(theta, x, y, yerr, kind):
        if kind == "flat":
            b = theta
            model = 0.0 * x + b
        if kind == "slope":
            if intercept_fixed:
                m = theta
                model = m * x + 0.0
            else:
                m, b = theta
                model = m * x + b
        inv_sigma2 = 1.0/(yerr**2.)
        return -0.5*(np.sum(((y-model)**2.*inv_sigma2 - np.log(inv_sigma2))))

    def lnprior(theta, kind):
        # Flat priors on (-1, 1) for every parameter.
        if kind == "flat":
            b = theta
            if (-1.0 < b < 1.0):
                return 0.0
            return -np.inf
        if kind == "slope":
            if intercept_fixed:
                m = theta
                if (-1.0 < m < 1.0):
                    return 0.0
                return -np.inf
            else:
                m, b = theta
                if (-1.0 < m < 1.0) and (-1.0 < b < 1.0):
                    return 0.0
                return -np.inf

    def lnprob(theta, x, y, yerr, kind=None):
        if kind == "flat" or kind == "slope":
            lp = lnprior(theta, kind)
            if not np.isfinite(lp):
                return -np.inf
            return lp + lnlike(theta, x, y, yerr, kind)
        else:
            print("must specify flat or slope")
            return np.nan

    def nll(*args):
        return -lnprob(*args)

    nsteps, nwalkers = 500, 100
    x = np.copy(tanZList)
    y = np.copy(RList)
    yerr = np.sqrt((astrometric_error**2.)+(astrometric_error**2.))
    offset = yerr * np.random.randn(n) + 0.0
    # Result unused, but the draw advances the seeded global RNG; removing
    # it would change every random number drawn afterwards.
    np.random.choice([-1.0, 1.0], size=n, replace=True)
    y += offset

    if plot_points:
        plt.figure(1)
        #plt.plot(tanZList, RList, 'o', color=colors[0])
        plt.errorbar(x, y, yerr=yerr, fmt='.', color=colors[1])

    # ---------------- slope model ----------------
    if intercept_fixed:
        ndim = 1
        result = scipy.optimize.minimize(nll, [-0.001], args=(x, y, yerr, "slope"), method="Nelder-Mead")
    else:
        ndim = 2
        result = scipy.optimize.minimize(nll, [-0.001, 0.0], args=(x, y, yerr, "slope"), method="Nelder-Mead")

    pos = [result["x"] + 1e-4*np.random.randn(ndim) for i in range(nwalkers)]
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=(x, y, yerr, "slope"))
    sampler.run_mcmc(pos, nsteps)
    # Drop the first 50 steps of every walker as burn-in.
    samples = sampler.chain[:, 50:, :].reshape((-1, ndim))
    # Unused draw kept for the same RNG-stream reason as above.
    np.random.randint(len(samples), size=100)

    if intercept_fixed:
        m_mcmc_slope = posterior_medians(samples)
    else:
        m_mcmc_slope, b_mcmc_slope = posterior_medians(samples)

    if plot_walkers:
        plt.figure(2)
        ax1 = plt.subplot(211)
        for i in range(nwalkers):
            ax1.plot(sampler.chain[i, :, 0], color='k', alpha=0.05)
        ax1.axhline(y=m_mcmc_slope, xmin=0, xmax=nsteps, color='r')

    xs = np.arange(min(tanZList), max(tanZList), 0.01)
    if plot_points:
        plt.figure(1)
        if intercept_fixed:
            plt.plot(xs, m_mcmc_slope*xs + 0.0, color=colors[2], lw=3)
            for m in samples[np.random.randint(len(samples), size=100)]:
                plt.plot(xs, m*xs + 0.0, color=colors[2], lw=1, alpha=0.2)
        else:
            plt.plot(xs, m_mcmc_slope*xs + b_mcmc_slope, color=colors[2], lw=3)
            for m, b in samples[np.random.randint(len(samples), size=100)]:
                plt.plot(xs, m*xs + b, color=colors[2], lw=1, alpha=0.2)

    # ---------------- flat model ----------------
    ndim = 1
    result = scipy.optimize.minimize(nll, [0.0], args=(x, y, yerr, "flat"), method="Nelder-Mead")

    pos = [result["x"] + 1e-4*np.random.randn(ndim) for i in range(nwalkers)]
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=(x, y, yerr, "flat"))
    sampler.run_mcmc(pos, nsteps)
    samples = sampler.chain[:, 50:, :].reshape((-1, ndim))
    # Unused draw kept for the RNG stream (see above).
    np.random.randint(len(samples), size=100)

    b_mcmc_flat = posterior_medians(samples)

    xs = np.arange(min(tanZList), max(tanZList), 0.01)
    if plot_points:
        plt.figure(1)
        plt.plot(xs, 0.0*xs + b_mcmc_flat, color=colors[4], lw=3)
        for b in samples[np.random.randint(len(samples), size=100)]:
            plt.plot(xs, 0.0*xs + b, color=colors[4], lw=1, alpha=0.2)

    # Shared by both output file names.
    name_suffix = (filters[0] + '_'
                   + str(max(airmasses)-min(airmasses)).replace(".", "") + '_'
                   + str(astrometric_error).replace(".", "") + '_'
                   + str(zshift).replace(".", "") + '_'
                   + str(n) + '.png')

    if plot_walkers:
        plt.figure(2)
        ax3 = plt.subplot(212)
        for i in range(nwalkers):
            ax3.plot(sampler.chain[i, :, 0], color='k', alpha=0.05)
        ax3.axhline(y=b_mcmc_flat, xmin=0, xmax=nsteps, color='r')
        plt.savefig('walkers_modelcompariosn_test_' + name_suffix)
        plt.clf()

    # ---------------- likelihood ratio at the medians ----------------
    if intercept_fixed:
        model_slope = m_mcmc_slope * x + 0.0
    else:
        model_slope = m_mcmc_slope * x + b_mcmc_slope
    model_flat = 0.0 * x + b_mcmc_flat
    inv_sigma2 = 1.0/(yerr**2.)

    slope_loglikelihood = (-0.5*(np.sum(((y-model_slope)**2.*inv_sigma2 - np.log(inv_sigma2)))))
    flat_loglikelihood = (-0.5*(np.sum(((y-model_flat)**2.*inv_sigma2 - np.log(inv_sigma2)))))
    bayes_ratio = np.e**(slope_loglikelihood - flat_loglikelihood)

    if plot_points:
        plt.figure(1)
        #plt.text(0.1, 0.05, str(bayes_ratio), ha='left', va='center')
        #plt.text(0.1, 0.00, str(slope_loglikelihood), ha='left', va='center')
        #plt.text(0.1, -0.05, str(flat_loglikelihood), ha='left', va='center')
        plt.xticks(np.arange(-0.5, 2.5, 0.25), size=14)
        plt.yticks(np.arange(-0.4, 0.4, 0.10), size=14)
        plt.xlim(-0.01, 2.3)
        plt.ylim(-0.35, 0.35)
        plt.xlabel(r'$\tan (Z)$', size=14)
        plt.ylabel(r'$\Delta R_{||}$ (arcsec)', size=14)
        plt.savefig('offset_tanZ_modelcompariosn_' + name_suffix)
        plt.clf()

    if not np.isfinite(bayes_ratio):
        # Dump the inputs that produced an overflow / NaN ratio.
        emit("Redshift: ", zshift, " Airmass Range: ", min(airmasses), " - ", max(airmasses), " Number of Observations: ", n, " Astrometric Error: ", astrometric_error)
        emit(min(x), max(x), min(y), max(y))
        emit(slope_loglikelihood, flat_loglikelihood)
        emit(bayes_ratio)

    return bayes_ratio
def run_emcee(func, pos0, dpos=None, nwalkers=200, nsamps=200, nburn=50,
              verbose=False, conv_F=50, conv_perc=0.01, **kwargs):
    """
    Function to run the emcee sampler on a given likelihood function.

    Sampling stops early when, at a multiple of 100 iterations, every
    autocorrelation-time estimate ``tau`` satisfies both
    ``tau * conv_F < iteration`` and a relative change since the previous
    check below ``conv_perc``.

    Parameters
    ----------
    func: function
        Likelihood function to sample, this must obey the requirements
        of the `emcee` module.
    pos0: list(float)
        A best guess of the parameter values to initiate the sampler.
    dpos: list(float)
        A best guess of the parameter uncertainties to guide the initial
        steps (optional, default=None).  Each walker starts at ``pos0`` plus
        Gaussian noise of width ``0.1 * dpos[i]``; entries that are ``None``
        or not positive (and all entries when ``dpos`` is None, after the
        0.1 factor) fall back to small fixed widths.
    nwalkers: int
        Number of independent walkers to start (optional, default=200).
    nsamps: int
        Maximum number of samples to be taken by each walker
        (optional, default=200).
    nburn: int
        Number of samples to be taken as burn-in, and discarded before
        returning the chain (optional, default=50).
    verbose: bool
        If true, announce the start of sampling.
    conv_F: float
        Required ratio of chain length to autocorrelation time.
    conv_perc: float
        Maximum relative change of the autocorrelation time between checks.
    **kwargs:
        Forwarded to ``emcee.EnsembleSampler``.

    Returns
    -------
    Dictionary with ``'chains'`` (the post-burn-in samples, flattened to
    two dimensions with one column per parameter, walker by walker) and
    ``'autocorr'`` (array of ``(iteration, mean tau)`` rows, one per
    convergence check).
    """
    if verbose:
        print("Sampling")
    ndim = len(pos0)

    # Set up initial displacement amplitudes
    if dpos is None:
        dpos = 1e-2 * np.ones(ndim)

    dp = np.zeros(ndim)
    for i, d in enumerate(dpos):
        if ((d is None) or (d <= 0)):
            dp[i] = 1e-2
        else:
            dp[i] = d * 0.1

    # initial positions of the walkers
    pos = [pos0 + dp * np.random.randn(ndim) for i in range(nwalkers)]

    # Do MCMC sampling, with early stopping if the convergence criterion is
    # met.
    sampler = emcee.EnsembleSampler(nwalkers, ndim, func, **kwargs)

    # This will be useful to testing convergence
    old_tau = np.inf

    # Now we'll sample for up to nsamps steps
    autocorr = []
    for _ in sampler.sample(pos, iterations=nsamps):
        # Only check convergence every 100 steps
        if sampler.iteration % 100:
            continue

        # Compute the autocorrelation time so far
        # Using tol=0 means that we'll always get an estimate even
        # if it isn't trustworthy
        tau = sampler.get_autocorr_time(tol=0)
        autocorr.append((sampler.iteration, np.mean(tau)))
        print(autocorr[-1])

        # Check convergence
        converged = np.all(tau * conv_F < sampler.iteration)
        converged &= np.all(np.abs(old_tau - tau) / tau < conv_perc)
        if converged:
            break
        old_tau = tau

    autocorr = np.array(autocorr)

    # get_chain() is laid out (step, walker, dim); swapping the first two
    # axes reproduces the layout of the deprecated `sampler.chain` property,
    # so the flattened sample order is the same as before.
    kept = np.swapaxes(sampler.get_chain(discard=nburn), 0, 1)
    samples = kept.reshape((-1, ndim))

    return {'chains': samples, 'autocorr': autocorr}
def bayesian_fit(func, data_x, data_y, lnprior_parameters, init_params, n_walkers=10, n_iterations=1000, varnames = None):
    """Main function to perform the fit

    Takes the function and data to fit as an argument and gives back a
    dataframe with the sampling of the fitting parameters. Except for an
    initial transitory phase where the algorithm converges to a steady state
    (the length of which should be checked, but should be no more than a few
    hundred steps), this sampling gives the probability distribution of the
    fitting parameters given the data and priors.

    The last fitted parameter is always the noise scale ``sigma`` of the
    Gaussian likelihood; the parameters before it are passed to ``func``.
    Real and imaginary parts of the residuals are both included in the
    likelihood.

    Parameters:
    func (function): function to be fitted that has exactly two arguments,
        input x and a list of parameters. See functions_library for examples.
    data_x (num array): data array of input to the function
    data_y (num array): data array of the outputs of the function to data_x
    lnprior_parameters (list of functions): list of the prior functions
        corresponding to the list of parameters to be fitted (the last one is
        the prior on sigma). Those represent initial probability
        distributions (on a logarithmic scale, with -np.inf for zero
        probability) of each parameter. In practice, for any data that is not
        mostly noise, this should not influence much the outcome (but ideally
        that fact should be checked). It should give a vague idea of the
        expected scale of the parameter or of a possible range (for instance
        maybe the parameter must be positive to make sense).
    init_params (list of functions): zero-argument functions used to generate
        the initial parameters for each Markov chain. Usually they are chosen
        randomly around a value that roughly makes sense for the data.
    n_walkers (int>0): number of Markov chains to use
    n_iterations (int>0): number of iterations for each chain
    varnames (list of str): names of the parameters to fit; defaults to
        ``p0, p1, ..., sigma``

    Returns:
    Pandas dataframe: dataframe containing the values of all the parameters
        (each represented by a data column) for each walker and each
        iteration (represented with a multiindex, see
        https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html)

    Raises:
    ValueError: if ``lnprior_parameters`` or ``varnames`` does not have the
        same length as ``init_params``.
    """
    # ValueError is a subclass of Exception, so existing handlers still match.
    if len(lnprior_parameters) != len(init_params):
        raise ValueError('The length of lnprior_parameters and init_params do not match!')
    # `is not None` so that an empty name list is validated too, instead of
    # failing later inside the reshape.
    if varnames is not None and len(varnames) != len(init_params):
        raise ValueError('The length of varnames does not match the length of init_params!')

    def lnprior(theta):
        """Prior function in the Bayesian inference, in log scale
        """
        sigma = theta[-1]
        lnprior_sum = lnprior_parameters[-1](sigma)
        for p, lnprior_p in zip(theta[:-1], lnprior_parameters[:-1]):
            lnprior_sum += lnprior_p(p)
        return lnprior_sum

    def lnlike(theta, x, y):
        """Likelihood function in the Bayesian inference, in log scale
        """
        sigma = theta[-1]
        ymod = func(x, theta[:-1])
        #return -0.5 * np.sum( ((y-ymod)/sigma)**2 + 2*np.log(sigma) )
        return -0.5 * np.sum( (((y.real - ymod.real)**2 + (y.imag-ymod.imag)**2)/sigma**2) + 2*np.log(sigma) )

    def lnprob(theta, x, y):
        """Unnormalized Bayesian probability in log scale
        """
        lp = lnprior(theta)
        if not np.isfinite(lp):
            return -np.inf
        # Reuse the prior computed above rather than evaluating it again.
        return lp + lnlike(theta, x, y)

    params_0 = [ np.array([ip() for ip in init_params]) for i in range(n_walkers)]

    ndim = len(init_params)
    sampler = emcee.EnsembleSampler(n_walkers, ndim, lnprob, args=(data_x, data_y))
    sampler.run_mcmc(params_0, n_iterations)

    if varnames is None:
        varnames = [ 'p{}'.format(i) for i in range(ndim-1) ]
        varnames.append('sigma')
    varnames = list(varnames)

    iterations = range(n_iterations)
    walkers = range(n_walkers)
    index = pd.MultiIndex.from_product([walkers, iterations], names=('Walker', 'Iteration'))

    samples_df = pd.DataFrame(
        sampler.chain.reshape((n_walkers*n_iterations, len(varnames))),
        index=index, columns=varnames)
    return samples_df
def run_mcmc(model_path, Q_uvb, ions_to_use, true_Q=18, uvb='KS18', figname='testT.pdf', same_error=False):
    """Fit (log nH, log Z) to mock ion column densities with emcee.

    The "observed" columns are read from the true model
    (``get_true_model(model_path, Q=true_Q)``) for the ions in
    ``ions_to_use``, and are fitted in log10 against the interpolated model
    built for ``Q_uvb`` / ``uvb``.  A corner plot is written to ``figname``;
    the true values are marked on it only when ``Q_uvb == true_Q``.

    Usage sketch: run_mcmc(model_path=..., Q_uvb=..., ions_to_use=[...])

    Errors on the log columns are 0.2 for every ion when ``same_error`` is
    true, otherwise drawn uniformly from [0.1, 0.3).  The global NumPy RNG
    is re-seeded with 0 before any random draw.

    Returns ``(flat_samples, ndim)``.
    """
    true_values = [-4, -1]  # (lognH, logZ) marked on the plot
    n_ions = len(ions_to_use)

    # First row of each requested ion column, as a plain list.
    true_table = get_true_model(model_path, Q=true_Q)
    data_col = [true_table[ion][0] for ion in ions_to_use]

    np.random.seed(0)
    if not same_error:
        sigma_col = np.random.uniform(0.1, 0.3, n_ions)
    else:
        sigma_col = 0.2 * np.ones(n_ions)

    print(np.log10(data_col), sigma_col)

    interp_logf = get_interp_func(model_path=model_path, ions_to_use=ions_to_use, Q_uvb=Q_uvb, uvb=uvb)

    # emcee runs `nwalkers` chains in parallel, so the raw trace holds
    # nwalkers * nsteps points in `ndim` dimensions.
    ndim, nwalkers, nsteps = 2, 50, 5000

    # Walkers start uniformly in log nH in [-5, -3) and log Z in [-2, 0).
    lognh_start = np.random.uniform(-5, -3, nwalkers)
    logz_start = np.random.uniform(-2, 0, nwalkers)
    start_positions = np.vstack((lognh_start, logz_start)).T

    sampler = emcee.EnsembleSampler(
        nwalkers, ndim, log_posterior,
        args=(interp_logf, np.log10(data_col), sigma_col))
    sampler.run_mcmc(start_positions, nsteps, progress=True)

    # Number of steps needed to forget the starting position.
    tau = sampler.get_autocorr_time()
    half_tau = int(np.mean(tau) / 2)
    # Discard 20 * half_tau initial steps, then keep every 5th step.
    flat_samples = sampler.get_chain(discard=half_tau * 20, thin=5, flat=True)

    labels = ['log nH', 'log Z']
    corner_options = dict(labels=labels,
                          quantiles=[0.16, 0.5, 0.84],
                          show_titles=True,
                          title_kwargs={"fontsize": 12})
    if Q_uvb == true_Q:
        # The true values are only meaningful when fitting with the true Q.
        corner_options['truths'] = true_values
    fig = corner.corner(flat_samples, **corner_options)
    fig.savefig(figname)

    # Median with lower / upper 1-sigma offsets for every parameter.
    for dim, label in enumerate(labels):
        levels = np.percentile(flat_samples[:, dim], [16, 50, 84])
        offsets = np.diff(levels)
        print(label, '=', levels[1], offsets[0], offsets[1])

    return flat_samples, ndim
def calc_mcmc(self, nwalkers, niter, nburn, split_traintest):
    """
    Running the MCMC

    The parameter vector is laid out as
    ``(alpha, sigma, beta[0..ndim_blr-1], GP kernel parameters)``.
    The GP part of the final model is currently disabled (see the
    commented-out sections), so the stored model is the BLR prediction only.

    :param nwalkers: Number of walkers, recommended at least 100
    :param niter: Number of iterations, recommend at least 500
    :param nburn: Number of iterations for burn-in phase, recommend at least 100
    :param split_traintest: if > 0, the fitted BLR model is also evaluated
        on the test data (``self.X_blr_test`` / ``self.y_test``); otherwise
        the test-set attributes are set to zero
    :return: None; updates and saves class variables
    """
    # NOTE(review): `kernelname` is neither a parameter nor a local of this
    # method; it must be provided by the enclosing module -- confirm.
    self.kernel = self.kernel_gp(kernelname=kernelname)
    self.ndim_gp = len(self.kernel)  # same as gpdim
    self.residual_blr = np.zeros_like(self.y)

    # Set up the sampler:
    self.nwalkers = nwalkers
    self.niter = niter
    # Built-in int: the `np.int` alias was removed in NumPy 1.24.
    self.ndim = int(self.ndim_gp + self.ndim_blr + 2)
    sampler = emcee.EnsembleSampler(self.nwalkers, self.ndim, self.lnprob)

    # Initialize the walkers:
    if george.__version__ < '0.3.0':
        p0_gp = np.log(self.kernel.pars)
    else:
        p0_gp = self.kernel.get_parameter_vector()
    alpha0 = np.mean(self.y)
    beta0 = np.zeros((self.ndim_blr))
    sigma0 = np.std(self.y) / 2.
    p0_comb = np.hstack((alpha0, sigma0, beta0, p0_gp))
    print("Initial proposal (alpha, sigma, beta vec, GP vec):")
    print(p0_comb)
    print("Initial log posterior function call")
    print(self.lnprob(p0_comb))
    p0 = [
        p0_comb + 1e-4 * np.random.randn(self.ndim)
        for i in range(self.nwalkers)
    ]

    # Time a short 10-step run to extrapolate the total run time.
    print("Estimating MCMC time...")
    start_time = dt.datetime.now()
    sampler.run_mcmc(p0, 10)
    # Reset the chain to remove the timing samples.
    sampler.reset()
    burn_time = (dt.datetime.now() - start_time).seconds
    print('Estimated time till completed: {} seconds '.format(
        burn_time * (self.niter + nburn) / 10.))

    print("Running burn-in...")
    p0, _, state = sampler.run_mcmc(p0, nburn)
    # Reset the chain to remove the burn-in samples.
    sampler.reset()

    print("Running MCMC ...")
    pos, prob, state = sampler.run_mcmc(p0, self.niter, rstate0=state)

    # Save the acceptance fraction of every walker:
    af = sampler.acceptance_fraction
    self.accept_fr = af

    # Get the best model parameters and their respective errors:
    self.sampler_chain = sampler.chain
    self.sampler_flatchain = sampler.flatchain
    flatchain = sampler.flatchain
    maxprob_index = np.argmax(prob)
    self.pos_fit = pos
    self.prob_fit = prob
    # save parameters with largest probability (among the final positions):
    self.params_fit = pos[maxprob_index]
    self.params_mean = np.mean(pos, axis=0)

    def percentile_per_dim(q):
        # q-th percentile of the flattened chain, one value per parameter.
        return [np.percentile(flatchain[:, i], q) for i in range(self.ndim)]

    # save percentile of posterior distribution:
    self.params_50per = percentile_per_dim(50)
    self.params_2per = percentile_per_dim(2)
    self.params_16per = percentile_per_dim(16)
    self.params_84per = percentile_per_dim(84)
    # NOTE: despite the attribute name this stores the 98th percentile.
    self.params_97per = percentile_per_dim(98)
    # save standard deviation:
    self.errors_fit = np.asarray(
        [flatchain[:, i].std() for i in range(self.ndim)])

    # save parameters (split according to the layout described above):
    blr_end = self.ndim_blr + 2
    self.alpha_fit, self.alpha_err = self.params_fit[0], self.errors_fit[0]
    self.sigma_fit, self.sigma_err = self.params_fit[1], self.errors_fit[1]
    self.beta_fit, self.beta_err = (self.params_fit[2:blr_end],
                                    self.errors_fit[2:blr_end])
    # Only used by the disabled GP section below.
    p_fit, p_err = self.params_fit[blr_end:], self.errors_fit[blr_end:]

    # Calculate models and residuals for train data
    self.mu_blr = self.predict_blr(self.X_blr, self.alpha_fit,
                                   self.beta_fit)  # BLR Model
    self.residual_blr = (self.y - self.mu_blr)

    # --- disabled: GP model of the BLR residuals ---
    # #gp = george.GP(self.kernel, mean=np.mean(self.residual_blr))
    # gp = george.GP(self.kernel, mean=0.)
    # if george.__version__ < '0.3.0':
    #     gp.kernel.pars = np.exp(p_fit)
    #     self.gp_fit = gp.kernel.pars
    # else:
    #     gp.kernel.set_parameter_vector(p_fit)
    #     self.gp_fit = gp.kernel.get_parameter_vector()
    # gp.compute(self.X_gp, self.sigma_fit)
    # self.mu_gp, cov_gp = gp.predict(self.residual_blr, self.X_gp) # GP Model
    # self.std_gp = np.sqrt(np.diag(cov_gp)) # standard deviation of GP
    # self.y_model = self.mu_gp + self.mu_blr # Final Model
    self.y_model = self.mu_blr  # Final Model

    # Print some MCMC results and additionally saved in text file:
    print("---- MCMC Results and Parameters ----")
    self.results_file.write('---- MCMC Results and Parameters ----\n')
    print("Mean acceptance fraction:", np.mean(af))
    self.results_file.write('Mean acceptance fraction: {0} \n'.format(
        np.mean(af)))
    #print("Kernel: ", gp.kernel)
    #self.results_file.write('Kernel: {0} \n'.format(gp.kernel))
    print("alpha, err:", round(self.alpha_fit, 2), round(self.alpha_err, 2))
    self.results_file.write('alpha: {0} , err: {1} \n'.format(
        round(self.alpha_fit, 2), round(self.alpha_err, 2)))
    for i in range(len(self.beta_fit)):
        print('beta' + str(i) + ' , err:', round(self.beta_fit[i], 2),
              round(self.beta_err[i], 2))
        self.results_file.write('beta {0}: {1} , err: {2} \n'.format(
            str(i), round(self.beta_fit[i], 2), round(self.beta_err[i], 2)))
    print('sigma, err:', round(self.sigma_fit, 2), round(self.sigma_err, 2))
    self.results_file.write('sigma: {0} , err: {1} \n'.format(
        round(self.sigma_fit, 2), round(self.sigma_err, 2)))
    print("Model lnlikelihood: ", prob[maxprob_index])
    self.results_file.write('Model lnlikelihood: {0} \n'.format(
        prob[maxprob_index]))

    # --- disabled: GP diagnostics ---
    # print("Std GP: ", np.mean(self.std_gp))
    # self.results_file.write('Std GP: {0} \n'.format(np.mean(self.std_gp)))
    # if george.__version__ < '0.3.0':
    #     print("GP lnlikelihood:", gp.lnlikelihood(self.y))
    #     self.results_file.write('GP lnlikelihood: {0} \n'.format(gp.lnlikelihood(self.y)))
    # else:
    #     print("GP lnlikelihood:", gp.log_likelihood(self.y))
    #     self.results_file.write('GP lnlikelihood: {0} \n'.format(gp.log_likelihood(self.y)))

    # Calculate models and residuals for test data
    if split_traintest > 0.:
        self.mu_blr_test = self.predict_blr(self.X_blr_test, self.alpha_fit,
                                            self.beta_fit)  # BLR Model
        self.residual_blr_test = (self.y_test - self.mu_blr_test)
        # --- disabled: GP model on the test residuals ---
        # #gp = george.GP(self.kernel, mean=np.mean(self.residual_blr_test))
        # gp = george.GP(self.kernel, mean=0.)
        # gp.compute(self.X_gp_test, self.sigma_fit)
        # self.mu_gp_test, _ = gp.predict(self.residual_blr_test, self.X_gp_test) # GP Model
        # self.y_model_test = self.mu_gp_test + self.mu_blr_test # Final Model
        self.y_model_test = self.mu_blr_test
    else:
        # No test split: store scalar zeros as placeholders.
        (self.mu_blr_test, self.mu_gp_test, self.residual_blr_test,
         self.y_model_test) = np.zeros(4)
def fit_isochrone_mcmc(obs_file,
                       nwalkers,
                       burn_in,
                       nsteps,
                       thin,
                       guess=False,
                       magcut=17.0):
    """Fit isochrone parameters to Gaia photometry with an emcee ensemble.

    The observation table is read with ``np.genfromtxt(names=True)`` and must
    provide the columns ``Gmag``, ``BPmag``, ``RPmag``, ``e_Gmag``,
    ``e_BPmag``, ``e_RPmag`` and ``P``.  Rows with a non-finite magnitude or
    with ``Gmag >= magcut`` are dropped.  Walkers start uniformly inside the
    hard-coded parameter ranges, a burn-in run is summarised and printed, and
    a second sampler is then run for ``nsteps`` steps from the *same* initial
    positions (the re-centering on the burn-in result is commented out).

    Args:
        obs_file: Path of the observation table.
        nwalkers: Number of ensemble walkers.
        burn_in: Number of steps of the burn-in run.
        nsteps: Number of steps of the production run.
        thin: Thinning factor forwarded to ``sampler.run_mcmc``.
        guess: Currently unused (the code that consumed it is commented out).
        magcut: Faint limit applied to ``Gmag``.

    Returns:
        Tuple ``(best_sol, err)`` where ``best_sol`` is the production sample
        with the highest log-probability and ``err`` is, per parameter,
        ``((p95 - p50) + (p50 - p5)) / 3`` of the retained samples.
    """
    # Drawn first so the global RNG is consumed in the same order as before.
    seed = np.random.randint(2**25, 2**30)

    print('-------------------------------------------------------------')
    print('Starting MCMC fitting...')
    print('-------------------------------------------------------------')

    obs = np.genfromtxt(obs_file, names=True)

    # remove nans and stars fainter than the magnitude cut
    cond1 = np.isfinite(obs['Gmag'])
    cond2 = np.isfinite(obs['BPmag'])
    cond3 = np.isfinite(obs['RPmag'])
    cond4 = obs['Gmag'] < magcut
    ind = np.where(cond1 & cond2 & cond3 & cond4)
    obs = obs[ind]

    # Rename the photometry / error columns to the names in `filters`.
    obs_oc = np.copy(obs[['Gmag', 'BPmag', 'RPmag']])
    obs_oc.dtype.names = ['Gmag', 'G_BPmag', 'G_RPmag']
    obs_oc_er = np.copy(obs[['e_Gmag', 'e_BPmag', 'e_RPmag']])
    obs_oc_er.dtype.names = ['Gmag', 'G_BPmag', 'G_RPmag']

    # Column P scaled by (brightest Gmag) / Gmag.
    weight = obs['P'] * obs_oc['Gmag'].min() / obs_oc['Gmag']

    # load full isochrone grid data and arrays of unique Age and Z values
    grid_dir = './grids/'
    mod_grid, age_grid, z_grid = load_mod_grid(grid_dir, isoc_set='MIST-GAIA')

    filters = ['Gmag', 'G_BPmag', 'G_RPmag']
    refmag = 'Gmag'

    labels = ['age', 'dist', 'met', 'Ebv', 'Rv', 'bin', 'alpha']
    prange = np.array([[6.5, 10.3], [0.1, 10.], [2e-06, 0.048], [0.1, 2.0],
                       [2, 4.], [0., 0.8], [1.5, 3.5]])
    ndim = prange.shape[0]

    # define uniformly distributed walker starting positions
    pos = []
    lik = []
    for _ in range(nwalkers):
        pars = [np.random.uniform(lo, hi) for lo, hi in prange]
        pos.append(np.array(pars))
        # NOTE(review): `lik` is never read afterwards; the call is kept
        # because lnlikelihood is defined elsewhere and may touch the RNG.
        lik.append(
            lnlikelihood(pars, obs_oc, obs_oc_er, filters, refmag, prange,
                         weight))

    # If there is initial guess generate walkers around it
    # scale=(prange[:,1]-prange[:,0])/10.
    # if guess:
    #     pos = [guess + scale*np.random.randn(ndim) for i in range(nwalkers)]

    start_time = timeit.default_timer()

    # setup sampler
    sampler = emcee.EnsembleSampler(nwalkers,
                                    ndim,
                                    lnlikelihood,
                                    a=1.1,
                                    args=(obs_oc, obs_oc_er, filters, refmag,
                                          prange, weight),
                                    threads=mp.cpu_count() - 1,
                                    live_dangerously=True)

    # run sampler in the burn in phase
    sampler.run_mcmc(pos, burn_in)

    # process samples
    samples = sampler.chain[:, :, :].reshape((-1, ndim))

    # get best values and confidence intervals: (median, upper, lower offset).
    # Built with a list comprehension because np.array(map(...)) produces a
    # 0-d object array on Python 3.
    # NOTE(review): the lower percentile here is 15, not 16 -- confirm.
    best_vals = np.array([
        (v[1], v[2] - v[1], v[1] - v[0])
        for v in zip(*np.percentile(samples, [15, 50, 84], axis=0))
    ])

    # get best solution from maximum likelihood sampled
    best_sol = sampler.flatchain[sampler.flatlnprobability.argmax()]

    # reset sampler
    sampler.reset()

    # setup sampler after burn-in (this one also passes `seed` to lnlikelihood)
    sampler = emcee.EnsembleSampler(nwalkers,
                                    ndim,
                                    lnlikelihood,
                                    a=1.1,
                                    args=(obs_oc, obs_oc_er, filters, refmag,
                                          prange, weight, seed),
                                    threads=mp.cpu_count() - 1,
                                    live_dangerously=True)

    print('done burn in phase...')
    print('')
    print('Best solution of burn in phase: ', best_sol)
    print('Average solution of burn in phase: ', best_vals[:, 0])

    # redefine initial positions based on burn in results
    # scale = 0.01*best_vals[:,0]
    # pos = [best_vals[:,0] + scale*np.random.randn(ndim) for i in range(nwalkers)]

    # run sampler for final sample
    sampler.run_mcmc(pos, nsteps, thin=thin)

    # get final best solution
    best_sol = sampler.flatchain[sampler.flatlnprobability.argmax()]

    print('Finished sampling')

    # Discard the first half of the stored steps.  Floor division keeps the
    # slice index an integer on Python 3 (and matches Python 2 int division).
    samples = sampler.chain[:, nsteps // 2 // thin:, :].reshape((-1, ndim))

    best_vals = np.array([
        (v[1], v[2] - v[1], v[1] - v[0])
        for v in zip(*np.percentile(samples, [5, 50, 95], axis=0))
    ])

    print("Mean acceptance fraction: {0:.3f}".format(
        np.mean(sampler.acceptance_fraction)))
    print('Elapsed time: ', (timeit.default_timer() - start_time) / 60.,
          ' minutes')

    ###############################################################
    # print results
    fig = corner.corner(samples,
                        labels=labels,
                        levels=(0.68, 0.95),
                        smooth=True)

    # NOTE(review): column 1 is the upper offset and column 2 the lower one,
    # yet they are printed after '-' and '+' respectively -- confirm intent.
    for i in range(ndim):
        print(labels[i], best_sol[i], '-', best_vals[i, 1], '+',
              best_vals[i, 2])
    print('')
    print('From sample averages:')
    for i in range(ndim):
        print(labels[i], best_vals[i, 0], '-', best_vals[i, 1], '+',
              best_vals[i, 2])

    # plot chains
    fig, axes = plt.subplots(ndim, figsize=(10, 7), sharex=True)
    samples = sampler.chain
    for i in range(ndim):
        ax = axes[i]
        for k in range(nwalkers):
            ax.plot(np.array(samples[k, :, i]), "k", alpha=0.1)
        ax.set_ylabel(labels[i])

    # plot running averages of each walker
    fig, axes = plt.subplots(ndim, figsize=(10, 7), sharex=True)
    for i in range(ndim):
        ax = axes[i]
        for k in range(nwalkers):
            avgs = np.cumsum(np.array(
                samples[k, :, i])) / (np.arange(len(samples[k, :, i])) + 1)
            ax.plot(avgs, "k", alpha=0.1)
        ax.set_ylabel(labels[i])

    print('-------------------------------------------------------------')
    print(' Final result')
    print('-------------------------------------------------------------')
    print(' '.join('%0.3f' % v for v in best_vals[:, 1]))
    print(' '.join('%0.3f' % v
                   for v in (best_vals[:, 1] + best_vals[:, 2]) / 3))

    return best_sol, (best_vals[:, 1] + best_vals[:, 2]) / 3
pos = sol + 1e-4 * np.random.randn(12, 4) #Defino la cantidad de caminantes. nwalkers, ndim = pos.shape #%% # Set up the backend os.chdir(path_datos_global + '/Resultados_cadenas/') filename = "sample_HS_SN_4params.h5" backend = emcee.backends.HDFBackend(filename) backend.reset(nwalkers, ndim) # Don't forget to clear it in case the file already exists textfile_witness = open('witness_2.txt', 'w+') textfile_witness.close() #%% #Initialize the sampler sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability, backend=backend) max_n = 10000 # This will be useful to testing convergence old_tau = np.inf # Now we'll sample for up to max_n steps for sample in sampler.sample(pos, iterations=max_n, progress=True): # Only check convergence every 100 steps if sampler.iteration % 5: #100 es cada cuanto chequea convergencia continue os.chdir(path_datos_global + '/Resultados_cadenas/') textfile_witness = open('witness_2.txt', 'w') textfile_witness.write('Número de iteración: {} \t'.format( sampler.iteration))
return log_prior(theta) + log_likelihood(theta, x, y, e, sigma_B) ndim = 2 + len(x) # number of parameters in the model nwalkers = 50 # number of MCMC walkers nburn = 100000 # "burn-in" period to let chains stabilize nsteps = 150000 # number of MCMC steps to take # set theta near the maximum likelihood, with np.random.seed(0) starting_guesses = np.zeros((nwalkers, ndim)) starting_guesses[:, :2] = np.random.normal(theta1, 1, (nwalkers, 2)) starting_guesses[:, 2:] = np.random.normal(0.5, 0.1, (nwalkers, ndim - 2)) import emcee import multiprocessing as mp sampler = emcee.EnsembleSampler(nwalkers, ndim, log_posterior, args=[x, y, e, 50], threads=mp.cpu_count() ) sampler.run_mcmc(starting_guesses, nsteps) sample = sampler.chain # shape = (nwalkers, nsteps, ndim) sample = sampler.chain[:, nburn:, :].reshape(-1, ndim) theta3 = np.mean(sample[:, :2], axis=0) g = np.mean(sample[:, 2:], 0) outliers = (g < 0.5) #plt.show() plt.errorbar(x, y, e, fmt='.k', ecolor='gray') plt.plot(xfit, theta1[0] + theta1[1] * xfit, color='gray',label="y = %sx + %s"%(theta1[1],theta1[0]) ) plt.plot(xfit, theta2[0] + theta2[1] * xfit, color='green',label="Huber: y = %sx + %s"%(theta2[1],theta2[0]) ) plt.plot(xfit, theta3[0] + theta3[1] * xfit, color='navy',label="MCMC: y = %sx + %s"%(theta3[1],theta3[0]) ) plt.plot(x[outliers], y[outliers], 'ro', ms=20, mfc='none', mec='red')
def parallel(i):
    """Construct the emcee ensemble sampler for ``WD_MCMC_func.ln_prob``.

    The extra positional arguments handed to the log-probability are taken
    from module-level names.  ``i`` is not used by the visible code, and the
    sampler is only bound to a local name.
    """
    ln_prob_args = [
        mass, age, pml, pmb, factor, l, b,
        mass_Q, age_Q, pml_Q, pmb_Q, factor_Q, l_Q, b_Q,
        NOT_FIT_UVW, NOT_FIT_INDEX, FIXV,
    ]
    sampler = emcee.EnsembleSampler(nwalkers,
                                    ndim,
                                    WD_MCMC_func.ln_prob,
                                    args=ln_prob_args)
backend = emcee.backends.HDFBackend(filename) backend.reset(nwalkers, ndim) #print ("Running burn-in...") sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability, args=(total_data, total_cov),pool=pool,backend=backend) #pos, _, _ = sampler.run_mcmc(pos, 100, progress = True, store = False) #sampler.reset() print("Running production...") sampler.run_mcmc(pos, 5000,store=True, progress=True) ''' # Resume from saved chain with Pool() as pool: filename = "wpvpfmcmctest.h5" backend = emcee.backends.HDFBackend(filename) print("Initial size: {0}".format(backend.iteration)) sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability, args=(total_data, total_cov), pool=pool, backend=backend) print("Running production...") sampler.run_mcmc(None, 10000, store=True, progress=True) print("Final size: {0}".format(backend.iteration)) print("Mean acceptance fraction: {0:.3f}".format( np.mean(sampler.acceptance_fraction))) print(np.median(sampler.flatchain, axis=0))
else: return -np.inf def log_prob(theta,x,y,sigma_y): if np.isinf(log_prior(theta)): return -np.inf else: return log_prior(theta)-log_lik(theta, x,y,sigma_y) #obtain a,b,c which minimize (-log-likelihood) guess=(1,1,1) soln=minimize(log_lik,guess,args=(x,y,sigma_y)) #50 markov chains each starting from near the maxima of prob distribution. Each chain is in 3D nwalk,ndim=50,3 pos=soln.x+1e-4*np.random.randn(nwalk,ndim) sampler=emcee.EnsembleSampler(nwalk,ndim,log_prob,args=(x,y,sigma_y)) sampler.run_mcmc(pos,4000) samples=sampler.get_chain() plt.figure(figsize=(16,3)) plt.subplot(311) plt.plot(samples[:,:,0]) #a plt.xlabel('step no') plt.ylabel('a') plt.subplot(312) plt.plot(samples[:,:,1],) #b plt.xlabel('step no') plt.ylabel('b') plt.subplot(313) plt.plot(samples[:,:,2],) #c plt.xlabel('step no') plt.ylabel('c')
# Save the initial parameters.  The context manager closes the file even if
# one of the writes raises (the previous open()/close() pair did not).
with open(DIR_TO_SAVE+str(65)+"_init_hyperparams.txt", "w") as save_file_init:
    for i, label in enumerate(kernel_labels):
        save_file_init.write(str(label)+": "+str(inital_params[i]) + ' ' + str(initial_bounds[i]) + '\n')


# Weight function
def lnprob(p):
    """Return log-likelihood of `y` plus log-prior of `model` at vector `p`.

    Sets the parameter vector on the module-level `model` as a side effect.
    """
    model.set_parameter_vector(p)
    return model.log_likelihood(y, quiet=True) + model.log_prior()


# MCMC for parameter optimization
initial = model.get_parameter_vector()
ndim, nwalkers = len(initial), 32
# Walkers start in a tight ball (sigma 1e-6) around the current parameters.
p0 = initial + 1e-6 * np.random.randn(nwalkers, ndim)
sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob)
print("Initial ln-likelihood: {0:.2f}".format(model.log_likelihood(y)))

print("Running burn-in...")
p0, _, _ = sampler.run_mcmc(p0, 500)  # Define the number of samples to take in the burn-in
# NOTE(review): the reset is disabled, so the burn-in steps remain in
# `sampler.flatchain` and contribute to the median below -- confirm intent.
#sampler.reset()

print("Running production...")
sampler.run_mcmc(p0, 1000)  # Define the number of samples
samples = sampler.flatchain

# Adopt the per-parameter median of the chain as the final parameter vector.
model.set_parameter_vector(np.percentile(samples,[50], axis=0)[0])

# Compute the predictions
model.recompute()
print("\nFinal ln-likelihood: {0:.2f}".format(model.log_likelihood(y)))
period = model.get_parameter_vector()[1]
PARAM_MAXES, method='global-differential-evolution', ) print(f'Maximum likelihood parameters are: {best_fit}') ndim = PARAM_MINS.size initial_position = best_fit + WALKER_DISPERSION * np.random.randn( NWALKERS, ndim) chain_filename = generate_chain_filename() backend = emcee.backends.HDFBackend(chain_filename) backend.reset(NWALKERS, ndim) print('Beginning MCMC fit...') with pp.ProcessPool(NUM_PROCESSES) as pool: sampler = emcee.EnsembleSampler(NWALKERS, ndim, log_prob, args=(sim_stack, ), backend=backend, pool=pool) sampler.run_mcmc(initial_position, NSTEPS, progress=True) pool.close() pool.join() pool.clear() pool.terminate() pool.restart() print(bcolors.OKGREEN + '--- Analysis complete ---' + bcolors.ENDC)
def main():
    """Fit a light curve for every aperture of each AOR, then run MCMC.

    For each AOR the extracted photometry is loaded from
    ``<fpath>/<aor>extraction.npz``.  Every aperture column of ``dd['lc']`` is
    normalised, filtered (or reloaded from a cached ``post_filter_<apr>.npz``),
    fitted with ``leastsq`` and scored with ``est_rednoise``.  The aperture
    with the smallest ``beta_red * sdnr`` is then sampled with emcee, and the
    samples, corner plot and percentile summaries are written under
    ``fpathout``.  Planet name, AOR list, paths and fit mode are hard-coded.

    NOTE(review): the nesting of this function was reconstructed from a
    whitespace-collapsed source; spots where the block extent was ambiguous
    are marked with NOTE(review) and should be confirmed.

    Returns:
        None.
    """
    ###########################################################################
    # READ IN THE RAW PHOTOMETRY
    ###########################################################################
    numecl = 0
    plnm = 'WASP_101'
    verbose = 'false'
    fpath = '/Users/rahuljayaraman/Documents/Miscellany/Research (Tucker Group)/Python (Transits)/' + plnm
    aorlist = os.listdir(fpath)
    #aorlist= [item for item in aorlist if not item.startswith('.')]
    #aorlist=aor_from_list(plnm, 1)
    #aorlist=[50494976]
    aorlist = ['62158336', '62159360']
    #aorlist=np.delete(aorlist, [0,1, len(aorlist)-1])

    for aor in aorlist:
        print(aor)
        aor = str(aor)
        prisec = 'primary'
        ramp_style = 'none'
        # NOTE(review): no path separator is inserted between fpath and aor
        # here, unlike in the np.load call below -- confirm this is intended.
        fpathout = fpath + aor + '/apr_fits/' + ramp_style + '/'
        directory = os.path.dirname(fpathout)
        if not os.path.exists(directory):
            os.makedirs(directory)
        #dd=np.load('/Users/Brian/Desktop/Tucker_Group/t_1/outputs/'+plnm+'/'+aor)
        dd = np.load(fpath + '/' + aor + 'extraction.npz')
        t = dd['time']
        all_lc = dd['lc']
        #hp=dd['hp']
        cp = dd['cp']
        exptime = dd['exptime']
        framtime = 0.1
        orbparams = dd['op']
        holdpos = dd['hold_pos']
        npix = dd['beta_np']
        chnum = dd['ch']
        # One row per aperture: [sdnr, beta_red*sdnr, beta_red, depth, t0].
        # Allocated up front (filled with 1000.) instead of lazily via
        # `red_all == []`, which compares an ndarray with a list on every
        # aperture after the first and raises on recent NumPy releases.
        red_all = np.ones(shape=(all_lc.shape[1], 5)) * 1000.
        orbparams[6] = 2456164.6934  #only for wasp-101b

        #######################################################################
        pred_ecl_time = get_pred_time(orbparams, t, prisec)
        print(orbparams)
        print(pred_ecl_time - t[0])
        freeparams = [pred_ecl_time - t[0], orbparams[2]]
        if prisec == 'secondary':
            freeparams[1] = 0.0011
            ldc = []
        else:
            ldc = find_coeffs(
                orbparams[10], orbparams[9], orbparams[8], 2,
                'quadratic')  #(temp, log_g, metallicity, channel, type_limb)

        for apr in range(0, all_lc.shape[1]):
            directory = os.path.dirname(fpathout)
            if not os.path.exists(directory):
                os.makedirs(directory)
            lc = np.squeeze(all_lc[:, apr] * 2.35481)
            time = (t - t[0])
            time = np.squeeze(time)
            norm = np.nanmedian(lc)
            #print('Photon Noise limit is: ',(np.sqrt(norm*1.002)/(norm*1.002)))
            err = 1.1 * lc**0.5
            lc = lc / norm
            err = err / norm
            # The computed error is immediately replaced by a constant.
            err = np.ones(len(lc)) * 0.0045
            xpos = holdpos[:, 0]
            ypos = holdpos[:, 1]
            npix = dd['beta_np']

            ###################################################################
            # NORMALIZE THE PIXEL VALUES
            ###################################################################
            timelength = len(t)
            #cp1=cp[1:4, 1:4, :]
            cp1 = cp
            dep_ind = cp1.shape[0] * cp1.shape[1]
            cp2 = np.reshape(cp1, (dep_ind, timelength))
            cp3 = cp2  #[:,start:end]
            # Scale each time sample's pixel column to unit sum (in place).
            for p in range(0, len(time)):
                norm = np.sum(cp3[:, p])
                cp3[:, p] /= norm

            ###################################################################
            # FILTER THE DATA
            ###################################################################
            #fpathout='/Users/Brian/Desktop/Tucker_Group/Spitzer/mapping_files/outputs/'+plnm+'/'+aor+'/apr_fits/'
            filt_file = fpathout + 'post_filter_' + str(apr) + '.npz'
            #print(filt_file)
            if os.path.isfile(filt_file):
                # Reuse the cached, already filtered arrays.
                if verbose == 'true':
                    print('Found Filter File')
                ff = np.load(filt_file)
                lc = ff['lc']
                #cp3=ff['cp3']
                time = ff['time']
                xpos = ff['xpos']
                ypos = ff['ypos']
                npix = ff['npix']
                err = ff['err']
                found = 'true'
            else:
                found = 'false'
                if verbose == 'true':
                    print('In Filter')
                lc, cp3, time, xpos, ypos, npix, err = filter_data(
                    lc, cp3, time, xpos, ypos, npix, dep_ind, err)
                if verbose == 'true':
                    print('Out of Filter')

            plt.figure()
            plt.title(plnm + ' Ch: ' + str(chnum) + '\n' + str(aor) + '_' +
                      str(apr))
            plt.axvline(x=pred_ecl_time - t[0])
            plt.axvline(x=pred_ecl_time - orbparams[4] * 0.5 - t[0],
                        color='r',
                        linestyle='dashed')
            plt.axvline(x=pred_ecl_time + orbparams[4] * 0.5 - t[0],
                        color='r',
                        linestyle='dashed')
            plt.scatter(time, lc, s=1)
            if prisec == 'secondary':
                plt.ylim(0.95, 1.05)
            else:
                plt.ylim(0.95, 1.03)
            #plt.xlim(time[0], np.amax(time))
            plt.savefig(fpathout + 'raw_lc_plot_' + str(apr))
            if verbose == 'true':
                plt.draw()
                plt.pause(1200)
            plt.close('all')
            # time2=np.multiply(time, time)
            # time=time[np.newaxis]
            # time2=time2[np.newaxis]
            # t2hours=time2*24.0**2.0
            # thours=time*24.0

            ###################################################################
            # TRIM THE DATA
            ###################################################################
            trim_time = 0.  #in minutes
            # NOTE(review): everything down to plt.draw() is assumed to be
            # guarded by this condition (trimming is disabled at 0.).
            if trim_time != 0.:
                trim_time = trim_time / (60. * 24.0)  #convert to days
                start_index = int(trim_time / (exptime / 86400.0))
                end_ind = np.squeeze(lc)
                end_ind = end_ind.size
                print(exptime)
                lc = lc[start_index:end_ind]
                time = np.squeeze(time[start_index:end_ind])
                xpos = xpos[start_index:end_ind]
                ypos = ypos[start_index:end_ind]
                npix = npix[start_index:end_ind]
                err = err[start_index:end_ind]
                plt.figure()
                plt.scatter(time, lc, s=1)
                plt.draw()

            ###################################################################
            # FIND NEIGHBORS
            ###################################################################
            if found == 'true':
                gw = ff['gw']
                nbr = ff['nbr']
            else:
                if verbose == 'true':
                    print('In Find NBR')
                gw, nbr = find_nbr_qhull(xpos,
                                         ypos,
                                         npix,
                                         sm_num=50,
                                         a=1.0,
                                         b=1.7777,
                                         c=1.0,
                                         print_space=10000.)
                if verbose == 'true':
                    print('Out of Find NBR')
                # Cache the filtered data so the next run can skip filtering.
                # NOTE(review): assumed to run only when no cache was found.
                np.savez(fpathout + 'post_filter_' + str(apr),
                         lc=lc,
                         cp3=cp3,
                         time=time,
                         xpos=xpos,
                         ypos=ypos,
                         npix=npix,
                         err=err,
                         gw=gw,
                         nbr=nbr,
                         orbparams=orbparams,
                         pred_ecl_time=pred_ecl_time)

            ###################################################################
            # FIT THE DATA
            ###################################################################
            if prisec == 'secondary':
                freeparams = [pred_ecl_time - t[0], orbparams[2], 0.005,
                              0.05]  #the last 2 free params are ramp terms
            else:
                if ramp_style == 'linear':
                    freeparams = [
                        pred_ecl_time - t[0], orbparams[2], 0.00001, 1.000001
                    ]
                if ramp_style == 'exp':
                    freeparams = [
                        pred_ecl_time - t[0], orbparams[2], 0.005, 0.05
                    ]
                if ramp_style == 'none':
                    freeparams = [pred_ecl_time - t[0], orbparams[2], 1.0, 1.0]

            params, m = initialize_model(np.squeeze(time), freeparams,
                                         orbparams, prisec, ldc)
            fluxcurve = m.light_curve(params)
            fit_params, pcov, infodict, flag, sucess = leastsq(
                nnbr_res,
                freeparams,
                args=(time, lc, err, gw, nbr, params, m, prisec, ramp_style),
                full_output=1)
            print('apr# ' + str(apr), fit_params)

            # Persist this aperture's data and fit for the "best" reload below.
            file_name = fpathout + 'apr_fit_' + str(apr)
            with open(file_name, 'wb') as fileObject:
                pickle.dump([lc, time, err, gw, nbr, fit_params], fileObject)

            ###################################################################
            # PLOT THE FIT
            ###################################################################
            if prisec == 'secondary':
                params.t_secondary = fit_params[0]
                params.fp = fit_params[1]
            else:
                params.t0 = fit_params[0]
                params.rp = fit_params[1]
            eclipse_model = m.light_curve(params)
            ramp = ramp_model([fit_params[2], fit_params[3]], time, ramp_style)
            lc2 = np.squeeze(lc / eclipse_model / ramp)
            # Divide each point by the gw-weighted sum over its neighbours.
            w1 = lc2[nbr]
            w2 = np.multiply(w1, gw)
            w3 = np.sum(w2, 1)
            w4 = np.divide(lc2, w3)
            w5 = w4 * eclipse_model
            resids = (w4 - 1.)  #/err
            res2 = (lc / eclipse_model - 1.0) / err

            pltbins = 64
            blc = bin_anything(w5, pltbins)
            btime = bin_anything(time, pltbins)
            if prisec == 'secondary':
                phase = 0.5 + (time + t[0] - pred_ecl_time) / orbparams[5]
            if prisec == 'primary':
                phase = 0.0 + (time + t[0] - pred_ecl_time) / orbparams[5]
            bphase = bin_anything(phase, pltbins)

            plt.figure()
            plt.title(plnm + ' Ch: ' + str(chnum) + '\n' + str(aor) + '_' +
                      str(apr))
            plt.scatter(bphase, blc, s=10)
            #plt.scatter(time, lc, alpha=0.1, color='b', s=1)
            plt.plot(np.squeeze(phase), eclipse_model, color='r')
            if prisec == 'secondary':
                plt.ylim(0.9975, 1.0035)
                plt.text(
                    0.47, 1.003, 'T_center O-C (s): ' + str(
                        round((fit_params[0] + t[0] - pred_ecl_time) * 86400.,
                              1)) + ' Depth: ' +
                    str(round(fit_params[1] * 1.0e6, 0)) + ' ppm')
                plt.text(0.49, 1.0025,
                         'SDNR: ' + str(round(np.std(resids), 6)))
            else:
                plt.ylim(0.983, 1.005)
                plt.text(
                    0.43, 0.9925, 'T_center O-C (s): ' + str(
                        round((fit_params[0] + t[0] - pred_ecl_time) * 86400.,
                              1)))
                plt.text(
                    0.43, 0.990, 'Transit Depth: ' +
                    str(round(fit_params[1]**2. * 100, 4)) + ' %')
                plt.text(0.43, 0.9875,
                         'SDNR: ' + str(round(np.std(resids), 6)))
            plt.xlabel('Phase Units')
            plt.ylabel('Relative Flux')
            plt.savefig(fpathout + 'apr_fit_plot_' + str(apr))
            if verbose == 'true':
                plt.draw()
                plt.pause(1.2)

            ###################################################################
            # Get Red Noise
            ###################################################################
            sdnr, beta_red = est_rednoise(resids, framtime, fpathout, aor, apr,
                                          plnm, chnum, prisec)
            red_all[apr, :] = [
                sdnr, beta_red * sdnr, beta_red,
                round(fit_params[1] * 1.e6, 1), fit_params[0]
            ]

        # Best aperture = smallest value in column 1 (beta_red * sdnr).
        best = np.nanargmin(red_all, axis=0)
        best = best[1]
        np.save(fpathout + aor + '_summary', red_all)
        np.savetxt(fpathout + aor + '_summary', red_all)
        if verbose == 'true':
            print(best)

        #######################################################################
        # Load the best apr results
        #######################################################################
        filename = fpathout + 'apr_fit_' + str(best)
        # The pickle was written by the aperture loop above; do not point this
        # at files from an untrusted source.
        with open(filename, 'rb') as fileObject:
            lc, time, err, gw, nbr, fit_params = pickle.load(fileObject)
        # Inflate the errors by the red-noise factor of the best aperture.
        err = err * red_all[best, 2]
        print('Best Beta_red', red_all[best, 2])
        params, m = initialize_model(np.squeeze(time), freeparams, orbparams,
                                     prisec, ldc)

        #######################################################################
        # run_mcmc
        #######################################################################
        theta = fit_params
        ndim, nwalkers = len(theta), 20
        sampler = emcee.EnsembleSampler(nwalkers,
                                        ndim,
                                        lnprob,
                                        args=(time, lc, err, gw, nbr, params,
                                              m, prisec, ramp_style))
        pos = [theta + 1.e-4 * np.random.randn(ndim) for i in range(nwalkers)]
        sampler.run_mcmc(pos, 1500)
        # Drop the first 50 steps of every walker, then flatten.
        samples = sampler.chain[:, 50:, :].reshape((-1, ndim))
        np.save(fpathout + aor + '_samples', samples)
        if prisec == 'primary':
            fig = corner.corner(samples,
                                labels=["t0", "rp", "a1",
                                        "a2"])  #, "A/R", "inc"])
        else:
            fig = corner.corner(samples,
                                labels=["t0", "Fp", "a1",
                                        "a2"])  #, "A/R", "inc"])
        fig.savefig(fpathout + aor + '_corner_' + str(best) + '.png')
        #plt.show(block=False)
        #plt.pause(0.5)

        # Derive error bars: (median, +offset, -offset) per parameter.
        t0_mcmc, rp_mcmc, a1_mcmc, a2_mcmc = map(
            lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
            zip(*np.percentile(samples, [16, 50, 84], axis=0)))
        print(rp_mcmc, t0_mcmc)
        np.savez(fpathout + aor + '_mcmc_results',
                 rp_mcmc=rp_mcmc,
                 t0_mcmc=t0_mcmc,
                 a1_mcmc=a1_mcmc,
                 a2_mcmc=a2_mcmc,
                 best=best)

        phase = 0.0 + (time + t[0] - pred_ecl_time) / orbparams[5]
        bphase = bin_anything(phase, pltbins)
        plt.figure()
        # Overplot the model for 100 randomly drawn posterior samples.
        for t0, rp, a1, a2 in samples[np.random.randint(len(samples),
                                                        size=100)]:
            params.rp = rp
            params.t0 = t0
            ecl_mod = m.light_curve(params)
            plt.plot(phase, ecl_mod, color='k', alpha=0.05)
            # NOTE(review): the corrected light curve is assumed to be
            # recomputed per draw; only the last draw's `blc` is plotted.
            ramp = ramp_model([a1, a2], time, ramp_style)
            lc2 = np.squeeze(lc / ecl_mod / ramp)
            w1 = lc2[nbr]
            w2 = np.multiply(w1, gw)
            w3 = np.sum(w2, 1)
            w4 = np.divide(lc2, w3)
            w5 = w4 * ecl_mod
            resids = (w4 - 1.)  #/err
            res2 = (lc / ecl_mod - 1.0) / err
            blc = bin_anything(w5, pltbins)
            btime = bin_anything(time, pltbins)
        plt.scatter(bphase, blc, s=8, alpha=0.5)
        plt.xlabel("Phase Units")
        plt.ylabel("Relative Flux")
        plt.title(plnm + ' Ch: ' + str(chnum))
        plt.show()
        #plt.savefig('/Users/Brian/Desktop/W79_summary/'+str(chnum)+'_mcmc_fit')
    return None
def emcee(nsteps=500,
          ndim=8,
          nwalkers=16,
          walker_1=50.0,
          walker_2=30.0,
          walker_3=-6,
          walker_4=1.65e-5,
          walker_5=60,
          walker_6=40,
          walker_7=.2,
          walker_8=.8,
          sigma_1=50.0,
          sigma_2=50.0,
          sigma_3=2,
          sigma_4=8e-6,
          sigma_5=20,
          sigma_6=10,
          sigma_7=.5,
          sigma_8=.5,
          restart=True):
    '''Perform MCMC affine invariants

    Runs an emcee ensemble on the module-level ``lnprob``, writes every
    walker position together with its log-probability to
    ``chain_475steps_new8params.csv`` and plots the walker traces.

    :param nsteps: The number of iterations
    :param ndim: number of dimensions
    :param nwalkers: number of walkers
    :param walker_1: the first parameter for the 1st dimension - r_in
    :param walker_2: the first parameter for the 2nd dimension - delta_r
    :param walker_3: the first parameter for the 3rd dimension - log_m_disk
    :param walker_4: the first parameter for the 4th dimension - f_star
    :param walker_5: the first parameter for the 5th dimension - position_angle
    :param walker_6: the first parameter for the 6th dimension - inclination
    :param walker_7: the first parameter for the 7th dimension - xoffs for disk
    :param walker_8: the first parameter for the 8th dimension - yoffs for disk
    :param sigma_1: sigma for walker_1
    :param sigma_2: sigma for walker_2
    :param sigma_3: sigma for walker_3
    :param sigma_4: sigma for walker_4
    :param sigma_5: sigma for walker_5
    :param sigma_6: sigma for walker_6
    :param sigma_7: sigma for walker_7
    :param sigma_8: sigma for walker_8
    :param restart: if true, start the walkers from the last ``nwalkers``
        rows of ``chain_25steps_new8params.csv``; otherwise draw them from
        normal distributions centred on ``walker_*`` with widths ``sigma_*``
    '''
    # Imported locally: inside this function the name `emcee` must refer to
    # the library, not to this function.
    import emcee

    #r_out = r_in + delta_r
    columns = [
        'r_in', 'delta_r', 'm_disk', 'f_star', 'position_angle',
        'inclination', 'xoffs', 'yoffs'
    ]

    if not restart:
        p0 = np.random.normal(loc=(walker_1, walker_2, walker_3, walker_4,
                                   walker_5, walker_6, walker_7, walker_8),
                              size=(nwalkers, ndim),
                              scale=(sigma_1, sigma_2, sigma_3, sigma_4,
                                     sigma_5, sigma_6, sigma_7, sigma_8))
    else:
        #read from csv file
        dg = pd.read_csv("chain_25steps_new8params.csv")
        # Rows are stored step by step, one row per walker, so the final
        # ensemble is the last `nwalkers` rows.  The earlier indexing
        # (-(nwalkers - i + 1)) was shifted by one row: it skipped the last
        # walker and picked up one from the previous step instead.
        last_step = dg.iloc[-nwalkers:]
        p0 = np.zeros([nwalkers, ndim])
        for col, name in enumerate(columns):
            p0[:, col] = last_step[name].values
        # The stored 'delta_r' column is treated as r_out here, hence the
        # subtraction of r_in.
        p0[:, 1] -= p0[:, 0]  #future versions should be delta_r

    sampler = emcee.EnsembleSampler(nwalkers, ndim,
                                    lnprob)  #threads=10, a=4.0
    run = sampler.sample(p0, iterations=nsteps, storechain=True)

    # One record per walker per step: the ndim parameters followed by lnprob.
    steps = []
    for i, result in enumerate(run):
        pos, lnprobs, blob = result
        new_step = [np.append(pos[k], lnprobs[k]) for k in range(nwalkers)]
        steps += new_step
        #print(pos)
        print(lnprobs)
        sys.stdout.write("Completed step {} of {} \r".format(i, nsteps))
        sys.stdout.flush()

    #steps = steps[5000:]
    df = pd.DataFrame(steps)
    df.columns = columns + ['lnprob']
    df.to_csv('chain_475steps_new8params.csv')

    print(np.shape(sampler.chain))
    print("Finished MCMC.")
    print("Mean acceptance fraction: {0:.3f}".format(
        np.mean(sampler.acceptance_fraction)))

    #plt.close()
    cmap_light = sns.diverging_palette(220, 20, center='dark', n=nwalkers)
    #colors = ['red', 'blue', 'green', 'purple', 'yellow', 'black']

    # Path of every walker in the (r_in, delta_r) plane; rows i, i+nwalkers,
    # ... belong to walker i.
    fig, ax = plt.subplots()
    for i in range(nwalkers):
        #c = colors[i]
        ax.plot(df['r_in'][i::nwalkers],
                df['delta_r'][i::nwalkers],
                linestyle='-',
                marker='.',
                alpha=0.5)
    plt.show(block=False)

    w1m = df['r_in'][0::nwalkers]
    w2m = df['delta_r'][1::nwalkers]

    # One trace panel per parameter plus one for lnprob (assumes ndim == 8).
    fig, (ax0, ax1, ax2, ax3, ax4, ax5, ax6, ax7,
          ax8) = plt.subplots(ndim + 1)
    x = np.arange(0, len(w1m))
    print(np.shape(x), np.shape(w1m))
    print(np.shape(x), np.shape(w2m))
    for i in range(0, nwalkers):
        ax0.plot(x, df['r_in'][i::nwalkers])
        ax1.plot(x, df['delta_r'][i::nwalkers])
        ax2.plot(x, df['m_disk'][i::nwalkers])
        ax3.plot(x, df['f_star'][i::nwalkers])
        ax4.plot(x, df['position_angle'][i::nwalkers])
        ax5.plot(x, df['inclination'][i::nwalkers])
        ax6.plot(x, df['xoffs'][i::nwalkers])
        ax7.plot(x, df['yoffs'][i::nwalkers])
        ax8.plot(x, df['lnprob'][i::nwalkers])
    fig.suptitle(
        'r_in, delta_r, m_disk, f_star, position_angle, inclination, xoffs, yoffs, lnprob'
    )
    plt.show(block=False)
    print(np.shape(x), np.shape(w1m))