# Initialize print("Running MCMC sampling...") ndim = len(ml_params) nwalkers = 32 pos = ml_params + 1e-5 * np.random.randn(nwalkers, ndim) lp = np.array(list(map(log_prob, pos))) m = ~np.isfinite(lp) while np.any(m): pos[m] = ml_params + 1e-5 * np.random.randn(m.sum(), ndim) lp[m] = np.array(list(map(log_prob, pos[m]))) m = ~np.isfinite(lp) # Sample sampler = emcee3.Sampler() ensemble = emcee3.Ensemble(emcee3.SimpleModel(log_prob), pos) sampler.run(ensemble, 1250, progress=True) # Compute the model predictions gp.set_parameter_vector(ml_params) x = np.linspace(t.min(), t.max(), 5000) mu, var = gp.predict(y, x, return_var=True) omega = np.exp(np.linspace(np.log(0.1), np.log(10), 5000)) psd = gp.kernel.get_psd(omega) period = np.exp(gp.get_parameter("kernel:log_period")) tau = np.linspace(0, 4 * period, 5000) acf = gp.kernel.get_value(tau) # Compute the sample predictions print("Making plots...")
with open("astero-{0}.pkl".format(kicid), "wb") as f: pickle.dump(( gp, fit_y, freq, power_all, power_some, len(x), ), f, -1) if os.path.exists("astero-{0}.h5".format(kicid)): result = input("MCMC save file exists. Overwrite? (type 'yes'): ") if result.lower() != "yes": sys.exit(0) # Define a custom proposal def astero_move(rng, x0): x = np.array(x0) f = 2.0 * (rng.rand(len(x)) < 0.5) - 1.0 x[:, 3] = np.log(np.exp(x[:, 3]) + f * np.exp(x[:, 4])) return x, np.zeros(len(x)) # The sampler will use a mixture of proposals sampler = emcee3.Sampler([ emcee3.moves.StretchMove(), emcee3.moves.DEMove(1e-3), emcee3.moves.KDEMove(), emcee3.moves.MHMove(astero_move), ], backend=emcee3.backends.HDFBackend("astero-{0}.h5".format(kicid))) # Sample! with emcee3.pools.InterruptiblePool() as pool: ensemble = emcee3.Ensemble(emcee3.SimpleModel(log_prob), initial_samples, pool=pool) ensemble = sampler.run(ensemble, 10000, progress=True)
def fit_star(star, verbose=False):
    output_filename = "{0}.h5".format(star.kepid)
    logging.info("Output filename: {0}".format(output_filename))
    if os.path.exists(output_filename):
        return

    strt = time.time()

    # The KIC parameters
    mean_log_mass = np.log(star.mass)
    sigma_log_mass = (np.log(star.mass + star.mass_err1) -
                      np.log(star.mass + star.mass_err2))  # double the KIC value
    mean_feh = star.feh
    sigma_feh = star.feh_err1 - star.feh_err2  # double the KIC value
    min_distance, max_distance = 0.0, 3000.0

    # Other bands
    other_bands = dict()
    if np.isfinite(star.tgas_w1gmag):
        other_bands = dict(
            W1=(star.tgas_w1gmag, star.tgas_w1gmag_error),
            W2=(star.tgas_w2gmag, star.tgas_w2gmag_error),
            W3=(star.tgas_w3gmag, star.tgas_w3gmag_error),
        )
    if np.isfinite(star.tgas_Vmag):
        other_bands["V"] = (star.tgas_Vmag, star.tgas_e_Vmag)
    if np.isfinite(star.tgas_Bmag):
        other_bands["B"] = (star.tgas_Bmag, star.tgas_e_Bmag)
    if np.isfinite(star.tgas_gpmag):
        other_bands["g"] = (star.tgas_gpmag, star.tgas_e_gpmag)
    if np.isfinite(star.tgas_rpmag):
        other_bands["r"] = (star.tgas_rpmag, star.tgas_e_rpmag)
    if np.isfinite(star.tgas_ipmag):
        other_bands["i"] = (star.tgas_ipmag, star.tgas_e_ipmag)

    # Build the model
    mist = MIST_Isochrone()
    mod = StarModel(mist,
                    J=(star.jmag, star.jmag_err),
                    H=(star.hmag, star.hmag_err),
                    K=(star.kmag, star.kmag_err),
                    parallax=(star.tgas_parallax, star.tgas_parallax_error),
                    **other_bands)

    # Initialize the walkers, re-drawing any with non-finite posteriors
    nwalkers = 500
    ndim = 5
    lnpost_init = -np.inf + np.zeros(nwalkers)
    coords_init = np.empty((nwalkers, ndim))
    m = ~np.isfinite(lnpost_init)
    while np.any(m):
        K = m.sum()

        # Mass
        coords_init[m, 0] = np.exp(mean_log_mass +
                                   sigma_log_mass * np.random.randn(K))

        # Age
        u = np.random.rand(K)
        coords_init[m, 1] = np.log((np.exp(mist.maxage) - np.exp(mist.minage)) * u +
                                   np.exp(mist.minage))

        # Fe/H
        coords_init[m, 2] = mean_feh + sigma_feh * np.random.randn(K)

        # Distance: uniform in volume
        u = np.random.rand(K)
        coords_init[m, 3] = (u * (max_distance**3 - min_distance**3) +
                             min_distance**3)**(1. / 3)

        # Av
        coords_init[m, 4] = np.random.rand(K)

        lnpost_init[m] = np.array(list(map(mod.lnpost, coords_init[m])))
        m = ~np.isfinite(lnpost_init)

    class ICModel(emcee3.Model):

        def compute_log_prior(self, state):
            state.log_prior = mod.lnprior(state.coords)
            return state

        def compute_log_likelihood(self, state):
            state.log_likelihood = mod.lnlike(state.coords)
            return state

    sampler = emcee3.Sampler(emcee3.moves.KDEMove())
    ensemble = emcee3.Ensemble(ICModel(), coords_init)

    # Run in chunks until the effective number of independent samples per
    # walker exceeds targetn
    chunksize = 200
    targetn = 3
    for iteration in range(100):
        if verbose:
            print("Iteration {0}...".format(iteration + 1))
        sampler.run(ensemble, chunksize, progress=verbose)
        mu = np.mean(sampler.get_coords(), axis=1)
        try:
            tau = emcee3.autocorr.integrated_time(mu, c=1)
        except emcee3.autocorr.AutocorrError:
            continue
        tau_max = tau.max()
        neff = (iteration + 1) * chunksize / tau_max - 2.0
        if verbose:
            print("Maximum autocorrelation time: {0}".format(tau_max))
            print("N_eff: {0}\n".format(neff * nwalkers))
        if neff > targetn:
            break

    burnin = int(2 * tau_max)
    ntot = 5000
    if verbose:
        print("Discarding {0} samples for burn-in".format(burnin))
        print("Randomly choosing {0} samples".format(ntot))
    samples = sampler.get_coords(flat=True, discard=burnin)
    total_samples = len(samples)
    inds = np.random.choice(np.arange(total_samples), size=ntot, replace=False)
    samples = samples[inds]

    fit_parameters = np.empty(len(samples), dtype=[
        ("mass", float), ("log10_age", float), ("feh", float),
        ("distance", float), ("av", float),
    ])
    computed_parameters = np.empty(len(samples), dtype=[
        ("radius", float), ("teff", float), ("logg", float),
    ])

    if verbose:
        prog = tqdm.tqdm
    else:
        prog = lambda f, *args, **kwargs: f
    for i, p in prog(enumerate(samples), total=len(samples)):
        ic = mod.ic(*p)
        fit_parameters[i] = p
        computed_parameters[i] = (ic["radius"], ic["Teff"], ic["logg"])

    total_time = time.time() - strt
    logging.info("emcee3 took {0} sec".format(total_time))

    with h5py.File(output_filename, "w") as f:
        f.attrs["kepid"] = int(star.kepid)
        f.attrs["neff"] = neff * nwalkers
        f.attrs["runtime"] = total_time
        f.create_dataset("fit_parameters", data=fit_parameters)
        f.create_dataset("computed_parameters", data=computed_parameters)

    # Plot
    fig = corner.corner(samples)
    fig.savefig("corner-{0}.png".format(star.kepid))
    plt.close(fig)
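# Aside: a quick numpy-only check of the distance initialization used in
# fit_star. Drawing u ~ U(0, 1) and mapping it through
# (u * (dmax**3 - dmin**3) + dmin**3)**(1/3) is inverse-CDF sampling from
# p(d) proportional to d**2, i.e. uniform in volume. For dmin = 0 the
# analytic mean is 3 * dmax / 4.
import numpy as np

dmax = 3000.0
u = np.random.rand(100000)
d = (u * dmax**3)**(1.0 / 3)
print(d.mean(), 0.75 * dmax)  # both close to 2250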
def fit_emcee3(mod, nwalkers=500, verbose=False, nsamples=5000, targetn=4,
               iter_chunksize=200, pool=None, overwrite=False, maxiter=10,
               sample_directory='mcmc_chains', nburn=2, mixedmoves=True,
               resultsdir='mcmc_results', prior_only=False, **kwargs):
    """Fit the model using emcee3.

    Modeled after https://github.com/dfm/gaia-kepler/blob/master/fit.py

    nburn is the number of autocorrelation times to discard as burn-in.
    """
    # Initialize
    if prior_only:
        walker = Emcee3PriorModel(mod)
    else:
        walker = Emcee3Model(mod)
    ndim = mod.n_params

    if sample_directory is not None:
        sample_file = os.path.join(sample_directory, '{}.h5'.format(mod.name))
        if not os.path.exists(sample_directory):
            os.makedirs(sample_directory)
        backend = HDFBackend(sample_file)
        try:
            coords_init = backend.current_coords
        except (AttributeError, KeyError):
            coords_init = mod.sample_from_prior(nwalkers, require_valid=True,
                                                values=True)
    else:
        backend = Backend()
        coords_init = mod.sample_from_prior(nwalkers, require_valid=True,
                                            values=True)

    if mixedmoves:
        moves = [(emcee3.moves.KDEMove(), 0.4),
                 (emcee3.moves.DEMove(1.0), 0.4),
                 (emcee3.moves.DESnookerMove(), 0.2)]
    else:
        moves = emcee3.moves.KDEMove()

    sampler = emcee3.Sampler(moves, backend=backend)
    if overwrite:
        sampler.reset()
        coords_init = mod.sample_from_prior(nwalkers, require_valid=True,
                                            values=True)

    if pool is None:
        from emcee3.pools import DefaultPool
        pool = DefaultPool()

    try:
        ensemble = emcee3.Ensemble(walker, coords_init, pool=pool)
    except ValueError:
        # Debugging aid: drop into the debugger if the initial coordinates
        # are invalid
        import pdb
        pdb.set_trace()

    def calc_stats(s):
        """Return (tau_max, neff)."""
        tau = s.get_integrated_autocorr_time(c=1)
        tau_max = tau.max()
        neff = s.backend.niter / tau_max - nburn
        if verbose:
            print("Maximum autocorrelation time: {0}".format(tau_max))
            print("N_eff: {0} ({1})\n".format(neff * nwalkers, neff - nburn))
        return tau_max, neff

    done = False
    if not overwrite:
        try:
            if verbose:
                print('Status from previous run:')
            tau_max, neff = calc_stats(sampler)
            if neff > targetn:
                done = True
        except (emcee3.autocorr.AutocorrError, KeyError):
            pass

    # Run in chunks until the effective number of samples exceeds targetn
    chunksize = iter_chunksize
    for iteration in range(maxiter):
        if done:
            break
        if verbose:
            print("Iteration {0}...".format(iteration + 1))
        sampler.run(ensemble, chunksize, progress=verbose)
        try:
            tau_max, neff = calc_stats(sampler)
        except emcee3.autocorr.AutocorrError:
            tau_max = 0
            continue
        if neff > targetn:
            done = True

    burnin = int(nburn * tau_max)
    ntot = nsamples
    samples = sampler.get_coords(flat=True, discard=burnin)
    total_samples = len(samples)
    if ntot > total_samples:
        ntot = total_samples
    if verbose:
        print("Discarding {0} samples for burn-in".format(burnin))
        print("Randomly choosing {0} samples".format(ntot))
    inds = np.random.choice(total_samples, size=ntot, replace=False)
    samples = samples[inds]

    df = pd.DataFrame(samples, columns=mod.param_names)
    write_samples(mod, df, resultsdir=resultsdir)

    return df
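# Aside: a self-contained illustration of the stopping rule in calc_stats,
# using an AR(1) chain with a known autocorrelation time instead of a real
# ensemble. It assumes only numpy and the emcee3 prototype's autocorr module
# (called the same way as in fit_star above, on a (niter, ndim) array). For
# AR(1) noise with coefficient rho, the integrated autocorrelation time is
# (1 + rho) / (1 - rho) = 19 here, so 20000 steps hold roughly 1000
# effective samples.
import numpy as np
import emcee3

rho, niter = 0.9, 20000
eps = np.random.randn(niter)
chain = np.empty(niter)
chain[0] = eps[0]
for i in range(1, niter):
    chain[i] = rho * chain[i - 1] + eps[i]

tau = emcee3.autocorr.integrated_time(chain.reshape(-1, 1), c=1)
tau_max = float(np.max(tau))
neff = niter / tau_max - 2  # nburn = 2, matching fit_emcee3's default
print(tau_max, neff)        # tau_max ~ 19, neff ~ 1000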
# Initialize print("Running MCMC sampling...") ndim = len(ml_params) nwalkers = 32 pos = ml_params + 1e-5 * np.random.randn(nwalkers, ndim) lp = np.array(list(map(log_prob, pos))) m = ~np.isfinite(lp) while np.any(m): pos[m] = ml_params + 1e-5 * np.random.randn(m.sum(), ndim) lp[m] = np.array(list(map(log_prob, pos[m]))) m = ~np.isfinite(lp) # Sample sampler = emcee3.Sampler(backend=emcee3.backends.HDFBackend("transit.h5")) with emcee3.pools.InterruptiblePool() as pool: ensemble = emcee3.Ensemble(emcee3.SimpleModel(log_prob), pos, pool=pool) sampler.run(ensemble, 15000, progress=True) # Plot the parameter constraints samples = np.array(sampler.get_coords(discard=5000, flat=True, thin=13)) samples = samples[:, 1:5] samples[:, :3] = np.exp(samples[:, :3]) truths = np.array(true_params[1:5]) truths[:3] = np.exp(truths[:3]) fig = corner.corner( samples, truths=truths, labels=[r"period", r"$R_\mathrm{P}/R_\star$", r"duration", r"$t_0$"]) fig.savefig("transit-corner.pdf")
def mcmc_fit(x, y, yerr, p_init, p_max, id, RESULTS_DIR, truths, burnin=500,
             nwalkers=12, nruns=10, full_run=500, diff_threshold=.5,
             n_independent=1000):
    """
    Run the MCMC.
    """
    try:
        print("Total number of points = ", sum([len(i) for i in x]))
        print("Number of light curve sections = ", len(x))
    except TypeError:
        print("Total number of points = ", len(x))

    theta_init = np.log([np.exp(-12), np.exp(7), np.exp(-1), np.exp(-17),
                         p_init])
    runs = np.zeros(nruns) + full_run
    ndim = len(theta_init)
    print("p_init = ", p_init, "days, log(p_init) = ", np.log(p_init),
          "p_max = ", p_max)
    args = (x, y, yerr, np.log(p_init), p_max)

    # Time the LHF call.
    start = time.time()
    mod = MyModel(x, y, yerr, np.log(p_init), p_max)
    print("lnlike = ", mod.lnlike_split(theta_init), "lnprior = ",
          mod.Glnprior(theta_init), "\n")
    end = time.time()
    tm = end - start
    print("1 lhf call takes ", tm, "seconds")
    print("burn in will take", tm * nwalkers * burnin, "s")
    print("each run will take", tm * nwalkers * runs[0] / 60, "mins")
    print("total = ", (tm * nwalkers * np.sum(runs) + tm * nwalkers * burnin) /
          60, "mins")

    # Run MCMC.
    mod = MyModel(x, y, yerr, np.log(p_init), p_max)
    model = emcee3.SimpleModel(mod.lnlike_split, mod.Glnprior)
    p0 = [theta_init + 1e-4 * np.random.rand(ndim) for i in range(nwalkers)]
    ensemble = emcee3.Ensemble(model, p0)
    # moves = emcee3.moves.KDEMove()
    # sampler = emcee3.Sampler(moves)
    sampler = emcee3.Sampler()

    print("burning in...")
    total_start = time.time()
    ensemble = sampler.run(ensemble, burnin)
    # Drop the burn-in samples from the stored chain and continue the
    # production runs from the burned-in walker positions
    sampler.reset()

    # Repeated production runs, checking convergence after each one.
    autocorr_times, mean_ind, mean_diff = [], [], []
    for i, run in enumerate(runs):
        print("run {0} of {1}".format(i, len(runs)))
        print("production run, {0} steps".format(int(run)))
        start = time.time()
        ensemble = sampler.run(ensemble, int(run))
        end = time.time()
        print("time taken = ", (end - start) / 60, "minutes")

        # Save the flattened chain so far
        with h5py.File(os.path.join(RESULTS_DIR, "{0}.h5".format(id)),
                       "w") as f:
            coords = sampler.get_coords(flat=True)
            data = f.create_dataset("samples", np.shape(coords))
            data[:, :] = coords
        print("samples = ", np.shape(coords))

        results = make_plot(sampler, x, y, yerr, id, RESULTS_DIR, truths,
                            traces=True, tri=True, prediction=True)
        nsteps, _ = np.shape(sampler.get_coords(flat=True))
        conv, autocorr_times, ind_samp, diff = \
            evaluate_convergence(sampler.get_coords(flat=True),
                                 autocorr_times, diff_threshold,
                                 n_independent)
        mean_ind.append(ind_samp)
        mean_diff.append(diff)
        print("Converged?", conv)
        if conv:
            break

    total_end = time.time()
    total_time = total_end - total_start
    print("Total time taken = ", total_time / 60., "minutes",
          total_time / 3600., "hours")

    with open(os.path.join(RESULTS_DIR, "{0}_time.txt".format(id)), "w") as f:
        f.write("{}".format(total_time))

    # col = "b"
    # if conv:
    #     col = "r"
    # if autocorr_times:
    #     plt.clf()
    #     plt.plot(autocorr_times, color=col)
    #     plt.savefig(os.path.join(RESULTS_DIR, "{0}_acorr".format(id)))
    #     plt.clf()
    #     plt.plot(mean_ind, color=col)
    #     plt.savefig(os.path.join(RESULTS_DIR, "{0}_ind".format(id)))
    #     plt.clf()
    #     plt.plot(mean_diff, color=col)
    #     plt.savefig(os.path.join(RESULTS_DIR, "{0}_diff".format(id)))
    return
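# Aside: `evaluate_convergence` is defined elsewhere in the source; this is a
# hypothetical sketch inferred only from how it is called above (flat chain
# in; (converged, autocorr_times, n_independent_samples, diff) out), assuming
# the emcee3 prototype's autocorr module used throughout.
def evaluate_convergence(flat, autocorr_times, diff_threshold, n_independent):
    # Track the maximum integrated autocorrelation time of the flat chain
    tau = float(np.max(emcee3.autocorr.integrated_time(flat, c=1)))
    autocorr_times.append(tau)
    # Rough count of independent samples in the chain so far
    ind_samp = len(flat) / tau
    # Change in tau since the last check; a large change means the estimate
    # has not yet stabilized
    if len(autocorr_times) > 1:
        diff = np.abs(autocorr_times[-1] - autocorr_times[-2])
    else:
        diff = np.inf
    conv = (ind_samp > n_independent) and (diff < diff_threshold)
    return conv, autocorr_times, ind_samp, diff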