# Save the GP object and the periodogram products next to the chain file so
# that plotting can be redone without repeating the fit.
with open("astero-{0}.pkl".format(kicid), "wb") as f:
    pickle.dump(
        (gp, fit_y, freq, power_all, power_some, len(x)),
        f,
        -1,  # -1 selects the highest pickle protocol available
    )

# Never clobber an existing chain without explicit confirmation.
h5_path = "astero-{0}.h5".format(kicid)
if os.path.exists(h5_path):
    result = input("MCMC save file exists. Overwrite? (type 'yes'): ")
    if result.lower() != "yes":
        sys.exit(0)


# Define a custom proposal
def astero_move(rng, x0):
    """Propose a jump of parameter 3 by plus or minus exp(parameter 4).

    For every walker, ``exp(x[:, 3])`` is shifted up or down (each with
    probability one half) by ``exp(x[:, 4])`` and the result is mapped back
    to log space.  All other parameters are left untouched.

    Parameters
    ----------
    rng
        Random state exposing ``rand``.
    x0
        Walker coordinates, indexed as ``(walker, parameter)``; the input is
        copied, not modified.

    Returns
    -------
    tuple
        The proposed coordinates and an array of zeros with one entry per
        walker (presumably the log proposal-ratio correction expected by
        ``emcee3.moves.MHMove`` -- confirm against emcee3).
    """
    proposal = np.array(x0)
    n_walkers = len(proposal)
    # +1.0 or -1.0 per walker, drawn with equal probability.
    direction = np.where(rng.rand(n_walkers) < 0.5, 1.0, -1.0)
    shifted = np.exp(proposal[:, 3]) + direction * np.exp(proposal[:, 4])
    proposal[:, 3] = np.log(shifted)
    return proposal, np.zeros(n_walkers)


# The sampler will use a mixture of proposals
sampler = emcee3.Sampler(
    [
        emcee3.moves.StretchMove(),
        emcee3.moves.DEMove(1e-3),
        emcee3.moves.KDEMove(),
        emcee3.moves.MHMove(astero_move),
    ],
    backend=emcee3.backends.HDFBackend(h5_path),
)

# Sample!
with emcee3.pools.InterruptiblePool() as pool:
    ensemble = emcee3.Ensemble(
        emcee3.SimpleModel(log_prob),
        initial_samples,
        pool=pool,
    )
    ensemble = sampler.run(ensemble, 10000, progress=True)
# Initialize: scatter the walkers in a tiny Gaussian ball around the
# maximum-likelihood parameters.
print("Running MCMC sampling...")
ndim = len(ml_params)
nwalkers = 32
pos = ml_params + 1e-5 * np.random.randn(nwalkers, ndim)
lp = np.array([log_prob(p) for p in pos])

# Redraw every walker whose log-probability is not finite until all of them
# start from a valid position.
m = np.logical_not(np.isfinite(lp))
while m.any():
    pos[m] = ml_params + 1e-5 * np.random.randn(m.sum(), ndim)
    lp[m] = np.array([log_prob(p) for p in pos[m]])
    m = np.logical_not(np.isfinite(lp))

# Sample
sampler = emcee3.Sampler()
ensemble = emcee3.Ensemble(emcee3.SimpleModel(log_prob), pos)
sampler.run(ensemble, 1250, progress=True)

# Compute the model predictions at the maximum-likelihood parameters.
gp.set_parameter_vector(ml_params)

# Predictive mean and variance on a dense, evenly spaced time grid.
x = np.linspace(t.min(), t.max(), 5000)
mu, var = gp.predict(y, x, return_var=True)

# Power spectrum of the kernel on a log-spaced grid from 0.1 to 10.
omega = np.exp(np.linspace(np.log(0.1), np.log(10), 5000))
psd = gp.kernel.get_psd(omega)

# Kernel value (autocorrelation function) for lags up to four periods.
period = np.exp(gp.get_parameter("kernel:log_period"))
tau = np.linspace(0, 4 * period, 5000)
acf = gp.kernel.get_value(tau)

# Compute the sample predictions; the first 250 steps are discarded.
print("Making plots...")
samples = sampler.get_coords(flat=True, discard=250)
def mcmc_fit(x, y, yerr, p_init, p_max, id, RESULTS_DIR, truths, burnin=500,
             nwalkers=12, nruns=10, full_run=500, diff_threshold=.5,
             n_independent=1000):
    """
    Run the MCMC: a burn-in followed by up to ``nruns`` production runs,
    stopping early once ``evaluate_convergence`` reports convergence.

    After every production run the flattened samples are written to
    ``<RESULTS_DIR>/<id>.h5`` (dataset ``"samples"``, overwritten each time)
    and ``make_plot`` is called.  When the loop ends the total wall-clock
    time in seconds is written to ``<RESULTS_DIR>/<id>_time.txt``.

    Parameters
    ----------
    x, y, yerr
        Data handed unchanged to ``MyModel`` and ``make_plot``.  ``x`` may be
        a sequence of sections (each with a length) or a single flat
        sequence; this only affects the summary that is printed.
    p_init
        Initial period; its logarithm is the last element of the starting
        parameter vector and is also passed to ``MyModel``.
    p_max
        Passed to ``MyModel``.
    id
        Identifier used to build the output file names.
    RESULTS_DIR
        Directory that receives the output files.
    truths
        Passed to ``make_plot``.
    burnin
        Number of burn-in steps.
    nwalkers
        Number of walkers in the ensemble.
    nruns
        Maximum number of production runs.
    full_run
        Number of steps per production run (converted with ``int``).
    diff_threshold, n_independent
        Passed to ``evaluate_convergence``.

    Returns
    -------
    None
    """
    try:
        print("Total number of points = ", sum(len(i) for i in x))
        print("Number of light curve sections = ", len(x))
    except TypeError:
        # x is a flat sequence rather than a list of sections.
        print("Total number of points = ", len(x))

    theta_init = np.log(
        [np.exp(-12), np.exp(7), np.exp(-1), np.exp(-17), p_init])
    ndim = len(theta_init)
    # Step counts must be integers; the sampler is given an int every time.
    steps_per_run = int(full_run)
    total_production_steps = steps_per_run * nruns

    print("p_init = ", p_init, "days, log(p_init) = ", np.log(p_init),
          "p_max = ", p_max)

    # Time the LHF call.
    start = time.time()
    mod = MyModel(x, y, yerr, np.log(p_init), p_max)
    print("lnlike = ", mod.lnlike_split(theta_init), "lnprior = ",
          mod.Glnprior(theta_init), "\n")
    end = time.time()
    tm = end - start
    print("1 lhf call takes ", tm, "seconds")
    print("burn in will take", tm * nwalkers * burnin, "s")
    print("each run will take", tm * nwalkers * steps_per_run / 60, "mins")
    print("total = ", (tm * nwalkers * total_production_steps +
                       tm * nwalkers * burnin) / 60, "mins")

    # Run MCMC, reusing the model object that was just timed.
    model = emcee3.SimpleModel(mod.lnlike_split, mod.Glnprior)
    p0 = [theta_init + 1e-4 * np.random.rand(ndim) for _ in range(nwalkers)]
    ensemble = emcee3.Ensemble(model, p0)
    sampler = emcee3.Sampler()

    print("burning in...")
    total_start = time.time()
    sampler.run(ensemble, burnin)

    # NOTE(review): the ensemble is rebuilt from p0 here, so the production
    # runs start from the initial positions rather than from the burned-in
    # walkers.  This is the original behaviour and is kept as is -- confirm
    # that it is intended.
    ensemble = emcee3.Ensemble(model, p0)

    # repeating MCMC runs.
    autocorr_times, mean_ind, mean_diff = [], [], []
    samples_path = os.path.join(RESULTS_DIR, "{0}.h5".format(id))
    for i in range(nruns):
        print("run {0} of {1}".format(i, nruns))
        print("production run, {0} steps".format(steps_per_run))
        start = time.time()
        ensemble = sampler.run(ensemble, steps_per_run)
        end = time.time()
        print("time taken = ", (end - start) / 60, "minutes")

        # Fetch the flattened chain once and reuse it below.
        coords = sampler.get_coords(flat=True)

        # The context manager closes the file even if the write fails.
        with h5py.File(samples_path, "w") as f:
            data = f.create_dataset("samples", np.shape(coords))
            data[:, :] = coords
        print("samples = ", np.shape(coords))

        make_plot(sampler, x, y, yerr, id, RESULTS_DIR, truths,
                  traces=True, tri=True, prediction=True)

        conv, autocorr_times, ind_samp, diff = evaluate_convergence(
            coords, autocorr_times, diff_threshold, n_independent)
        mean_ind.append(ind_samp)
        mean_diff.append(diff)
        print("Converged?", conv)
        if conv:
            break

    total_end = time.time()
    total_time = total_end - total_start
    print("Total time taken = ", total_time / 60., "minutes",
          total_time / 3600., "hours")
    with open(os.path.join(RESULTS_DIR, "{0}_time.txt".format(id)),
              "w") as f:
        f.write("{}".format(total_time))
    return