def fit(theta_init, x, ys, yerr, plims):
    """
    Fit the GP by MCMC and plot the resulting chains.

    theta_init should be given in linear space, not log (it is converted
    to log space here before being handed to MCMC), as should plims.
    Figures go to "../figs".
    """
    out_dir = "../figs"
    log_theta = np.log(theta_init)
    # 200 and 500 are passed positionally to MCMC; presumably the burn-in
    # and production step counts -- confirm against MCMC's signature.
    chain_sampler = MCMC(log_theta, x, ys, yerr, plims, 200, 500, "test",
                         out_dir)
    make_plot(chain_sampler, x, ys, yerr, 1, out_dir, traces=True)
def fit(
    x,
    y,
    yerr,
    id,
    p_init,
    plims,
    burnin=500,
    run=1500,
    npts=48,
    cutoff=100,
    sine_kernel=False,
    acf=False,
    runMCMC=True,
    plot=False,
):
    """
    Take x, y, yerr plus an initial guess and prior limits for the period
    and do the full GP MCMC.

    Tuning parameters include cutoff (number of days) and npts (number of
    points per bin).

    id: star id; used (zero-padded to 4 digits) in the samples file name.
    p_init: initial guess for the period, in linear space.
    plims: period prior limits, passed straight through to MCMC.
    burnin, run: step counts passed straight through to MCMC.
    sine_kernel: if True use GProtation (output directory "sine"),
        otherwise GProtation_cosine (output directory "cosine").
    acf: if True, run corr_run on the data first.
    runMCMC: if True, run the sampler on the binned, truncated data.
    plot: if True, load "<DIR>/<id>_samples.h5" and call make_plot on the
        unbinned data truncated at cutoff.
    """
    # measure ACF period
    if acf:
        corr_run(x, y, yerr, id, "/Users/angusr/Python/GProtation/code")

    # NOTE: print() is called with a single argument everywhere so that the
    # output is identical under Python 2 and Python 3.
    if sine_kernel:
        print("sine kernel")
        theta_init = [np.exp(-5), np.exp(7), np.exp(0.6), np.exp(-16), p_init]
        print(theta_init)
        from GProtation import MCMC, make_plot
    else:
        print("cosine kernel")
        theta_init = [1e-2, 1.0, 1e-2, p_init]
        print("theta_init =  {0}".format(np.log(theta_init)))
        from GProtation_cosine import MCMC, make_plot

    xb, yb, yerrb = bin_data(x, y, yerr, npts)  # bin data
    m = xb < cutoff  # truncate

    # the sampler works in log space
    theta_init = np.log(theta_init)
    DIR = "sine" if sine_kernel else "cosine"
    print(theta_init)

    if runMCMC:
        MCMC(theta_init, xb[m], yb[m], yerrb[m], plims, burnin, run, id, DIR)

    # make various plots
    if plot:
        with h5py.File("%s/%s_samples.h5" % (DIR, str(int(id)).zfill(4)),
                       "r") as f:
            samples = f["samples"][...]
        # plots use the unbinned data, truncated at the same cutoff
        m = x < cutoff
        make_plot(samples, x[m], y[m], yerr[m], id, DIR, traces=False,
                  triangle=False, prediction=True)
def recover_injections(id, x, y, yerr, path, burnin, run, nwalkers=32,
                       plot=True):
    """
    Initialise the period from a periodogram, then run the GP MCMC.

    The initial period is read from "<path>/<id>_pgramresult.txt" when that
    file can be loaded, otherwise it is computed with periodograms().
    The chain is written to "<path>/<id>_samples.h5" (dataset "samples")
    and a plot of the initial GP prediction to "<path>/<id>_init".

    id: star id.
    x, y, yerr: time, flux and error arrays.
    path: path where you want to save the output.
    burnin: the number of burnin steps.
    run: the number of steps to run for.
    nwalkers: the number of walkers.
    plot: if True then plots of posteriors and chains will be made.
    """
    # initialise with pgram
    try:
        p_init = np.genfromtxt("{0}/{1}_pgramresult.txt".format(path, id))
    except Exception:
        # Best effort: any failure to load the cached result falls back to
        # recomputing it. Exception (not a bare except) so that
        # KeyboardInterrupt / SystemExit still propagate.
        p_init = periodograms(id, x, y, yerr, path, plot=True)

    if p_init < .5:  # small periods raise an error with george.
        p_init = 1.

    # If using lnprob, plims = [pmin, pmax] for a uniform prior.
    # If using Gprob, plims = [mu, sigma] for a Gaussian prior
    # (e.g. plims = np.log([p_init, p_init*.1])).
    plims = np.log([p_init - .5 * p_init, p_init + 2 * p_init])
    print("Initial period and limits:", p_init, np.exp(plims))

    # assign theta_init (log space)
    theta_init = np.log([np.exp(-5), np.exp(7), np.exp(.6), np.exp(-16),
                         p_init])
    print("\n", "log(theta_init) = ", theta_init)
    print("theta_init = ", np.exp(theta_init), "\n")

    # plot initialisation: GP prediction at the starting hyperparameters
    t = np.exp(theta_init)
    k = t[0] * ExpSquaredKernel(t[1]) * ExpSine2Kernel(t[2], t[3])
    gp = george.GP(k)
    gp.compute(x, yerr)
    xs = np.linspace(x[0], x[-1], 1000)
    mu, _ = gp.predict(y, xs)
    plt.clf()
    plt.errorbar(x, y, yerr=yerr, **reb)
    plt.plot(xs, mu, color=cols.blue)
    plt.savefig("{0}/{1}_init".format(path, id))

    # set up MCMC: walkers start in a tiny ball around theta_init
    ndim = len(theta_init)
    p0 = [theta_init + 1e-4 * np.random.rand(ndim) for _ in range(nwalkers)]
    args = (x, y, yerr, plims)

    # time the lhf call to estimate the total run time
    start = time.time()
    print("lnprob = ", lnprob(theta_init, x, y, yerr, plims))
    end = time.time()
    tm = end - start
    print("1 lhf call takes ", tm, "seconds")
    print("burn in will take", tm * nwalkers * burnin, "s")
    print("run will take", tm * nwalkers * run, "s")
    total = tm * nwalkers * run + tm * nwalkers * burnin
    print("total = ", total/60, "mins,", total/3600, "hours")

    # run MCMC
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=args)
    print("burning in...")
    start = time.time()
    p0, _, _ = sampler.run_mcmc(p0, burnin)
    sampler.reset()
    print("production run...")
    sampler.run_mcmc(p0, run)
    end = time.time()
    print("actual time = ", (end - start)/60, "mins")

    # save samples; the context manager closes the file even if the write
    # fails. The dataset is created from a shape only (as before), so the
    # on-disk dtype is unchanged.
    samples_file = "%s/%s_samples.h5" % (path, id)
    with h5py.File(samples_file, "w") as f:
        data = f.create_dataset("samples", np.shape(sampler.chain))
        data[...] = np.array(sampler.chain)

    # make various plots
    if plot:
        with h5py.File(samples_file, "r") as f:
            samples = f["samples"][...]
        make_plot(samples, x, y, yerr, id, path, traces=True, tri=True,
                  prediction=True)
def mcmc_fit(x, y, yerr, p_init, p_max, id, RESULTS_DIR, truths, burnin=500,
             nwalkers=12, nruns=10, full_run=500, diff_threshold=.5,
             n_independent=1000):
    """
    Run the MCMC in repeated production runs until convergence.

    After a burn-in, up to nruns production runs of full_run steps each are
    performed. After every run the flattened samples are written to
    "<RESULTS_DIR>/<id>.h5" (dataset "samples", overwritten each time),
    make_plot is called, and evaluate_convergence decides whether to stop.
    The total wall-clock time in seconds is written to
    "<RESULTS_DIR>/<id>_time.txt".

    x, y, yerr: time, flux and error arrays, or sequences of such arrays
        (one per light curve section).
    p_init: initial period in days (linear space).
    p_max: passed to MyModel together with log(p_init).
    id: star id, used in output file names.
    truths: passed through to make_plot.
    burnin: the number of burnin steps.
    nwalkers: the number of walkers.
    nruns: maximum number of production runs.
    full_run: number of steps per production run.
    diff_threshold, n_independent: passed through to evaluate_convergence.

    Returns None.
    """
    try:
        print("Total number of points = ", sum([len(i) for i in x]))
        print("Number of light curve sections = ", len(x))
    except TypeError:
        # x is a flat array rather than a list of sections
        print("Total number of points = ", len(x))

    theta_init = np.log([np.exp(-12), np.exp(7), np.exp(-1), np.exp(-17),
                         p_init])
    # integer step counts (a float count is not a valid number of steps)
    runs = np.full(nruns, int(full_run), dtype=int)
    ndim = len(theta_init)

    print("p_init = ", p_init, "days, log(p_init) = ", np.log(p_init),
          "p_max = ", p_max)

    # Time the LHF call (including model construction) to estimate run time.
    start = time.time()
    mod = MyModel(x, y, yerr, np.log(p_init), p_max)
    print("lnlike = ", mod.lnlike_split(theta_init), "lnprior = ",
          mod.Glnprior(theta_init), "\n")
    end = time.time()
    tm = end - start
    print("1 lhf call takes ", tm, "seconds")
    print("burn in will take", tm * nwalkers * burnin, "s")
    print("each run will take", tm * nwalkers * runs[0]/60, "mins")
    print("total = ", (tm * nwalkers * np.sum(runs) +
                       tm * nwalkers * burnin)/60, "mins")

    # Run MCMC: walkers start in a tiny ball around theta_init.
    model = emcee3.SimpleModel(mod.lnlike_split, mod.Glnprior)
    p0 = [theta_init + 1e-4 * np.random.rand(ndim) for _ in range(nwalkers)]
    ensemble = emcee3.Ensemble(model, p0)
    moves = emcee3.moves.KDEMove()
    sampler = emcee3.Sampler(moves)

    print("burning in...")
    total_start = time.time()
    sampler.run(ensemble, burnin)
    # NOTE(review): the ensemble is rebuilt from the initial p0 here, so the
    # production runs do not start from the burnt-in walker positions, and
    # the sampler is not reset. Kept as in the original -- confirm intent.
    ensemble = emcee3.Ensemble(model, p0)

    # repeating MCMC runs.
    autocorr_times = []
    out_file = os.path.join(RESULTS_DIR, "{0}.h5".format(id))
    for i, run in enumerate(runs):
        print("run {0} of {1}".format(i, len(runs)))
        print("production run, {0} steps".format(int(run)))
        start = time.time()
        ensemble = sampler.run(ensemble, run)
        end = time.time()
        print("time taken = ", (end - start)/60, "minutes")

        coords = sampler.get_coords(flat=True)

        # overwrite the samples file; closed even if the write fails
        with h5py.File(out_file, "w") as f:
            data = f.create_dataset("samples", np.shape(coords))
            data[:, :] = coords
        print("samples = ", np.shape(coords))

        make_plot(sampler, x, y, yerr, id, RESULTS_DIR, truths, traces=True,
                  tri=True, prediction=True)

        conv, autocorr_times, _, _ = evaluate_convergence(
            coords, autocorr_times, diff_threshold, n_independent)
        print("Converged?", conv)
        if conv:
            break

    total_end = time.time()
    total_time = total_end - total_start
    print("Total time taken = ", total_time/60., "minutes", total_time/3600.,
          "hours")
    with open(os.path.join(RESULTS_DIR, "{0}_time.txt".format(id)),
              "w") as f:
        f.write("{}".format(total_time))
    return
skip_header=2).T for i, kid in enumerate(kids[1:]): x, y, yerr = load_data(str(int(kid))) x -= x[0] m = x < 5 x, y, yerr = x[m], y[m], yerr[m] # initialise init_file = "../data/%s_init.txt" % int(kid) if os.path.exists(init_file): theta_init = np.genfromtxt(init_file).T else: theta_init = [1, 1, 1, np.log(p[i]), 1.] k = theta_init[0] * ExpSquaredKernel(theta_init[1]) \ * ExpSine2Kernel(theta_init[2], theta_init[3]) gp = george.GP(k) gp.compute(x, np.sqrt(theta_init[4]**2+yerr**2)) # predict xs = np.linspace(x[0], x[-1], 1000) mu, cov = gp.predict(y, xs) plt.clf() plt.errorbar(x, y, yerr=yerr, **reb) plt.plot(xs, mu) plt.show() plims = [2., 30.] DIR = "../figs" sampler = MCMC(theta_init, x, y, yerr, plims, 100, 200, int(kid), DIR) make_plot(sampler, x, y, yerr, int(kid), DIR, traces=True) assert 0
def fit(x, y, yerr, id, p_init, plims, DIR, burnin=500, run=1500, npts=48,
        cutoff=100, sine_kernel=False, acf=False, runMCMC=True, plot=False):
    """
    Full GP MCMC for one star: given x, y, yerr, an initial period guess
    and period prior limits, bin and truncate the data, optionally sample
    and optionally plot.

    Tuning parameters include cutoff (number of days) and npts (number of
    points per bin). DIR is where to save output.
    """
    if acf:
        # measure ACF period
        corr_run(x, y, yerr, id, "/Users/angusr/Python/GProtation/code")

    # pick the kernel implementation and its starting hyperparameters
    if not sine_kernel:
        print("cosine kernel")
        theta_init = [1e-2, 1., 1e-2, p_init]
        print("theta_init = ", np.log(theta_init))
        from GProtation_cosine import MCMC, make_plot
    else:
        print("sine kernel")
        theta_init = [np.exp(-5), np.exp(7), np.exp(.6), np.exp(-16), p_init]
        print(theta_init)
        from GProtation import MCMC, make_plot

    x_bin, y_bin, yerr_bin = bin_data(x, y, yerr, npts)
    keep_binned = x_bin < cutoff

    # hyperparameters are handed to the sampler in log space
    theta_init = np.log(theta_init)
    print(theta_init)

    if runMCMC:
        MCMC(theta_init, x_bin[keep_binned], y_bin[keep_binned],
             yerr_bin[keep_binned], plims, burnin, run, id, DIR)

    if not plot:
        return

    # make various plots from the saved chain
    samples_path = "%s/%s_samples.h5" % (DIR, id)
    with h5py.File(samples_path, "r") as f:
        samples = f["samples"][...]
    keep_raw = x < cutoff
    make_plot(samples, x_bin[keep_binned], y_bin[keep_binned],
              yerr_bin[keep_binned], x[keep_raw], y[keep_raw],
              yerr[keep_raw], str(int(id)).zfill(4), DIR, traces=True,
              tri=True, prediction=True)
def recover_injections(id, x, y, yerr, fn, burnin, run, interval, tol,
                       npts=10, nwalkers=32, plot=True):
    """
    Take x, y, yerr, calculate ACF period for initialisation and do MCMC.

    The chain is written to "<id>_samples.h5" (dataset "samples") in the
    current working directory.

    id: star id, used in the samples file name and passed to make_plot.
    x, y, yerr: time, flux and error arrays.
    fn: passed through to make_plot.
    burnin: the number of burnin steps.
    run: the number of steps to run for.
    interval: currently unused.
    tol: fractional half-width of the period prior around the ACF period.
    npts: number of points per period used to subsample data for plotting.
    nwalkers: the number of walkers.
    plot: if True then plots of posteriors and chains will be made.
    """
    p_init = simple_acf(x, y)[0]
    print("acf period = ", p_init)
    if p_init < .1:  # prevent unphysical periods
        p_init = 10.

    # Format data
    plims = np.log([p_init - tol*p_init, p_init + tol*p_init])
    print(p_init, np.exp(plims))

    # Subsampling stride for npts points per period (48 is presumably the
    # number of cadences per day -- confirm). Clamped to >= 1: for short
    # periods int() truncates to 0, which would make 48. / sub divide by
    # zero and x[::0] an invalid slice.
    sub = max(1, int(p_init / float(npts) * 48))
    ppd = 48. / sub
    ppp = ppd * p_init
    print("sub = ", sub, "points per day =", ppd, "points per period =", ppp)

    # subsample (used for plotting only)
    xsub, ysub, yerrsub = x[::sub], y[::sub], yerr[::sub]

    # NOTE(review): the fit itself only uses the first 100 points.
    xb, yb, yerrb = x[:100], y[:100], yerr[:100]
    plt.clf()
    plt.plot(xb, yb, "k.")
    plt.savefig("gptest")

    theta_init = np.log([np.exp(-5), np.exp(7), np.exp(.6), np.exp(-16),
                         p_init])
    print("\n", "log(theta_init) = ", theta_init)
    print("theta_init = ", np.exp(theta_init), "\n")

    # set up MCMC: walkers start in a tiny ball around theta_init
    ndim = len(theta_init)
    p0 = [theta_init + 1e-4 * np.random.rand(ndim) for _ in range(nwalkers)]
    args = (xb, yb, yerrb, plims)

    # time the lhf call to estimate the total run time
    start = time.time()
    print("lnprob = ", lnprob(theta_init, xb, yb, yerrb, plims))
    end = time.time()
    tm = end - start
    print("1 lhf call takes ", tm, "seconds")
    print("burn in will take", tm * nwalkers * burnin, "s")
    print("run will take", tm * nwalkers * run, "s")
    print("total = ", (tm * nwalkers * run + tm * nwalkers * burnin)/60,
          "mins")

    # run MCMC
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=args)
    print("burning in...")
    start = time.time()
    p0, _, _ = sampler.run_mcmc(p0, burnin)
    sampler.reset()
    print("production run...")
    sampler.run_mcmc(p0, run)
    end = time.time()
    print("actual time = ", end - start)

    # save samples; the context manager closes the file even if the write
    # fails. The dataset is created from a shape only (as before), so the
    # on-disk dtype is unchanged.
    samples_file = "%s_samples.h5" % id
    with h5py.File(samples_file, "w") as f:
        data = f.create_dataset("samples", np.shape(sampler.chain))
        data[...] = np.array(sampler.chain)

    # make various plots
    if plot:
        with h5py.File(samples_file, "r") as f:
            samples = f["samples"][...]
        make_plot(samples, xsub, ysub, yerrsub, id, fn, traces=True,
                  tri=True, prediction=True)
# print "acf period, err = ", p_init # # load samples # with h5py.File("%s/%s_samples.h5" % (DIR, str(int(id)).zfill(4)), # "r") as f: # samples = f["samples"][...] # nwalkers, nsteps, ndims = np.shape(samples) # flat = np.reshape(samples, (nwalkers * nsteps, ndims)) # mcmc_result = map(lambda v: (v[1], v[2]-v[1], v[1]-v[0]), # zip(*np.percentile(flat, [16, 50, 84], axis=0))) # bin and truncate data # npts = int(p_init[0] / 10. * 48) # 10 points per period # xb, yb, yerrb = bin_data(x, y, yerr, npts) # bin data # m = xb < cutoff # truncate m = x < 10 DIR = "sine" for id in range(100): # load samples with h5py.File("%s/%s_samples.h5" % (DIR, str(int(id)).zfill(4)), "r") as f: samples = f["samples"][...] nwalkers, nsteps, ndims = np.shape(samples) flat = np.reshape(samples, (nwalkers * nsteps, ndims)) mcmc_result = map(lambda v: (v[1], v[2]-v[1], v[1]-v[0]), zip(*np.percentile(flat, [16, 50, 84], axis=0))) # make various plots make_plot(samples, x[m], y[m], yerr[m], id, DIR, traces=False, tri=True, prediction=False)
def recover_injections(id, x, y, yerr, path, burnin, run, nwalkers=32,
                       plot=True):
    """
    Initialise the period from a periodogram, then run the GP MCMC.

    The initial period is read from "<path>/<id>_pgramresult.txt" when that
    file can be loaded, otherwise it is computed with periodograms().
    The chain is written to "<path>/<id>_samples.h5" (dataset "samples")
    and a plot of the initial GP prediction to "<path>/<id>_init".

    id: star id.
    x, y, yerr: time, flux and error arrays.
    path: path where you want to save the output.
    burnin: the number of burnin steps.
    run: the number of steps to run for.
    nwalkers: the number of walkers.
    plot: if True then plots of posteriors and chains will be made.
    """
    # initialise with pgram
    try:
        p_init = np.genfromtxt("{0}/{1}_pgramresult.txt".format(path, id))
    except Exception:
        # Best effort: any failure to load the cached result falls back to
        # recomputing it. Exception (not a bare except) so that
        # KeyboardInterrupt / SystemExit still propagate.
        p_init = periodograms(id, x, y, yerr, path, plot=True)

    if p_init < .5:  # small periods raise an error with george.
        p_init = 1.

    # If using lnprob, plims = [pmin, pmax] for a uniform prior.
    # If using Gprob, plims = [mu, sigma] for a Gaussian prior
    # (e.g. plims = np.log([p_init, p_init*.1])).
    plims = np.log([p_init - .5 * p_init, p_init + 2 * p_init])
    print("Initial period and limits:", p_init, np.exp(plims))

    # assign theta_init (log space)
    theta_init = np.log(
        [np.exp(-5), np.exp(7), np.exp(.6), np.exp(-16), p_init])
    print("\n", "log(theta_init) = ", theta_init)
    print("theta_init = ", np.exp(theta_init), "\n")

    # plot initialisation: GP prediction at the starting hyperparameters
    t = np.exp(theta_init)
    k = t[0] * ExpSquaredKernel(t[1]) * ExpSine2Kernel(t[2], t[3])
    gp = george.GP(k)
    gp.compute(x, yerr)
    xs = np.linspace(x[0], x[-1], 1000)
    mu, _ = gp.predict(y, xs)
    plt.clf()
    plt.errorbar(x, y, yerr=yerr, **reb)
    plt.plot(xs, mu, color=cols.blue)
    plt.savefig("{0}/{1}_init".format(path, id))

    # set up MCMC: walkers start in a tiny ball around theta_init
    ndim = len(theta_init)
    p0 = [theta_init + 1e-4 * np.random.rand(ndim) for _ in range(nwalkers)]
    args = (x, y, yerr, plims)

    # time the lhf call to estimate the total run time
    start = time.time()
    print("lnprob = ", lnprob(theta_init, x, y, yerr, plims))
    end = time.time()
    tm = end - start
    print("1 lhf call takes ", tm, "seconds")
    print("burn in will take", tm * nwalkers * burnin, "s")
    print("run will take", tm * nwalkers * run, "s")
    total = tm * nwalkers * run + tm * nwalkers * burnin
    print("total = ", total/60, "mins,", total/3600, "hours")

    # run MCMC
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=args)
    print("burning in...")
    start = time.time()
    p0, _, _ = sampler.run_mcmc(p0, burnin)
    sampler.reset()
    print("production run...")
    sampler.run_mcmc(p0, run)
    end = time.time()
    print("actual time = ", (end - start) / 60, "mins")

    # save samples; the context manager closes the file even if the write
    # fails. The dataset is created from a shape only (as before), so the
    # on-disk dtype is unchanged.
    samples_file = "%s/%s_samples.h5" % (path, id)
    with h5py.File(samples_file, "w") as f:
        data = f.create_dataset("samples", np.shape(sampler.chain))
        data[...] = np.array(sampler.chain)

    # make various plots
    if plot:
        with h5py.File(samples_file, "r") as f:
            samples = f["samples"][...]
        make_plot(samples, x, y, yerr, id, path, traces=True, tri=True,
                  prediction=True)