def GP(kernel, kernel_params, white=False):
    """
    Construct a george GP object for the chosen kernel.

    kernel: 'Basic' (Matern-3/2) or 'QuasiPeriodic' (ExpSine2).
    kernel_params: (w, a, t) for 'Basic' or (w, a, g, p) for 'QuasiPeriodic'.
    white: if True, include a white-noise term with amplitude w.
    """
    if kernel == 'Basic':
        w, a, t = kernel_params
        if white:
            if OLDGEORGE:
                return george.GP(
                    WhiteKernel(w**2) + a**2 * Matern32Kernel(t**2))
            else:
                return george.GP(a**2 * Matern32Kernel(t**2),
                                 white_noise=np.log(w**2),
                                 fit_white_noise=True)
        else:
            return george.GP(a**2 * Matern32Kernel(t**2))
    elif kernel == 'QuasiPeriodic':
        w, a, g, p = kernel_params
        if white:
            if OLDGEORGE:
                return george.GP(
                    WhiteKernel(w**2) + a**2 * ExpSine2Kernel(g, p))
            else:
                return george.GP(a**2 * ExpSine2Kernel(g, p),
                                 white_noise=np.log(w**2),
                                 fit_white_noise=True)
        else:
            return george.GP(a**2 * ExpSine2Kernel(g, p))
    else:
        raise ValueError('Invalid value for `kernel`.')
def GP(kernel, kernel_params, white=False):
    if kernel == 'Basic':
        w, a, t = kernel_params
        if white:
            return george.GP(WhiteKernel(w**2) + a**2 * Matern32Kernel(t**2))
        else:
            return george.GP(a**2 * Matern32Kernel(t**2))
    elif kernel == 'QuasiPeriodic':
        w, a, g, p = kernel_params
        if white:
            return george.GP(WhiteKernel(w**2) + a**2 * ExpSine2Kernel(g, p))
        else:
            return george.GP(a**2 * ExpSine2Kernel(g, p))
    else:
        raise ValueError('Invalid value for `kernel`.')
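# A minimal usage sketch for the GP factory above, assuming george, numpy and
# the kernels are imported at module level as in these snippets. The toy
# arrays and hyperparameter values below are illustrative assumptions, not
# taken from the original code.
import numpy as np

t = np.linspace(0, 50, 500)                    # days
flux = 1e-3 * np.sin(2 * np.pi * t / 7.0)      # toy periodic signal
flux_err = 1e-4 * np.ones_like(t)

w, a, g, p = 1e-4, 1e-3, 1.0, 7.0              # white noise, amplitude, gamma, period
gp = GP('QuasiPeriodic', (w, a, g, p), white=True)
gp.compute(t, flux_err)
print(gp.lnlikelihood(flux))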
def gp_kernel(self, theta):
    """Build a quasi-periodic kernel plus white noise from log-parameters."""
    A = np.exp(theta[0])       # amplitude
    l = np.exp(theta[1])       # squared-exponential length scale
    G = np.exp(theta[2])       # gamma of the periodic term
    sigma = np.exp(theta[3])   # white-noise variance
    P = np.exp(theta[4])       # period
    return A * ExpSquaredKernel(l) * ExpSine2Kernel(G, P) + WhiteKernel(sigma)
def neglnlike(theta, x, y, yerr):
    """Negative GP log-likelihood for log-parameters theta."""
    theta = np.exp(theta)
    k = theta[0] * ExpSine2Kernel(theta[2], theta[1]) \
        * ExpSquaredKernel(theta[3])
    gp = george.GaussianProcess(k)
    gp.compute(x, (theta[4] * yerr**2))
    return -gp.lnlikelihood(y)
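# A minimal sketch of fitting the log-hyperparameters by minimising neglnlike
# with scipy, matching the spo.minimize(neglnlike, ...) call used further
# down. The data arrays and starting point are illustrative assumptions.
import numpy as np
import scipy.optimize as spo

x = np.linspace(0, 30, 300)
y = 1e-3 * np.sin(2 * np.pi * x / 5.0) + 1e-4 * np.random.randn(len(x))
yerr = 1e-4 * np.ones_like(x)

# theta = ln([A, P, gamma, l, jitter scale]) for the kernel above.
theta_init = np.log([1e-6, 5., 1., 10., 1.])
result = spo.minimize(neglnlike, theta_init, args=(x, y, yerr))
print("best-fit hyperparameters:", np.exp(result.x))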
def multilnlike(theta, x1, x2, x3, x4, y1, y2, y3, y4,
                yerr1, yerr2, yerr3, yerr4, p):
    """Combine the GP likelihoods of four datasets that share one kernel."""
    lnlike = []
    theta = np.exp(theta)
    k = theta[0] * ExpSquaredKernel(theta[1]) * ExpSine2Kernel(theta[2], p)
    gp = george.GP(k)
    # Each dataset gets its own additive jitter term, theta[3]..theta[6].
    data = [(x1, y1, yerr1, theta[3]), (x2, y2, yerr2, theta[4]),
            (x3, y3, yerr3, theta[5]), (x4, y4, yerr4, theta[6])]
    for x, y, yerr, jitter in data:
        try:
            gp.compute(x, np.sqrt(jitter + yerr**2))
        except (ValueError, np.linalg.LinAlgError):
            return 10e25
        lnlike.append(-gp.lnlikelihood(y, quiet=True))
    return np.logaddexp.reduce(np.array(lnlike), axis=0)
def make_plots(id, RESULTS_DIR="/Users/ruthangus/projects/GProtation/code/"
                               "results_acfprior_03_10"):
    """
    Make a plot of the fit to the light curve and the posteriors.
    """
    # Load the light curve.
    x, y = load_suzanne_lcs(id)
    yerr = np.ones(len(y)) * 1e-5
    m = x < 100
    x, y, yerr = x[m], y[m], yerr[m]

    # Load the posterior samples.
    fn = os.path.join(RESULTS_DIR, "{}.h5".format(id))
    df = pd.read_hdf(fn, key="samples")

    # Find the median of each parameter's posterior.
    theta = [np.median(df.iloc[:, i]) for i in range(5)]

    # Fit the GP at the median parameters.
    print(np.exp(theta[-1]), "period")
    k = theta[0] * ExpSquaredKernel(theta[1]) \
        * ExpSine2Kernel(theta[2], theta[4]) + WhiteKernel(theta[3])
    gp = george.GP(k, solver=george.HODLRSolver)
    gp.compute(x - x[0], yerr)
    xs = np.linspace((x - x[0])[0], (x - x[0])[-1], 1000)
    mu, cov = gp.predict(y, xs)

    # Plot the fit.
    plt.clf()
    plt.plot(x, y, "k.")
    plt.plot(xs, mu)
    plt.xlim(0, 100)
    # v = np.std(y)
    # plt.ylim(-10*v, 10*v)
    plt.savefig("{}_fit".format(id))
def predict(xs, x, y, yerr, theta):
    theta = np.exp(theta)
    k = theta[0] * ExpSine2Kernel(theta[2], theta[1]) \
        * ExpSquaredKernel(theta[3])
    gp = george.GaussianProcess(k)
    # j2 = np.exp(2)*theta[4]
    # gp.compute(x, np.sqrt(yerr**2 + j2))
    gp.compute(x, (theta[4] * yerr**2))
    return gp.predict(y, xs)
def lnlike(theta, x, y, yerr):
    theta = np.exp(theta)
    k = theta[0] * ExpSine2Kernel(theta[2], theta[1]) \
        * ExpSquaredKernel(theta[3])
    gp = george.GaussianProcess(k)
    # j2 = np.exp(2)*theta[4]
    # gp.compute(x, np.sqrt(yerr**2 + j2))
    gp.compute(x, (theta[4] * yerr**2))
    return gp.lnlikelihood(y)
def neglnlike(theta, x, y, yerr, p):
    """Negative GP log-likelihood with the period p held fixed."""
    theta = np.exp(theta)
    k = theta[0] * ExpSquaredKernel(theta[1]) * ExpSine2Kernel(theta[2], p)
    gp = george.GP(k)
    try:
        gp.compute(x, np.sqrt(theta[3] + yerr**2))
    except (ValueError, np.linalg.LinAlgError):
        return 10e25
    return -gp.lnlikelihood(y, quiet=True)
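# A minimal sketch of one way a fixed-period likelihood like the one above
# might be used: minimise the negative log-likelihood at each trial period and
# keep the best one. The period grid, data arrays and starting point are
# illustrative assumptions, not taken from the original code.
import numpy as np
import scipy.optimize as spo

x = np.linspace(0, 60, 600)
y = 1e-3 * np.sin(2 * np.pi * x / 12.0) + 1e-4 * np.random.randn(len(x))
yerr = 1e-4 * np.ones_like(x)

trial_periods = np.linspace(2, 30, 50)
theta0 = np.log([1e-6, 50., 1., 1e-8])  # ln(A), ln(l), ln(gamma), ln(jitter)
nll = [spo.minimize(neglnlike, theta0, args=(x, y, yerr, p)).fun
       for p in trial_periods]
best_period = trial_periods[np.argmin(nll)]
print("best trial period:", best_period, "days")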
def MCMC(theta, x, y, yerr, fname, burn_in, nsteps, nruns):
    # Calculate the initial likelihood and plot the initial hyperparameters.
    xs = np.linspace(min(x), max(x), 1000)
    k = theta[0] * ExpSquaredKernel(theta[1]) \
        * ExpSine2Kernel(theta[2], theta[4])
    k += WhiteKernel(theta[3])
    gp = george.GP(k)
    print('initial lnlike = ', lnlike(theta, x, y, yerr))
    mu, cov = predict(theta, xs, x, y, yerr)
    plt.clf()
    plt.errorbar(x, y, yerr=yerr, fmt='k.', capsize=0)
    plt.plot(xs, mu, 'r')
    std = np.sqrt(np.diag(cov))
    # plt.fill_between(mu-std, mu+std, color='r', alpha='.5')
    plt.savefig('%s_init' % fname)

    # Set up the sampler.
    nwalkers, ndim = 32, len(theta)
    p0 = [theta + 1e-4 * np.random.rand(ndim) for i in range(nwalkers)]
    args = [x, y, yerr]
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=args)
    print("Burning in...")
    p0, lp, state = sampler.run_mcmc(p0, burn_in)
    sampler.reset()
    for i in range(nruns):
        print('Running... ', i)
        p0, lp, state = sampler.run_mcmc(p0, nsteps)

    # Results.
    samples = sampler.chain[:, 50:, :].reshape((-1, ndim))
    mcmc_result = list(map(lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
                           zip(*np.percentile(samples, [16, 50, 84], axis=0))))
    mres = np.array(mcmc_result)[:, 0]
    print('mcmc_result = ', np.exp(mres))
    np.savetxt("parameters_%s.txt" % fname, np.array(mcmc_result))

    print("saving samples")
    f = h5py.File("samples%s" % fname, "w")
    data = f.create_dataset("samples", np.shape(sampler.chain))
    data[:, :] = np.array(sampler.chain)
    f.close()

    # Make the triangle (corner) plot.
    fig_labels = ["$A$", "$l1$", "$l2$", "$wn$", "$P$"]
    fig = triangle.corner(samples, truths=mres, labels=fig_labels)
    fig.savefig("triangle_%s.png" % fname)

    # Plot the result.
    mu, cov = predict(mres, xs, x, y, yerr)
    plt.clf()
    plt.errorbar(x, y, yerr=yerr, fmt='k.', capsize=0)
    plt.plot(xs, mu, 'r')
    plt.savefig('%s_final' % fname)
def make_plot(sampler, x, y, yerr, ID, DIR, traces=False, tri=False,
              prediction=True):
    nwalkers, nsteps, ndims = np.shape(sampler)
    flat = np.reshape(sampler, (nwalkers * nsteps, ndims))
    mcmc_result = map(lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
                      zip(*np.percentile(flat, [16, 50, 84], axis=0)))
    mcmc_result = np.array([i[0] for i in mcmc_result])
    print("\n", np.exp(np.array(mcmc_result[-1])), "period (days)", "\n")
    print(mcmc_result)
    np.savetxt("%s/%s_result.txt" % (DIR, ID), mcmc_result)

    fig_labels = ["A", "l", "G", "s", "P"]

    if traces:
        print("Plotting traces")
        for i in range(ndims):
            plt.clf()
            plt.plot(sampler[:, :, i].T, 'k-', alpha=0.3)
            plt.ylabel(fig_labels[i])
            plt.savefig("%s/%s_%s.png" % (DIR, ID, fig_labels[i]))

    if tri:
        print("Making triangle plot")
        flat[:, -1] = np.exp(flat[:, -1])
        try:
            fig = corner.corner(flat, labels=fig_labels)
        except:
            fig = triangle.corner(flat, labels=fig_labels)
        fig.savefig("%s/%s_triangle" % (DIR, ID))
        print("%s/%s_triangle.png" % (DIR, ID))

    if prediction:
        print("plotting prediction")
        theta = np.exp(np.array(mcmc_result))
        k = theta[0] * ExpSquaredKernel(theta[1]) \
            * ExpSine2Kernel(theta[2], theta[4])
        gp = george.GP(k, solver=george.HODLRSolver)
        gp.compute(x, yerr)
        xs = np.linspace(x[0], x[-1], 1000)
        mu, cov = gp.predict(y, xs)
        plt.clf()
        plt.errorbar(x - x[0], y, yerr=yerr, **reb)
        plt.xlabel("$\mathrm{Time~(days)}$")
        plt.ylabel("$\mathrm{Normalised~Flux}$")
        plt.plot(xs, mu, color=cols.lightblue)
        plt.xlim(min(x), max(x))
        plt.savefig("%s/%s_prediction" % (DIR, ID))
        print("%s/%s_prediction.png" % (DIR, ID))
def lnlike(theta, x, y, yerr):
    theta = np.exp(theta)
    k = theta[0] * ExpSquaredKernel(theta[1]) \
        * ExpSine2Kernel(theta[2], theta[4]) + WhiteKernel(theta[3])
    gp = george.GP(k, solver=george.HODLRSolver)
    try:
        gp.compute(x, np.sqrt(theta[3] + yerr**2))
    except (ValueError, np.linalg.LinAlgError):
        return 10e25
    return gp.lnlikelihood(y, quiet=True)
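# The MCMC drivers in these snippets pass a function called `lnprob` to emcee,
# but it is not defined here. A minimal sketch of what it might look like,
# assuming the uniform prior on ln(period) described by plims = [pmin, pmax]
# in recover_injections below; the bounds on the other parameters are
# illustrative guesses, not taken from the original code.
def lnprior(theta, plims):
    """Flat prior: zero inside broad bounds, -inf outside."""
    if not plims[0] < theta[4] < plims[1]:        # ln(period) inside [pmin, pmax]
        return -np.inf
    if not all(-20. < t < 20. for t in theta):    # loose, illustrative bounds
        return -np.inf
    return 0.


def lnprob(theta, x, y, yerr, plims):
    """Log-posterior = log-prior + GP log-likelihood."""
    lp = lnprior(theta, plims)
    if not np.isfinite(lp):
        return -np.inf
    return lp + lnlike(theta, x, y, yerr)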
def multilnlike_emcee_comb(theta, x, y, yerr):
    # Inflate the error bars of each data segment with its own jitter term.
    yerr[:121] = np.sqrt(theta[3] + yerr[:121]**2)
    yerr[122:304] = np.sqrt(theta[4] + yerr[122:304]**2)
    yerr[305:332] = np.sqrt(theta[5] + yerr[305:332]**2)
    yerr[333:] = np.sqrt(theta[6] + yerr[333:]**2)
    theta = np.exp(theta)
    k = theta[0] * ExpSquaredKernel(theta[1]) \
        * ExpSine2Kernel(theta[2], theta[7])
    gp = george.GP(k)
    try:
        gp.compute(x, yerr)
    except (ValueError, np.linalg.LinAlgError):
        return 10e25
    return gp.lnlikelihood(y, quiet=True)
def plot_lc(koi):
    """ Make demo plot of a light curve. """
    # Load the data.
    print(LC_DIR)
    x, y, yerr = kd.load_kepler_data(LC_DIR)
    x -= x[0]
    m = x < 500
    x, y, yerr = x[m], y[m], yerr[m]

    # Load the posterior samples.
    df = pd.read_hdf(os.path.join(DATA_DIR, "KOI-{}.h5".format(int(koi))),
                     key="samples")
    a = np.exp(MAP(df.ln_A.values))
    l = np.exp(MAP(df.ln_l.values))
    g = np.exp(MAP(df.ln_G.values))
    s = np.exp(MAP(df.ln_sigma.values))
    p = np.exp(MAP(df.ln_period.values))
    print("ln(a) = ", np.log(a), "ln(l) = ", np.log(l), "ln(G) = ", np.log(g),
          "ln(s) = ", np.log(s), "ln(p) = ", np.log(p), "p = ", p)

    # Compute the GP prediction at the MAP parameters.
    xs = np.linspace(min(x), max(x), 500)
    k = a * ExpSquaredKernel(l) * ExpSine2Kernel(g, p) + WhiteKernel(s)
    gp = george.GP(k)
    gp.compute(x, yerr)
    mu, cov = gp.predict(y, xs)

    plt.clf()
    plt.plot(x, y, "k.")
    plt.plot(xs, mu, color="CornFlowerBlue")
    plt.xlabel("$\mathrm{Time~(days)}$")
    plt.ylabel("$\mathrm{Normalised~flux}$")
    plt.subplots_adjust(left=.18)
    plt.savefig(os.path.join(FIG_DIR, "koi_lc_demo.pdf"))
def predict(theta, xs, x, y, yerr, p):
    """GP predictive mean and covariance at xs with the period p held fixed."""
    theta = np.exp(theta)
    k = theta[0] * ExpSquaredKernel(theta[1]) * ExpSine2Kernel(theta[2], p)
    gp = george.GP(k)
    gp.compute(x, np.sqrt(theta[3] + yerr**2))
    return gp.predict(y, xs)
def recover_injections(id, x, y, yerr, path, burnin, run, nwalkers=32,
                       plot=True):
    """
    Take x, y, yerr, calculate ACF period for initialisation and do MCMC.
    id: star id.
    x, y, yerr: time, flux and error arrays.
    path: path where you want to save the output.
    burnin: the number of burnin steps.
    run: the number of steps to run for.
    nwalkers: the number of walkers.
    plot: if True then plots of posteriors and chains will be made.
    """
    # Initialise with the periodogram period.
    try:
        p_init = np.genfromtxt("{0}/{1}_pgramresult.txt".format(path, id))
    except:
        p_init = periodograms(id, x, y, yerr, path, plot=True)
    if p_init < .5:  # small periods raise an error with george.
        p_init = 1.

    # If using lnprob, plims = [pmin, pmax] for a uniform prior.
    # If using Gprob, plims = [mu, sigma] for a Gaussian prior.
    plims = np.log([p_init - .5 * p_init, p_init + 2 * p_init])
    # plims = np.log([p_init, p_init*.1])  # mean, sigma
    print("Initial period and limits:", p_init, np.exp(plims))

    # Assign theta_init.
    theta_init = np.log(
        [np.exp(-5), np.exp(7), np.exp(.6), np.exp(-16), p_init])
    print("\n", "log(theta_init) = ", theta_init)
    print("theta_init = ", np.exp(theta_init), "\n")

    # Plot the initialisation.
    t = np.exp(theta_init)
    k = t[0] * ExpSquaredKernel(t[1]) * ExpSine2Kernel(t[2], t[3])
    gp = george.GP(k)
    gp.compute(x, yerr)
    xs = np.linspace(x[0], x[-1], 1000)
    mu, cov = gp.predict(y, xs)
    plt.clf()
    plt.errorbar(x, y, yerr=yerr, **reb)
    plt.plot(xs, mu, color=cols.blue)
    plt.savefig("{0}/{1}_init".format(path, id))

    # Set up the MCMC.
    ndim, nwalkers = len(theta_init), nwalkers
    p0 = [theta_init + 1e-4 * np.random.rand(ndim) for i in range(nwalkers)]
    args = (x, y, yerr, plims)

    # Time the likelihood call.
    start = time.time()
    print("lnprob = ", lnprob(theta_init, x, y, yerr, plims))
    # print("lnprob = ", Gprob(theta_init, x, y, yerr, plims))
    end = time.time()
    tm = end - start
    print("1 lhf call takes ", tm, "seconds")
    print("burn in will take", tm * nwalkers * burnin, "s")
    print("run will take", tm * nwalkers * run, "s")
    print("total = ", (tm * nwalkers * run + tm * nwalkers * burnin) / 60,
          "mins,", (tm * nwalkers * run + tm * nwalkers * burnin) / 3600,
          "hours")

    # Run the MCMC.
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=args)
    # sampler = emcee.EnsembleSampler(nwalkers, ndim, Gprob, args=args)
    print("burning in...")
    start = time.time()
    p0, lp, state = sampler.run_mcmc(p0, burnin)
    sampler.reset()
    print("production run...")
    p0, lp, state = sampler.run_mcmc(p0, run)
    end = time.time()
    print("actual time = ", (end - start) / 60, "mins")

    # Save the samples.
    f = h5py.File("%s/%s_samples.h5" % (path, id), "w")
    data = f.create_dataset("samples", np.shape(sampler.chain))
    data[:, :] = np.array(sampler.chain)
    f.close()

    # Make various plots.
    if plot:
        with h5py.File("%s/%s_samples.h5" % (path, id), "r") as f:
            samples = f["samples"][...]
        mcmc_result = make_plot(samples, x, y, yerr, id, path, traces=True,
                                tri=True, prediction=True)
from george.kernels import (ExpSquaredKernel, ExpSine2Kernel,
                            RationalQuadraticKernel, ConstantKernel)
import numpy as np

# Earlier kernel fits, kept for reference:
# kernel = 0.378**2 * ExpSquaredKernel(48.5) * (ExpSine2Kernel(gamma=3.0, log_period=0.0) + NegativeConstantKernel(np.log(0.38))) + 0.0831**2 * ExpSquaredKernel(32.4) * (ExpSine2Kernel(gamma=3.03, log_period=np.log(0.5)) + ConstantKernel(np.log(0.305))) + 0.15**2 * ExpSquaredKernel(0.5) + 0.10**2 * RationalQuadraticKernel(metric=0.025, log_alpha=-1)
# kernel = 0.238**2 * ExpSquaredKernel(50) * (ExpSine2Kernel(gamma=10.355, log_period=0.0) + NegativeConstantKernel(np.log(0.146))) + 0.0388**2 * ExpSquaredKernel(40) * (ExpSine2Kernel(gamma=20.8, log_period=np.log(0.5)) + ConstantKernel(np.log(0.216))) + 0.0383**2 * ExpSquaredKernel(1.925) + 0.108**2 * RationalQuadraticKernel(metric=0.00164, log_alpha=-1.578)
# kernel = 0.195**2 * ExpSquaredKernel(2000) * ExpSine2Kernel(gamma=14.964, log_period=0.0) + 0.0232**2 * ExpSquaredKernel(127.597) * (ExpSine2Kernel(gamma=20.909, log_period=np.log(0.5)) + ConstantKernel(np.log(0.653))) + 0.0321**2 * ExpSquaredKernel(5.028) + 0.0848**2 * RationalQuadraticKernel(metric=0.00207, log_alpha=-0.11357)
# kernel = 0.191**2 * ExpSquaredKernel(1900) * ExpSine2Kernel(gamma=14.951, log_period=0.0) + 0.02227**2 * ExpSquaredKernel(133.358) * (ExpSine2Kernel(gamma=20.895, log_period=np.log(0.5)) + ConstantKernel(np.log(0.651))) + 0.0314**2 * ExpSquaredKernel(5.097) + 0.146**2 * RationalQuadraticKernel(metric=0.00207, log_alpha=-0.11357)

kernel = (0.197**2 * ExpSquaredKernel(2128)
          * ExpSine2Kernel(gamma=13.332, log_period=0.0)
          + 0.0247**2 * ExpSquaredKernel(133.163)
          * (ExpSine2Kernel(gamma=18.622, log_period=np.log(0.5))
             + ConstantKernel(np.log(0.604)))
          + 0.0340**2 * ExpSquaredKernel(4.166)
          + 0.0771**2 * RationalQuadraticKernel(metric=0.00347,
                                                log_alpha=0.3611))

# Hold the two periods fixed during fitting.
kernel.freeze_parameter('k1:k1:k1:k2:log_period')
kernel.freeze_parameter('k1:k1:k2:k2:k1:log_period')

# Previous delta_kernel fits, kept for reference:
# delta_kernel = 0.108**2 * ExpSquaredKernel(972) * ExpSine2Kernel(gamma=23.548, log_period=0.0) + 0.0421**2 * ExpSquaredKernel(1e6) * (ExpSine2Kernel(gamma=2.270, log_period=np.log(0.5)) + ConstantKernel(np.log(0.000877))) + 0.0417**2 * ExpSquaredKernel(3.233) + 0.0737**2 * RationalQuadraticKernel(metric=0.00436, log_alpha=17.903)
# delta_kernel = 0.109**2 * ExpSquaredKernel(689) * ExpSine2Kernel(gamma=22.638, log_period=0.0) + 0.00554**2 * ExpSquaredKernel(1e6) * (ExpSine2Kernel(gamma=5.667, log_period=np.log(0.5)) + ConstantKernel(np.log(0.000845))) + 0.0483**2 * ExpSquaredKernel(2.788) + 0.0757**2 * RationalQuadraticKernel(metric=0.00424, log_alpha=17.903)  # 2021-04-04
# delta_kernel = 0.109**2 * ExpSquaredKernel(689) * ExpSine2Kernel(gamma=22.638, log_period=0.0) + 0.00554**2 * ExpSquaredKernel(1e6) * (ExpSine2Kernel(gamma=5.667, log_period=np.log(0.5)) + ConstantKernel(np.log(0.000845))) + 0.0483**2 * ExpSquaredKernel(2.788) + 0.0757**2 * RationalQuadraticKernel(metric=0.00424, log_alpha=17.903)  # 2021-04-05
# delta_kernel = 0.109**2 * ExpSquaredKernel(689) * ExpSine2Kernel(gamma=22.638, log_period=0.0) + 0.0483**2 * ExpSquaredKernel(2.788) + 0.0757**2 * RationalQuadraticKernel(metric=0.00424, log_alpha=17.903)  # 2021-04-05.2
# delta_kernel = 0.0917**2 * ExpSquaredKernel(611) * ExpSine2Kernel(gamma=30.976, log_period=0.0) + 0.0813**2 * ExpSquaredKernel(10.105) + 0.100**2 * RationalQuadraticKernel(metric=0.00175, log_alpha=-1.106) + ConstantKernel(np.log(0.04))  # 2021-04-05.3
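# A minimal sketch of fitting the frozen kernel above with the george >= 1.0
# API (assumed, since gamma=/log_period= and freeze_parameter are used here).
# The time series `t`, `rv` and `rv_err` are placeholders, not from the
# original script.
import george
from scipy.optimize import minimize

t = np.linspace(0, 10, 200)
rv = np.sin(2 * np.pi * t) + 0.05 * np.random.randn(len(t))
rv_err = 0.05 * np.ones_like(t)

gp = george.GP(kernel)
gp.compute(t, rv_err)


def nll(p):
    # Negative log-likelihood over the unfrozen parameters only.
    gp.set_parameter_vector(p)
    return -gp.log_likelihood(rv, quiet=True)


def grad_nll(p):
    gp.set_parameter_vector(p)
    return -gp.grad_log_likelihood(rv, quiet=True)


result = minimize(nll, gp.get_parameter_vector(), jac=grad_nll)
gp.set_parameter_vector(result.x)
print("final log-likelihood:", gp.log_likelihood(rv))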
def make_plot(sampler, xb, yb, yerrb, ID, RESULTS_DIR, trths, traces=False,
              tri=False, prediction=True):
    nwalkers, nsteps, ndims = np.shape(sampler)
    flat = np.reshape(sampler, (nwalkers * nsteps, ndims))
    mcmc_res = list(map(lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
                        zip(*np.percentile(flat, [16, 50, 84], axis=0))))
    med = np.concatenate([np.array(mcmc_res[i]) for i in range(len(mcmc_res))])
    print("median values = ", med[::3])

    # The last column of the chain is the log-probability.
    logprob = flat[:, -1]
    ml = logprob == max(logprob)
    maxlike = flat[np.where(ml)[0][0], :][:-1]
    print("max like = ", maxlike)
    print("\n", np.exp(np.array(maxlike[-1])), "period (days)", "\n")
    r = np.concatenate((maxlike, med))

    # Calculate autocorrelation times.
    acorr_t = emcee.autocorr.integrated_time(flat)

    # Save the results.
    df = pd.DataFrame({"N": [ID], "A_max": [r[0]], "l_max": [r[1]],
                       "gamma_max": [r[2]], "period_max": [r[3]],
                       "sigma_max": [r[4]], "A": [r[5]], "A_errp": [r[6]],
                       "A_errm": [r[7]], "l": [r[8]], "l_errp": [r[9]],
                       "l_errm": [r[10]], "gamma": [r[11]],
                       "gamma_errp": [r[12]], "gamma_errm": [r[13]],
                       "sigma": [r[14]], "sigma_errp": [r[15]],
                       "sigma_errm": [r[16]], "period": [r[17]],
                       "period_errp": [r[18]], "period_errm": [r[19]],
                       "acorr_A": acorr_t[0], "acorr_l": acorr_t[1],
                       "acorr_gamma": acorr_t[2], "acorr_sigma": acorr_t[3],
                       "acorr_period": acorr_t[4]})
    df.to_csv(os.path.join(RESULTS_DIR, "{0}_mcmc_results.txt".format(ID)))

    fig_labels = ["ln(A)", "ln(l)", "ln(G)", "ln(s)", "ln(P)", "lnprob"]

    if traces:
        print("Plotting traces")
        for i in range(ndims):
            plt.clf()
            plt.plot(sampler[:, :, i].T, 'k-', alpha=0.3)
            plt.ylabel(fig_labels[i])
            plt.savefig(os.path.join(RESULTS_DIR,
                                     "{0}_{1}.png".format(ID, fig_labels[i])))

    if tri:
        DIR = "../code/simulations/kepler_diffrot_full/par/"
        truths = pd.read_csv(os.path.join(DIR, "final_table.txt"),
                             delimiter=" ")
        N = int("".join(filter(str.isdigit, ID)))
        true_p = np.log(truths.P_MIN.values[truths.N.values == N][0])
        trths[-1] = true_p
        print("Making triangle plot")
        fig = corner.corner(flat[:, :-1], labels=fig_labels,
                            quantiles=[.16, .5, .84], show_titles=True,
                            truths=trths)
        fig.savefig(os.path.join(RESULTS_DIR, "{0}_triangle".format(ID)))
        print(os.path.join("{0}_triangle.png".format(ID)))

    if prediction:
        if len(xb) > 1:  # if the data is a list of lists.
            try:
                x = [i for j in xb for i in j]
                y = [i for j in yb for i in j]
                yerr = [i for j in yerrb for i in j]
            except TypeError:  # if the data are just a single list.
                x, y, yerr = xb, yb, yerrb
        else:  # if the data is a list of a single list.
            x, y, yerr = xb[0], yb[0], yerrb[0]

        print("plotting prediction")
        theta = np.exp(np.array(maxlike))
        k = theta[0] * ExpSquaredKernel(theta[1]) \
            * ExpSine2Kernel(theta[2], theta[4]) + WhiteKernel(theta[3])
        gp = george.GP(k, solver=george.HODLRSolver)
        gp.compute(x - x[0], yerr)
        xs = np.linspace((x - x[0])[0], (x - x[0])[-1], 1000)
        mu, cov = gp.predict(y, xs)
        plt.clf()
        plt.errorbar(x - x[0], y, yerr=yerr, fmt="k.", capsize=0)
        plt.xlabel("Time (days)")
        plt.ylabel("Normalised Flux")
        plt.plot(xs, mu, color='#0066CC')
        plt.xlim(min(x - x[0]), max(x - x[0]))
        plt.savefig(os.path.join(RESULTS_DIR, "{0}_prediction".format(ID)))
        print(os.path.join(RESULTS_DIR, "{0}_prediction.png".format(ID)))
    return r
def MCMC(theta_init, x, y, yerr, plims, burnin, run, ID, DIR, nwalkers=32,
         logsamp=True, plot_inits=False):
    # Figure out whether x, y and yerr are arrays or lists of lists.
    quarters = False
    if len(x) < 20:
        quarters = True
        print("Quarter splits detected")

    print("\n", "log(theta_init) = ", theta_init)
    print("theta_init = ", np.exp(theta_init), "\n")

    if plot_inits:  # plot initial guess and the result of minimize.
        if quarters:
            xl = [i for j in x for i in j]
            yl = [i for j in y for i in j]
            yerrl = [i for j in yerr for i in j]
        print("plotting inits")
        print(np.exp(theta_init))
        t = np.exp(theta_init)
        k = t[0] * ExpSquaredKernel(t[1]) * ExpSine2Kernel(t[2], t[3])
        gp = george.GP(k)
        gp.compute(xl, yerrl)
        xs = np.linspace(xl[0], xl[-1], 1000)
        mu, cov = gp.predict(yl, xs)

        plt.clf()
        plt.errorbar(xl, yl, yerr=yerrl, **reb)
        plt.plot(xs, mu, color=cols.blue)

        args = (xl, yl, yerrl)
        results = spo.minimize(neglnlike, theta_init, args=args)
        print("optimisation results = ", results.x)

        r = np.exp(results.x)
        k = r[0] * ExpSquaredKernel(r[1]) * ExpSine2Kernel(r[2], r[3])
        gp = george.GP(k)
        gp.compute(xl, yerrl)
        mu, cov = gp.predict(yl, xs)
        plt.plot(xs, mu, color=cols.pink, alpha=.5)
        plt.savefig("%s/%s_init" % (DIR, ID))
        print("%s/%s_init.png" % (DIR, ID))

    ndim, nwalkers = len(theta_init), nwalkers
    p0 = [theta_init + 1e-4 * np.random.rand(ndim) for i in range(nwalkers)]
    args = (x, y, yerr, plims)

    lp = lnprob
    if quarters:  # if fitting each quarter separately, use a different lnprob.
        lp = lnprob_split

    sampler = emcee.EnsembleSampler(nwalkers, ndim, lp, args=args)
    print("burning in...")
    p0, lp, state = sampler.run_mcmc(p0, burnin)
    sampler.reset()
    print("production run...")
    p0, lp, state = sampler.run_mcmc(p0, run)

    # Save the samples.
    f = h5py.File("%s/%s_samples.h5" % (DIR, ID), "w")
    data = f.create_dataset("samples", np.shape(sampler.chain))
    data[:, :] = np.array(sampler.chain)
    f.close()
    return sampler
def make_plot(x, y, yerr, id, sampler, RESULTS_DIR, traces=False, tri=False,
              prediction=True):
    plt.clf()
    plt.plot(x, y, "k.")
    plt.savefig("test")

    nwalkers, nsteps, ndims = np.shape(sampler)
    flat = np.reshape(sampler, (nwalkers * nsteps, ndims))
    mcmc_result = map(lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
                      zip(*np.percentile(flat, [16, 50, 84], axis=0)))
    mcmc_result = np.array([i[0] for i in mcmc_result])
    print("\n", np.exp(np.array(mcmc_result[-1])), "period (days)", "\n")
    print(mcmc_result)
    np.savetxt(os.path.join(RESULTS_DIR, "{0}_result.txt".format(id)),
               mcmc_result)

    fig_labels = ["ln(A)", "ln(l)", "ln(G)", "ln(s)", "ln(P)"]

    if traces:
        print("Plotting traces")
        for i in range(ndims):
            plt.clf()
            plt.plot(sampler[:, :, i].T, 'k-', alpha=0.3)
            plt.ylabel(fig_labels[i])
            plt.savefig(os.path.join(
                RESULTS_DIR, "{0}_{1}.png".format(id, fig_labels[i])))

    if tri:
        print("Making triangle plot")
        try:
            fig = corner.corner(flat, labels=fig_labels)
        except:
            fig = triangle.corner(flat, labels=fig_labels)
        fig.savefig(os.path.join(RESULTS_DIR, "{0}_triangle".format(id)))
        print(os.path.join(RESULTS_DIR, "{0}_triangle.png".format(id)))

    if prediction:
        print("plotting prediction")
        theta = np.exp(np.array(mcmc_result))
        k = theta[0] * ExpSquaredKernel(theta[1]) \
            * ExpSine2Kernel(theta[2], theta[4])
        gp = george.GP(k, solver=george.HODLRSolver)
        gp.compute(x - x[0], (yerr**2 + theta[3]**2)**.5)
        xs = np.linspace((x - x[0])[0], (x - x[0])[-1], 1000)
        mu, cov = gp.predict(y, xs)
        plt.clf()
        plt.errorbar(x - x[0], y, yerr=yerr, fmt="k.", capsize=0)
        plt.xlabel("$\mathrm{Time~(days)}$")
        plt.ylabel("$\mathrm{Normalised~Flux}$")
        plt.plot(xs, mu, color='#66CCCC')
        plt.xlim(min(x - x[0]), max(x - x[0]))
        plt.savefig(os.path.join(RESULTS_DIR, "{0}_prediction".format(id)))
        print(os.path.join(RESULTS_DIR, "{0}_prediction.png".format(id)))
"lnl_a": ls_a, "lngamma": gammas, "lnsigma": ss, "lnperiod": periods }) df.to_csv("gp_truths.csv") xs_l = np.arange(0, 200, .02043365) # kepler cadence xs = xs_l[::10] for i, period in enumerate(periods): print(i, "of", len(periods)) sid = str(int(i)).zfill(4) A, l_p, gamma = np.exp(As[i]), np.exp(ls_p[i]), np.exp(gammas[i]) period, sigma, l_a = np.exp(periods[i]), np.exp(ss[i]), np.exp(ls_a[i]) kp = A * ExpSquaredKernel(l_p) \ * ExpSine2Kernel(gamma, period) gp = george.GP(kp) ys = gp.sample(xs) y_noise = ys + np.random.randn(len(ys)) * 1e-5 data = np.vstack((xs, ys)) data_noise = np.vstack((xs, y_noise)) np.savetxt(os.path.join(P_DIR, "{0}.txt".format(sid)), data.T) np.savetxt(os.path.join(P_DIR, "{0}_noise.txt".format(sid)), data_noise.T) plt.clf() plt.plot(xs, ys) plt.plot(xs, y_noise, "k.") plt.xlabel("time (days)") plt.savefig(os.path.join(P_DIR, "{0}".format(sid))) k = A * ExpSquaredKernel(l_a)
A, l = 100, 10
K = A**2 * ExpSquaredKernel(l**2)
gp = george.GP(K)
gp.compute(x, yerr)
k = gp.get_matrix(x)

# And plot it...
plt.clf()
fig = plt.figure()
ax = fig.add_subplot(111)
ax.matshow(k, cmap=plt.cm.gray)
ax.set_xticklabels([])
ax.set_yticklabels([])
plt.savefig("SEmatrix")

# set up a QP GP
A, l, gamma, P = 100, 10, 1, 10
K = A**2 * ExpSquaredKernel(l**2) * ExpSine2Kernel(gamma, P)
gp = george.GP(K)
gp.compute(x, yerr)
k = gp.get_matrix(x)

# And plot it...
plt.clf()
fig = plt.figure()
ax = fig.add_subplot(111)
ax.matshow(k, cmap=plt.cm.gray)
ax.set_xticklabels([])
ax.set_yticklabels([])
plt.savefig("QPmatrix")
def predict(xs, x, y, yerr, theta, P):
    theta = np.exp(theta)
    k = theta[0] * ExpSine2Kernel(theta[1], P) * ExpSquaredKernel(theta[2])
    gp = george.GaussianProcess(k)
    gp.compute(x, (theta[3] * yerr**2))
    return gp.predict(y, xs)