def gp_fit(t, y, yerr, t_grid, integrated=False, exp_time=60.):
    # Optimize kernel hyperparameters and return the MAP fit plus predictions
    with pm.Model() as model:
        logS0 = pm.Normal("logS0", mu=0.4, sd=5.0, testval=np.log(np.var(y)))
        logw0 = pm.Normal("logw0", mu=-3.9, sd=0.1)
        logQ = pm.Normal("logQ", mu=3.5, sd=5.0)

        # Set up the kernel and GP
        kernel = terms.SHOTerm(log_S0=logS0, log_w0=logw0, log_Q=logQ)
        if integrated:
            kernel_int = terms.IntegratedTerm(kernel, exp_time)
            gp = GP(kernel_int, t, yerr**2)
        else:
            gp = GP(kernel, t, yerr**2)

        # Add a custom "potential" (log probability function) with the GP likelihood
        pm.Potential("gp", gp.log_likelihood(y))

    with model:
        map_soln = xo.optimize(start=model.test_point)
        mu, var = xo.eval_in_model(gp.predict(t_grid, return_var=True),
                                   map_soln)
        sd = np.sqrt(var)
        y_pred = xo.eval_in_model(gp.predict(t), map_soln)

    return map_soln, mu, sd, y_pred
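# A minimal usage sketch for gp_fit on simulated data. The demo names below
# (t_demo, y_demo, e_demo, grid_demo) are hypothetical, chosen so that the
# signal period (~300 s) sits near the prior on logw0 above.
t_demo = np.arange(0.0, 6000.0, 60.0)  # one sample per minute, in seconds
e_demo = 0.3 * np.ones_like(t_demo)
y_demo = (0.5 * np.sin(2 * np.pi * t_demo / 300.0)
          + e_demo * np.random.randn(len(t_demo)))
grid_demo = np.linspace(t_demo.min(), t_demo.max(), 1000)
map_soln, mu, sd, y_pred = gp_fit(t_demo, y_demo, e_demo, grid_demo)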
def test_broadcast_dim():
    logS0 = tt.scalar()
    logw0 = tt.scalar()
    logQ = tt.scalar()
    logS0.tag.test_value = -5.0
    logw0.tag.test_value = -2.0
    logQ.tag.test_value = 1.0
    kernel = terms.SHOTerm(S0=tt.exp(logS0), w0=tt.exp(logw0), Q=tt.exp(logQ))

    x = tt.vector()
    y = tt.vector()
    diag = tt.vector()
    x.tag.test_value = np.zeros(2)
    y.tag.test_value = np.zeros(2)
    diag.tag.test_value = np.ones(2)
    gp = GP(kernel, x, diag, J=2)
    loglike = gp.log_likelihood(y)

    args = [logS0, logw0, logQ, x, y, diag]
    grad = theano.function(args, theano.grad(loglike, args))

    np.random.seed(42)
    N = 50
    x = np.sort(10 * np.random.rand(N))
    y = np.sin(x)
    diag = np.random.rand(N)
    grad(-5.0, -2.0, 1.0, x, y, diag)
def test_sho_reparam(seed=6083):
    S0 = 10.0
    w0 = 0.5
    Q = 3.2
    kernel1 = terms.SHOTerm(S0=S0, w0=w0, Q=Q)
    kernel2 = terms.SHOTerm(Sw4=S0 * w0**4, w0=w0, Q=Q)
    func1 = theano.function([], kernel1.coefficients)
    func2 = theano.function([], kernel2.coefficients)
    for a, b in zip(func1(), func2()):
        assert np.allclose(a, b)

    kernel2 = terms.SHOTerm(log_Sw4=np.log(S0) + 4 * np.log(w0), w0=w0, Q=Q)
    func2 = theano.function([], kernel2.coefficients)
    for a, b in zip(func1(), func2()):
        assert np.allclose(a, b)

    Q = 0.1
    kernel1 = terms.SHOTerm(S0=S0, w0=w0, Q=Q)
    kernel2 = terms.SHOTerm(Sw4=S0 * w0**4, w0=w0, Q=Q)
    func1 = theano.function([], kernel1.coefficients)
    func2 = theano.function([], kernel2.coefficients)
    for a, b in zip(func1(), func2()):
        assert np.allclose(a, b)

    kernel2 = terms.SHOTerm(log_Sw4=np.log(S0) + 4 * np.log(w0), w0=w0, Q=Q)
    func2 = theano.function([], kernel2.coefficients)
    for a, b in zip(func1(), func2()):
        assert np.allclose(a, b)
def test_fortran_order(seed=5091986):
    np.random.seed(seed)
    kernel = terms.SHOTerm(log_S0=0.1, log_Q=1.0, log_w0=0.5)
    x = np.sort(np.random.uniform(0, 100, 100))
    y = np.sin(x)
    yerr = np.random.uniform(0.1, 0.5, len(x))
    diag = yerr**2
    gp = GP(kernel, x, diag)

    loglike = gp.log_likelihood(y).eval()
    loglike_f = gp.log_likelihood(np.asfortranarray(y)).eval()
    assert np.allclose(loglike, loglike_f)
def multi_gp_predict(t, y, yerr, t_grid, integrated=False, exp_time=60.):
    # NOTE: rough code, but in principle does gp_predict() for a full comb
    # of oscillation modes.
    a_max = 0.55  # amplitude of central mode in m/s
    nu_max = 3.1e-3  # peak frequency in Hz
    c_env = 0.331e-3  # envelope width in Hz
    delta_nu = 0.00013  # Hz
    gamma = 1. / (2 * 24. * 60. * 60.)  # s^-1; 2-day damping timescale
    freq_grid = np.arange(nu_max - 0.001, nu_max + 0.001, delta_nu)  # magic numbers
    amp_grid = a_max**2 * np.exp(-(freq_grid - nu_max)**2 /
                                 (2. * c_env**2))  # squared amplitudes, (m/s)^2
    # `dt` and `omega_grid` were undefined in the original; the two lines
    # below are assumptions needed to make this function runnable.
    dt = np.median(np.diff(t))  # assumed sampling cadence in seconds
    omega_grid = 2 * np.pi * freq_grid  # angular frequencies in rad/s
    driving_amp_grid = np.sqrt(amp_grid * gamma * dt)
    log_S0_grid = [
        np.log(d**2 / (dt * o)) for o, d in zip(omega_grid, driving_amp_grid)
    ]

    with pm.Model() as model:
        kernel = None
        for o, lS in zip(omega_grid, log_S0_grid):
            if kernel is None:
                kernel = terms.SHOTerm(log_S0=lS, log_w0=np.log(o),
                                       log_Q=np.log(o / gamma))
            else:
                kernel += terms.SHOTerm(log_S0=lS, log_w0=np.log(o),
                                        log_Q=np.log(o / gamma))
        if integrated:
            kernel_int = terms.IntegratedTerm(kernel, exp_time)
            gp = GP(kernel_int, t, yerr**2)
        else:
            gp = GP(kernel, t, yerr**2)
        gp.condition(y)
        mu, var = xo.eval_in_model(gp.predict(t_grid, return_var=True))
        sd = np.sqrt(var)
        y_pred = xo.eval_in_model(gp.predict(t))

    return y_pred, mu, sd
def test_integrated_diag(seed=1234):
    np.random.seed(seed)
    x = np.sort(np.random.uniform(0, 100, 100))
    dt = 0.4 * np.min(np.diff(x))
    yerr = np.random.uniform(0.1, 0.5, len(x))
    diag = yerr**2

    kernel = terms.SHOTerm(log_S0=0.1, log_Q=1.0, log_w0=0.5)
    kernel += terms.RealTerm(log_a=0.1, log_c=0.4)

    a = kernel.get_celerite_matrices(x, diag)[0].eval()
    k0 = kernel.value(tt.zeros(1)).eval()
    assert np.allclose(a, k0 + diag)

    kernel = terms.IntegratedTerm(kernel, dt)
    a = kernel.get_celerite_matrices(x, diag)[0].eval()
    k0 = kernel.value(tt.zeros(1)).eval()
    assert np.allclose(a, k0 + diag)
def _get_theano_kernel(celerite_kernel):
    import celerite.terms as cterms

    if isinstance(celerite_kernel, cterms.TermSum):
        result = _get_theano_kernel(celerite_kernel.terms[0])
        for k in celerite_kernel.terms[1:]:
            result += _get_theano_kernel(k)
        return result
    elif isinstance(celerite_kernel, cterms.TermProduct):
        return _get_theano_kernel(celerite_kernel.k1) * _get_theano_kernel(
            celerite_kernel.k2)
    elif isinstance(celerite_kernel, cterms.RealTerm):
        return terms.RealTerm(log_a=celerite_kernel.log_a,
                              log_c=celerite_kernel.log_c)
    elif isinstance(celerite_kernel, cterms.ComplexTerm):
        if not celerite_kernel.fit_b:
            return terms.ComplexTerm(
                log_a=celerite_kernel.log_a,
                b=0.0,
                log_c=celerite_kernel.log_c,
                log_d=celerite_kernel.log_d,
            )
        return terms.ComplexTerm(
            log_a=celerite_kernel.log_a,
            log_b=celerite_kernel.log_b,
            log_c=celerite_kernel.log_c,
            log_d=celerite_kernel.log_d,
        )
    elif isinstance(celerite_kernel, cterms.SHOTerm):
        return terms.SHOTerm(
            log_S0=celerite_kernel.log_S0,
            log_Q=celerite_kernel.log_Q,
            log_w0=celerite_kernel.log_omega0,
        )
    elif isinstance(celerite_kernel, cterms.Matern32Term):
        return terms.Matern32Term(
            log_sigma=celerite_kernel.log_sigma,
            log_rho=celerite_kernel.log_rho,
        )
    raise NotImplementedError()
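# A quick sketch of how _get_theano_kernel can be exercised, assuming the
# original `celerite` package is installed; the helper name and parameter
# values here are arbitrary, for illustration only.
def _check_theano_kernel_roundtrip():
    import celerite.terms as cterms
    ck = (cterms.SHOTerm(log_S0=0.1, log_Q=1.0, log_omega0=0.5)
          + cterms.RealTerm(log_a=0.1, log_c=0.4))
    tk = _get_theano_kernel(ck)
    # The converted kernel should reproduce celerite's value at zero lag.
    assert np.allclose(tk.value(tt.zeros(1)).eval(), ck.get_value(np.zeros(1)))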
def gp_predict(t, y, yerr, t_grid, logS0=0.4, logw0=-3.9, logQ=3.5,
               integrated=False, exp_time=60.):
    # Take kernel hyperparameters as fixed inputs; condition and predict
    with pm.Model() as model:
        kernel = terms.SHOTerm(log_S0=logS0, log_w0=logw0, log_Q=logQ)
        if integrated:
            kernel_int = terms.IntegratedTerm(kernel, exp_time)
            gp = GP(kernel_int, t, yerr**2)
        else:
            gp = GP(kernel, t, yerr**2)
        gp.condition(y)
        mu, var = xo.eval_in_model(gp.predict(t_grid, return_var=True))
        sd = np.sqrt(var)
        y_pred = xo.eval_in_model(gp.predict(t))

    return y_pred, mu, sd
with pm.Model() as model:
    mean = pm.Normal("mean", mu=0.0, sigma=1.0)

    S1 = pm.InverseGamma(
        "S1", **estimate_inverse_gamma_parameters(0.5**2, 10.0**2))
    S2 = pm.InverseGamma(
        "S2", **estimate_inverse_gamma_parameters(0.25**2, 1.0**2))
    w1 = pm.InverseGamma(
        "w1", **estimate_inverse_gamma_parameters(2 * np.pi / 10.0, np.pi))
    w2 = pm.InverseGamma(
        "w2", **estimate_inverse_gamma_parameters(0.5 * np.pi, 2 * np.pi))
    log_Q = pm.Uniform("log_Q", lower=np.log(2), upper=np.log(10))

    # Set up the kernel and GP
    kernel = terms.SHOTerm(S_tot=S1, w0=w1, Q=1.0 / np.sqrt(2))
    kernel += terms.SHOTerm(S_tot=S2, w0=w2, log_Q=log_Q)
    gp = GP(kernel, t, yerr**2, mean=mean)

    # Condition the GP on the observations and add the marginal likelihood
    # to the model
    gp.marginal("gp", observed=y)

with model:
    map_soln = xo.optimize(start=model.test_point)

with model:
    mu, var = xo.eval_in_model(
        gp.predict(true_t, return_var=True, predict_mean=True), map_soln)

# Plot the prediction and the 1-sigma uncertainty
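# A minimal sketch of the plot the comment above refers to, assuming
# matplotlib is imported as `plt` and that `true_t`, `t`, `y`, and `yerr`
# are defined in the surrounding script; since predict_mean=True was used,
# `mu` already includes the mean model.
sd = np.sqrt(var)
art = plt.fill_between(true_t, mu + sd, mu - sd, color="C1", alpha=0.3)
art.set_edgecolor("none")
plt.plot(true_t, mu, color="C1", label="prediction")
plt.errorbar(t, y, yerr=yerr, fmt=".k", capsize=0, label="data")
plt.legend(fontsize=12)
plt.xlabel("t")
plt.ylabel("y")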
def build_model(mask=None, start=None):
    with pm.Model() as model:
        # The baseline flux
        mean = pm.Normal("mean", mu=0.0, sd=0.00001)

        # The time of a reference transit for each planet
        t0 = pm.Normal("t0", mu=t0s, sd=1.0, shape=1)

        # The log period; also tracking the period itself
        logP = pm.Normal("logP", mu=np.log(periods), sd=0.01, shape=1)
        rho_star = pm.Normal("rho_star", mu=0.14, sd=0.01, shape=1)
        r_star = pm.Normal("r_star", mu=2.7, sd=0.01, shape=1)
        period = pm.Deterministic("period", pm.math.exp(logP))

        # The Kipping (2013) parameterization for quadratic limb darkening
        # parameters
        u = xo.distributions.QuadLimbDark("u", testval=np.array([0.3, 0.2]))
        r = pm.Uniform("r", lower=0.01, upper=0.3, shape=1, testval=0.15)
        b = xo.distributions.ImpactParameter("b", ror=r, shape=1, testval=0.5)

        # Transit jitter & GP parameters
        logs2 = pm.Normal("logs2", mu=np.log(np.var(y)), sd=10)
        logw0 = pm.Normal("logw0", mu=0, sd=10)
        logSw4 = pm.Normal("logSw4", mu=np.log(np.var(y)), sd=10)

        # Set up a Keplerian orbit for the planets
        orbit = xo.orbits.KeplerianOrbit(period=period, t0=t0, b=b,
                                         rho_star=rho_star, r_star=r_star)

        # Compute the model light curve using starry
        light_curves = xo.LimbDarkLightCurve(u).get_light_curve(
            orbit=orbit, r=r, t=t)
        light_curve = pm.math.sum(light_curves, axis=-1) + mean

        # Here we track the value of the model light curve for plotting
        # purposes
        pm.Deterministic("light_curves", light_curves)

        S1 = pm.InverseGamma(
            "S1", **estimate_inverse_gamma_parameters(0.5**2, 10.0**2))
        S2 = pm.InverseGamma(
            "S2", **estimate_inverse_gamma_parameters(0.25**2, 1.0**2))
        w1 = pm.InverseGamma(
            "w1", **estimate_inverse_gamma_parameters(2 * np.pi / 10.0, np.pi))
        w2 = pm.InverseGamma(
            "w2", **estimate_inverse_gamma_parameters(0.5 * np.pi, 2 * np.pi))
        log_Q = pm.Uniform("log_Q", lower=np.log(2), upper=np.log(10))

        # Set up the kernel and GP
        kernel = terms.SHOTerm(S_tot=S1, w0=w1, Q=1.0 / np.sqrt(2))
        kernel += terms.SHOTerm(S_tot=S2, w0=w2, log_Q=log_Q)
        gp = GP(kernel, t, yerr**2, mean=mean)
        gp.marginal("gp", observed=y)
        pm.Deterministic("gp_pred", gp.predict())

        # The likelihood function assuming known Gaussian uncertainty
        pm.Normal("obs", mu=light_curve, sd=yerr, observed=y)

        # Fit for the maximum a posteriori parameters given the simulated
        # dataset
        map_soln = xo.optimize(start=model.test_point)

    return model, map_soln
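# A hedged usage sketch: build the model and detrend the data with the MAP
# GP prediction (assumes `t`, `y`, `yerr`, `t0s`, and `periods` are defined
# above; the names model0/map_soln0/detrended are illustrative).
model0, map_soln0 = build_model()
detrended = y - map_soln0["gp_pred"] - map_soln0["mean"]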
loglike0 = -np.sum(np.log(np.diag(factor[0])))
loglike0 -= 0.5 * len(x) * np.log(2 * np.pi)
loglike0 -= 0.5 * np.dot(y, cho_solve(factor, y))
assert np.allclose(loglike, loglike0)


@pytest.mark.parametrize(
    "kernel",
    [
        terms.RealTerm(log_a=0.1, log_c=0.5),
        terms.RealTerm(log_a=0.1, log_c=0.5) +
        terms.RealTerm(log_a=-0.1, log_c=0.7),
        terms.ComplexTerm(log_a=0.1, b=0.0, log_c=0.5, log_d=0.1),
        terms.ComplexTerm(log_a=0.1, log_b=-0.2, log_c=0.5, log_d=0.1),
        terms.SHOTerm(log_S0=0.1, log_Q=-1, log_w0=0.5),
        terms.SHOTerm(log_S0=0.1, log_Q=1.0, log_w0=0.5),
        terms.SHOTerm(log_S0=0.1, log_Q=1.0, log_w0=0.5) +
        terms.RealTerm(log_a=0.1, log_c=0.4),
        terms.SHOTerm(log_S0=0.1, log_Q=1.0, log_w0=0.5) *
        terms.RealTerm(log_a=0.1, log_c=0.4),
        terms.Matern32Term(log_sigma=0.1, log_rho=0.4),
    ],
)
def test_integrated(kernel, seed=1234):
    np.random.seed(seed)
    x = np.sort(np.random.uniform(0, 100, 100))
    dt = 0.4 * np.min(np.diff(x))
    y = np.sin(x)
    yerr = np.random.uniform(0.1, 0.5, len(x))
    diag = yerr**2
def run_gp_single(Sgv, wgv, S1v, w1v, Q1v, opt=opt):
    if opt == 1:
        print('Running GP single optimization:',
              'Sgv', Sgv, 'wgv', wgv, 'S1v', S1v, 'w1v', w1v, 'Q1v', Q1v)
    with pm.Model() as model:
        logs2 = pm.Normal("logs2", mu=2 * np.log(np.mean(yerr)),
                          sigma=100.0, testval=-100)  # start with negligible jitter
        logSg = pm.Normal("logSg", mu=Sgv, sigma=100.0, testval=Sgv)
        logwg = pm.Normal("logwg", mu=wgv, sigma=100.0, testval=wgv)
        logS1 = pm.Normal("logS1", mu=S1v, sigma=100.0, testval=S1v)
        logw1 = pm.Normal("logw1", mu=w1v, sigma=100.0, testval=w1v)
        logQ1 = pm.Normal("logQ1", mu=Q1v, sigma=100.0, testval=Q1v)

        # Set up the kernel and GP
        bg_kernel = terms.SHOTerm(log_S0=logSg, log_w0=logwg,
                                  Q=1.0 / np.sqrt(2))
        star_kernel1 = terms.SHOTerm(log_S0=logS1, log_w0=logw1, log_Q=logQ1)
        kernel = star_kernel1 + bg_kernel
        gp = GP(kernel, t, yerr**2 + pm.math.exp(logs2))
        gp_star1 = GP(star_kernel1, t, yerr**2 + pm.math.exp(logs2))
        gp_bg = GP(bg_kernel, t, yerr**2 + pm.math.exp(logs2))

        # Condition the GP on the observations and add the marginal likelihood
        # to the model
        gp.marginal("gp", observed=y)

    with model:
        val = gp.kernel.psd(omega)
        psd_init = xo.eval_in_model(val)
        bg_val = gp_bg.kernel.psd(omega)
        star_val_1 = gp_star1.kernel.psd(omega)
        bg_psd_init = xo.eval_in_model(bg_val)
        star_1_psd_init = xo.eval_in_model(star_val_1)

        map_soln = model.test_point
        if opt == 1:
            # TODO: check whether these need rescaling for display
            map_soln = xo.optimize(start=map_soln, vars=[logSg])
            map_soln = xo.optimize(start=map_soln, vars=[logwg])
            map_soln = xo.optimize(start=map_soln, vars=[logw1])
            map_soln = xo.optimize(start=map_soln, vars=[logS1])
            map_soln = xo.optimize(start=map_soln)
            print(map_soln.values())

        mu, var = xo.eval_in_model(gp.predict(t, return_var=True), map_soln)

        plt.figure()
        plt.errorbar(t, y, yerr=yerr, fmt=".k", capsize=0, label="data")
        sd = np.sqrt(var)
        art = plt.fill_between(t, mu + sd, mu - sd, color="C1", alpha=0.3)
        art.set_edgecolor("none")
        plt.plot(t, mu, color="C1", label="prediction")
        plt.legend(fontsize=12)
        plt.xlabel("t")
        plt.ylabel("y")
        plt.xlim(0, 10)
        _ = plt.ylim(-2.5, 2.5)

        psd_final = xo.eval_in_model(gp.kernel.psd(omega), map_soln)
        bg_psd_fin = xo.eval_in_model(bg_val, map_soln)
        star_1_psd_fin = xo.eval_in_model(star_val_1, map_soln)

    return (psd_init, star_1_psd_init, bg_psd_init,
            psd_final, star_1_psd_fin, bg_psd_fin, map_soln)
def run_gp_binary(Sg, wg, S1, w1, Q1, S2, w2, Q2, opt=opt):
    with pm.Model() as model:
        logs2 = pm.Normal("logs2", mu=2 * np.log(np.mean(yerr)),
                          sigma=100.0, testval=-100)
        mean = pm.Normal("mean", mu=np.mean(y), sigma=1.0)
        logSg = pm.Normal("logSg", mu=0.0, sigma=15.0, testval=Sg)
        logwg = pm.Normal("logwg", mu=0.0, sigma=15.0,
                          testval=wg - np.log(1e6))
        logS1 = pm.Normal("logS1", mu=0.0, sigma=15.0, testval=S1)
        logw1 = pm.Normal("logw1", mu=0.0, sigma=15.0,
                          testval=w1 - np.log(1e6))
        logQ1 = pm.Normal("logQ1", mu=0.0, sigma=15.0, testval=Q1)
        logS2 = pm.Normal("logS2", mu=0.0, sigma=15.0, testval=S2)
        logw2 = pm.Normal("logw2", mu=0.0, sigma=15.0,
                          testval=w2 - np.log(1e6))
        logQ2 = pm.Normal("logQ2", mu=0.0, sigma=15.0, testval=Q2)

        # Set up the kernel and GP
        bg_kernel = terms.SHOTerm(log_S0=logSg, log_w0=logwg,
                                  Q=1.0 / np.sqrt(2))
        star_kernel1 = terms.SHOTerm(log_S0=logS1, log_w0=logw1, log_Q=logQ1)
        star_kernel2 = terms.SHOTerm(log_S0=logS2, log_w0=logw2, log_Q=logQ2)
        kernel = star_kernel1 + star_kernel2 + bg_kernel
        gp = GP(kernel, t, yerr**2 + pm.math.exp(logs2), mean=mean)
        gp_star1 = GP(star_kernel1, t, yerr**2 + pm.math.exp(logs2), mean=mean)
        gp_bg = GP(bg_kernel, t, yerr**2 + pm.math.exp(logs2), mean=mean)
        gp_star2 = GP(star_kernel2, t, yerr**2 + pm.math.exp(logs2), mean=mean)

        # Condition the GP on the observations and add the marginal likelihood
        # to the model
        gp.marginal("gp", observed=y)

    with model:
        val = gp.kernel.psd(omega)
        psd_init = xo.eval_in_model(val)
        bg_val = gp_bg.kernel.psd(omega)
        star_val_1 = gp_star1.kernel.psd(omega)
        star_val_2 = gp_star2.kernel.psd(omega)
        bg_psd_init = xo.eval_in_model(bg_val)
        star_1_psd_init = xo.eval_in_model(star_val_1)
        star_2_psd_init = xo.eval_in_model(star_val_2)

        map_soln = model.test_point
        if opt == 1:
            print('running opt')
            # TODO: check whether these need rescaling for display
            map_soln = xo.optimize(start=map_soln, vars=[logSg])
            map_soln = xo.optimize(start=map_soln, vars=[logwg])
            # map_soln = xo.optimize(start=map_soln, vars=[logS1, logw1])
            # map_soln = xo.optimize(start=map_soln, vars=[logS2, logw2])

        psd_final = xo.eval_in_model(gp.kernel.psd(omega), map_soln)
        bg_psd_fin = xo.eval_in_model(bg_val, map_soln)
        star_1_psd_fin = xo.eval_in_model(star_val_1, map_soln)
        star_2_psd_fin = xo.eval_in_model(star_val_2, map_soln)

    return (psd_init, star_1_psd_init, star_2_psd_init, bg_psd_init,
            psd_final, star_1_psd_fin, star_2_psd_fin, bg_psd_fin, map_soln)
# %%
import numpy as np
import pymc3 as pm
import theano.tensor as tt

from exoplanet.gp import terms, GP

with pm.Model() as model:
    mean = pm.Normal("mean", mu=0.0, sigma=1.0)
    logS1 = pm.Normal("logS1", mu=0.0, sigma=15.0, testval=np.log(np.var(y)))
    logw1 = pm.Normal("logw1", mu=0.0, sigma=15.0, testval=np.log(3.0))
    logS2 = pm.Normal("logS2", mu=0.0, sigma=15.0, testval=np.log(np.var(y)))
    logw2 = pm.Normal("logw2", mu=0.0, sigma=15.0, testval=np.log(3.0))
    logQ = pm.Normal("logQ", mu=0.0, sigma=15.0, testval=0)

    # Set up the kernel and GP
    kernel = terms.SHOTerm(log_S0=logS1, log_w0=logw1, Q=1.0 / np.sqrt(2))
    kernel += terms.SHOTerm(log_S0=logS2, log_w0=logw2, log_Q=logQ)
    gp = GP(kernel, t, yerr**2, mean=mean)

    # Condition the GP on the observations and add the marginal likelihood
    # to the model
    gp.marginal("gp", observed=y)

# %% [markdown]
# A few comments here:
#
# 1. The `term` interface in *exoplanet* only accepts keyword arguments with names given by the `parameter_names` property of the term. But it will also interpret keyword arguments with the name prefaced by `log_` as the log of the parameter. For example, in this case, we used `log_S0` as the parameter for each term, but `S0=tt.exp(log_S0)` would have been equivalent (see the short illustration in the cell below). This is useful because many of the parameters are required to be positive, so fitting the log of those parameters is often best.
# 2. The third argument to the :class:`exoplanet.gp.GP` constructor should be the *variance* to add along the diagonal, not the standard deviation as in the original [celerite implementation](https://celerite.readthedocs.io).
# 3. Finally, the :class:`exoplanet.gp.GP` constructor takes an optional argument `J` which specifies the width of the problem if it is known at compile time. Just to be confusing, this is actually two times the `J` from [the celerite paper](https://arxiv.org/abs/1703.09710). There are various technical reasons why this is difficult to work out in general, and this code will always work if you don't provide a value for `J`, but you can get much better performance (especially for small `J`) if you know what it will be for your problem. In general, most terms cost `J=2`, with the exception of a :class:`exoplanet.gp.terms.RealTerm` (which costs `J=1`) and a :class:`exoplanet.gp.terms.RotationTerm` (which costs `J=4`).
#
# To start, let's fit for the maximum a posteriori (MAP) parameters and look at the predictions that those make.
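# %%
# A small sketch illustrating points 1 and 3 above (the parameter values are
# arbitrary): passing `log_S0` is equivalent to passing `S0` on the linear
# scale, and since each SHOTerm costs J=2, the two-term kernel built above
# could be constructed with an explicit width as GP(kernel, t, yerr**2, J=4).
k_log = terms.SHOTerm(log_S0=0.1, log_w0=0.5, log_Q=1.0)
k_lin = terms.SHOTerm(S0=np.exp(0.1), w0=np.exp(0.5), Q=np.exp(1.0))
for a, b in zip(k_log.coefficients, k_lin.coefficients):
    assert np.allclose(a.eval(), b.eval())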