def test_peculiar_blobs(self, data):
    sampler = emcee.EnsembleSampler(
        6, 1, lambda x: (-(x**2), (np.random.normal(x), 3))
    )
    sampler.run_mcmc(np.random.normal(size=(6, 1)), 20)
    inference_data = from_emcee(sampler, blob_names=["normal", "threes"])
    fails = check_multiple_attrs({"sample_stats": ["normal", "threes"]}, inference_data)
    assert not fails
    inference_data = from_emcee(data.obj, blob_names=["mix"])
    fails = check_multiple_attrs({"sample_stats": ["mix"]}, inference_data)
    assert not fails
def test_single_blob(self):
    sampler = emcee.EnsembleSampler(6, 1, lambda x: (-(x**2), 3))
    sampler.run_mcmc(np.random.normal(size=(6, 1)), 20)
    inference_data = from_emcee(sampler, blob_names=["blob"])
    fails = check_multiple_attrs({"sample_stats": ["blob"]}, inference_data)
    assert not fails
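# A hedged variation on the tests above (not part of the suite): blobs can
# also be routed to groups other than sample_stats via blob_groups. The group
# and variable names below follow the ArviZ docs but are otherwise
# illustrative.
sampler = emcee.EnsembleSampler(
    6, 1, lambda x: (-(x[0] ** 2), -(x[0] ** 2), np.random.normal(x[0]))
)
sampler.run_mcmc(np.random.normal(size=(6, 1)), 20)
idata = from_emcee(
    sampler,
    blob_names=["log_likelihood", "y_pred"],
    blob_groups=["log_likelihood", "posterior_predictive"],
)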
def fit(self, num_samples=500, num_burnin=200, num_walkers=30, num_cores=None):
    # figure out a good place to begin our sampling
    p0 = self.gen_start_point(num_walkers)
    # instantiate a context appropriate for the number of cores requested
    if num_cores is not None:
        poolholder = Pool(processes=num_cores)
    else:
        poolholder = nullcontext()
    with poolholder as pool:
        # generate sampler
        print('number of priors is ' + str(len(self.prior)))
        print('number of parameters is ' + str(p0.shape[1]))
        sampler = emcee.EnsembleSampler(num_walkers, p0.shape[1], self.logp,
                                        moves=emcee.moves.DESnookerMove(), pool=pool)
        if num_burnin > 0:
            p0 = sampler.run_mcmc(p0, num_burnin, progress=True)
            sampler.reset()
        sampler.run_mcmc(p0, num_samples, progress=True)
        print("Mean acceptance fraction: {0:.3f}".format(
            np.mean(sampler.acceptance_fraction)))
    # define variable names; they cannot be inferred from emcee
    var_names = self.gather_parameter_names()
    return arviz.from_emcee(sampler, var_names=var_names)
def get_inference_data_reader(self, **kwargs):
    from emcee import backends  # pylint: disable=no-name-in-module

    here = os.path.dirname(os.path.abspath(__file__))
    data_directory = os.path.join(here, "..", "saved_models")
    filepath = os.path.join(data_directory, "reader_testfile.h5")
    assert os.path.exists(filepath)
    assert os.path.getsize(filepath)
    reader = backends.HDFBackend(filepath, read_only=True)
    return from_emcee(reader, **kwargs)
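# A hedged sketch (not the actual fixture-generation code) of how an HDF chain
# file like the one read above can be produced with emcee's own backend; the
# path and the toy log-probability are placeholders.
import emcee
import numpy as np

backend = emcee.backends.HDFBackend("reader_testfile.h5")
backend.reset(6, 1)  # nwalkers, ndim
sampler = emcee.EnsembleSampler(6, 1, lambda x: -(x[0] ** 2), backend=backend)
sampler.run_mcmc(np.random.normal(size=(6, 1)), 20)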
def posterior_samples(self):
    dd = az.from_emcee(self.sampler, var_names=self.ps.names)
    ds = xa.Dataset()
    pst = dd.posterior
    c = pst.rho.coords
    DA = xa.DataArray
    for i in range(1, self.nplanets + 1):
        p = pst[f'p_{i}'].values
        ds[f'k_{i}'] = k = DA(sqrt(pst[f'k2_{i}']), coords=c)
        ds[f'a_{i}'] = a = DA(as_from_rhop(pst.rho.values, p), coords=c)
        ds[f'e_{i}'] = e = DA(pst[f'secw_{i}']**2 + pst[f'sesw_{i}']**2, coords=c)
        ds[f'w_{i}'] = w = DA(arctan2(pst[f'sesw_{i}'], pst[f'secw_{i}']), coords=c)
        ds[f'i_{i}'] = inc = DA(i_from_baew(pst[f'b_{i}'].values, a.values, e.values, w.values), coords=c)
        ds[f't14_{i}'] = DA(d_from_pkaiews(p, k.values, a.values, inc.values, 0., 0., 1, kind=14), coords=c)
        ds[f't23_{i}'] = DA(d_from_pkaiews(p, k.values, a.values, inc.values, 0., 0., 1, kind=23), coords=c)
    dd.add_groups({'derived_parameters': ds})
    return dd
def fit(self, num_samples=1000, num_burnin=200, num_walkers=30, num_cores=None):
    try:
        import emcee
    except ModuleNotFoundError as e:
        raise ModuleNotFoundError(
            "Bayesian estimation requires emcee to be installed.") from e
    # figure out a good place to begin our sampling
    p0 = self.gen_start_point(num_walkers)
    # instantiate a context appropriate for the number of cores requested
    if num_cores is not None:
        poolholder = Pool(processes=num_cores)
    else:
        poolholder = nullcontext()
    with poolholder as pool:
        # generate sampler
        print('number of priors is ' + str(len(self.prior)))
        # note: the parameter count is the second axis of p0, not len(p0.shape[0])
        sampler = emcee.EnsembleSampler(num_walkers, p0.shape[1], self.logp,
                                        moves=emcee.moves.DESnookerMove(), pool=pool)
        if num_burnin > 0:
            p0 = sampler.run_mcmc(p0, num_burnin, progress=True)
            sampler.reset()
        sampler.run_mcmc(p0, num_samples, progress=True)
    # define variable names; they cannot be inferred from emcee
    var_names = self.gather_parameter_names()
    return arviz.from_emcee(sampler, var_names=var_names)
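# A hypothetical usage sketch for fit() above; `estimator` stands in for an
# instance of the (unnamed) class this method belongs to, and the kwargs
# mirror the defaults.
# idata = estimator.fit(num_samples=1000, num_burnin=200, num_walkers=30, num_cores=4)
# print(arviz.summary(idata))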
def test_no_blobs_error(self):
    sampler = emcee.EnsembleSampler(6, 1, lambda x: -(x**2))
    sampler.run_mcmc(np.random.normal(size=(6, 1)), 20)
    with pytest.raises(ValueError):
        from_emcee(sampler, blob_names=["inexistent"])
def test_bad_blobs(self, data, blob_args):
    error, names, groups = blob_args
    with pytest.raises(error):
        from_emcee(data.obj, blob_names=names, blob_groups=groups)
def test_verify_arg_names(self, data):
    with pytest.raises(ValueError):
        from_emcee(data.obj, arg_names=["not enough"])
def test_slices_warning(self, data, slices):
    with pytest.warns(UserWarning):
        from_emcee(data.obj, slices=slices)
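# For contrast, a slices spec that maps cleanly (a sketch, assuming the flat
# chain has at least three columns): each column is claimed by exactly one
# variable, so no UserWarning should fire. The same spec appears in the
# eight-schools example further below.
# from_emcee(data.obj, slices=[0, 1, slice(2, None)])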
plt.plot(x0, np.dot(np.vander(x0, 2), w), "--k", label="LS")
plt.plot(x0, np.dot(np.vander(x0, 2), [m_ml, b_ml]), ":k", label="ML")
plt.legend(fontsize=14)
plt.xlim(0, 10)
plt.xlabel("x")
plt.ylabel("y")
#plt.show()
plt.savefig('test_emcee_2_fig1.svg', format='svg', dpi=1200)
plt.close()

# now the arviz plots
import arviz as az

var_names = ['m', 'b', 'f']
emcee_data = az.from_emcee(sampler, var_names=var_names)
az.plot_posterior(emcee_data, var_names=var_names[:])
#plt.show()
plt.savefig('test_emcee_2_fig2.svg', format='svg', dpi=1200)
plt.close()

# now the trace plot
az.plot_trace(emcee_data, var_names=var_names)
#plt.show()
plt.savefig('test_emcee_2_fig3.svg', format='svg', dpi=1200)
plt.close()

az.plot_pair(emcee_data, var_names=var_names, kind='kde')
#plt.show()
plt.savefig('test_emcee_2_fig4.svg', format='svg', dpi=1200)
plt.close()
def test_inference_data(self, data, test_args):
    kwargs, test_dict = test_args
    inference_data = from_emcee(data.obj, **kwargs)
    fails = check_multiple_attrs(test_dict, inference_data)
    assert not fails
def test__verify_arg_names(self):
    with pytest.raises(ValueError):
        from_emcee(self.obj, arg_names=['not', 'enough'])
def test__verify_arg_names(self, obj):
    with pytest.raises(ValueError):
        from_emcee(obj, arg_names=["not", "enough"])
def test__verify_var_names(self):
    with pytest.raises(ValueError):
        from_emcee(self.obj, var_names=["not", "enough"])
    return like + prior


nwalkers, draws = 50, 700
ndim = J + 2
pos = np.random.normal(size=(nwalkers, ndim))
pos[:, 1] = np.absolute(pos[:, 1])
sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob_8school, args=(y_obs, sigma))
sampler.run_mcmc(pos, draws)

var_names = ['mu', 'tau'] + ['eta{}'.format(i) for i in range(J)]
emcee_data = az.from_emcee(sampler, var_names=var_names).sel(draw=slice(100, None))
az.plot_posterior(emcee_data, var_names=var_names[:3])
#plt.show()
plt.savefig('test_arviz_fig1.svg', format='svg', dpi=1200)
plt.close()

emcee_data = az.from_emcee(sampler, slices=[0, 1, slice(2, None)])
az.plot_trace(emcee_data, var_names=["var_2"], coords={"var_2_dim_0": 4})
#plt.show()
plt.savefig('test_arviz_fig2.svg', format='svg', dpi=1200)
plt.close()


def lnprob_8school_blobs(theta, y, s):
    prior = log_prior_8school(theta)
    like_vect = log_likelihood_8school(theta, y, s)
        break
    old_tau = tau

#%%
# gather 100 more samples to ensure clean chains
sampler.run_mcmc(backend.get_last_sample(), 100, progress=True)

#%%
import arviz as az
import corner

# plot chains
reader = emcee.backends.HDFBackend("example_chain.hdf5")
full_data = az.from_emcee(reader, var_names=model.labels)
az.plot_trace(full_data)

#%%
# remove burn-in data and thin and replot
tau = reader.get_autocorr_time(tol=0)
burnin = int(tau.max())
thin = int(0.3 * np.min(tau))
burn_samples = reader.get_chain(discard=burnin, thin=thin)
log_prob_samples = reader.get_log_prob(discard=burnin, thin=thin)
log_prior_samples = reader.get_blobs(discard=burnin, thin=thin)
dd = dict(zip(model.labels, burn_samples.T))
burn_data = az.from_dict(dd)
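# A hedged extension of the from_dict call above: the log-probability and
# log-prior draws already pulled from the reader can ride along as
# sample_stats (the "lp" key follows ArviZ's convention; the transposes put
# each array into (chain, draw) order).
burn_data = az.from_dict(
    dd,
    sample_stats={"lp": log_prob_samples.T, "log_prior": log_prior_samples.T},
)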
def get_inference_data(self, obj):
    return from_emcee(obj, var_names=["ln(f)", "b", "m"])
#%matplotlib qt5
analisis = Graficador(reader, ['omega_m', 'b', 'H0'], 'HS CC+H0')
analisis.graficar_contornos(sol, discard=burnin, thin=thin, poster=True)

#%%
analisis.graficar_cadenas()
analisis.reportar_intervalos(sol)

#%%
analisis.graficar_taus_vs_n(num_param=None)
# analisis.graficar_taus_vs_n(num_param=1)
# analisis.graficar_taus_vs_n(num_param=2)

#%% Alternative way of plotting, worth investigating further!
#%matplotlib qt5
reader = emcee.backends.HDFBackend(filename)
samples = reader.get_chain(discard=burnin, flat=True, thin=thin)
emcee_data = az.from_emcee(reader, var_names=[r'$\Omega_{m}$', 'b', r'$H_{0}$'])
emcee_data
az.plot_pair(
    emcee_data,
    kind='kde',
    #kde_kwargs={"fill_last": False, 'bw': 'scott', 'levels': [1-0.95, 1-0.68]},
    contour=True,
    divergences=True,
    marginals=True,
    point_estimate='mean',
    textsize=18)

#%%
az.plot_posterior(emcee_data)
plt.show()
#%%
def get_inference_data(self):
    return from_emcee(self.obj, var_names=['ln(f)', 'b', 'm'])
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability, args=(Data_x, Data_y))
sampler.run_mcmc(pos, NSAMPLES, progress=True)
flat_samples = sampler.get_chain(discard=100, thin=15, flat=True)
blobs = sampler.get_blobs(discard=100, thin=15, flat=True)
inds = np.random.randint(len(flat_samples), size=100)

if plot:
    print("Plot 3")
    # now the arviz plots
    var_names = ['m', 'b', 's']
    emcee_data = az.from_emcee(sampler, var_names=var_names, blob_names=["silly"])
    postplot = az.plot_posterior(emcee_data, var_names=var_names[:], textsize=60, hdi_prob=0.66)
    postplot[0].set_xlabel("Intercept", fontsize=60)
    postplot[1].set_xlabel("Slope", fontsize=60)
    postplot[2].set_xlabel("Error", fontsize=60)
    #plt.show()
    plt.savefig('test_full_analysis_fig3.pdf', format='pdf', dpi=1200)
    plt.close()

    # now the trace plot
    print("Plot 4")
    plottrace = az.plot_trace(emcee_data,
# -

# ## Comparison
#
# Finally, let's compare the results of these different inference methods a bit more quantitatively.
# First, let's look at the posterior constraint on the period of the underdamped harmonic oscillator, the effective period of the oscillatory signal.

# +
import arviz as az

emcee_data = az.from_emcee(
    sampler,
    var_names=[
        "mean",
        "log_sigma1",
        "log_rho1",
        "log_tau",
        "log_sigma2",
        "log_rho2",
        "log_jitter",
    ],
)
for k in emcee_data.posterior.data_vars:
    if k.startswith("log_"):
        emcee_data.posterior[k[4:]] = np.exp(emcee_data.posterior[k])

with model:
    pm_data = az.from_pymc3(trace)
numpyro_data = az.from_numpyro(mcmc)

bins = np.linspace(1.5, 2.75, 25)
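# A hedged sketch of the comparison the text above describes (not from the
# original notebook): overlay one marginal from each backend on the shared
# bins. "tau" is a placeholder variable name here; the tutorial may well
# compare a different derived period parameter.
import matplotlib.pyplot as plt

for label, data in [("emcee", emcee_data), ("PyMC3", pm_data), ("numpyro", numpyro_data)]:
    plt.hist(np.asarray(data.posterior["tau"]).flatten(), bins,
             histtype="step", density=True, label=label)
plt.legend()
plt.xlabel("tau")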
def posterior_samples(self, burn: int = 0, thin: int = 1, derived_parameters: bool = True, arviz: bool = False):
    if not arviz:
        df = super().posterior_samples(burn=burn, thin=thin)
        if derived_parameters:
            for k2c in df.columns[self._sl_k2]:
                df[k2c.replace('k2', 'k')] = sqrt(df[k2c])
            df['a'] = as_from_rhop(df.rho.values, df.p.values)
            df['inc'] = i_from_baew(df.b.values, df.a.values, 0., 0.)
            average_ks = sqrt(df.iloc[:, self._sl_k2]).mean(1).values
            df['t14'] = d_from_pkaiews(df.p.values, average_ks, df.a.values, df.inc.values, 0., 0., 1, kind=14)
            df['t23'] = d_from_pkaiews(df.p.values, average_ks, df.a.values, df.inc.values, 0., 0., 1, kind=23)
        return df
    else:
        dd = az.from_emcee(self.sampler, var_names=self.ps.names)
        ds = xa.Dataset()
        pst = dd.posterior
        ds['k'] = sqrt(pst.k2)
        ds['a'] = xa.DataArray(as_from_rhop(pst.rho.values, pst.p.values), coords=pst.k2.coords)
        ds['inc'] = xa.DataArray(i_from_baew(pst.b.values, ds.a.values, 0., 0.), coords=pst.k2.coords)
        ds['t14'] = xa.DataArray(d_from_pkaiews(pst.p.values, ds.k.values, ds.a.values, ds.inc.values, 0., 0., 1, kind=14), coords=pst.k2.coords)
        ds['t23'] = xa.DataArray(d_from_pkaiews(pst.p.values, ds.k.values, ds.a.values, ds.inc.values, 0., 0., 1, kind=23), coords=pst.k2.coords)
        dd.add_groups({'derived_parameters': ds})
        return dd
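# A possible follow-up (not in the original): since the derived quantities sit
# in their own InferenceData group, they can be summarised directly; az.summary
# accepts a `group` argument. `lpf` is a hypothetical instance of the class
# defining posterior_samples above.
# idata = lpf.posterior_samples(arviz=True)
# print(az.summary(idata, group="derived_parameters"))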