Esempio n. 1
0
 def test_peculiar_blobs(self, data):
     """Blobs mixing array and scalar parts map onto the requested sample_stats names."""
     def log_prob(x):
         # log-probability plus a two-part blob: a random draw and a constant
         return -(x ** 2), (np.random.normal(x), 3)

     peculiar_sampler = emcee.EnsembleSampler(6, 1, log_prob)
     peculiar_sampler.run_mcmc(np.random.normal(size=(6, 1)), 20)
     idata = from_emcee(peculiar_sampler, blob_names=["normal", "threes"])
     assert not check_multiple_attrs({"sample_stats": ["normal", "threes"]}, idata)
     idata = from_emcee(data.obj, blob_names=["mix"])
     assert not check_multiple_attrs({"sample_stats": ["mix"]}, idata)
Esempio n. 2
0
 def test_single_blob(self):
     """A sampler emitting one scalar blob per step stores it under the given name."""
     def log_prob(x):
         return -(x**2), 3

     blob_sampler = emcee.EnsembleSampler(6, 1, log_prob)
     blob_sampler.run_mcmc(np.random.normal(size=(6, 1)), 20)
     idata = from_emcee(blob_sampler, blob_names=["blob"])
     assert not check_multiple_attrs({"sample_stats": ["blob"]}, idata)
Esempio n. 3
0
    def fit(self,
            num_samples=500,
            num_burnin=200,
            num_walkers=30,
            num_cores=None):
        """Run the emcee ensemble sampler and return the chain as ArviZ InferenceData.

        When ``num_cores`` is given a multiprocessing pool drives the walkers;
        otherwise a null context keeps the ``with`` block uniform and sampling
        runs serially. Burn-in draws are discarded via ``sampler.reset()``.
        """
        # choose starting positions for the walkers
        p0 = self.gen_start_point(num_walkers)

        # serial execution gets a do-nothing context manager
        pool_ctx = nullcontext() if num_cores is None else Pool(processes=num_cores)
        with pool_ctx as pool:
            print('number of priors is ' + str(len(self.prior)))
            print('number of parameters is ' + str(p0.shape[1]))
            sampler = emcee.EnsembleSampler(num_walkers,
                                            p0.shape[1],
                                            self.logp,
                                            moves=emcee.moves.DESnookerMove(),
                                            pool=pool)
            # optional burn-in: advance, keep the final state, discard the chain
            if num_burnin > 0:
                p0 = sampler.run_mcmc(p0, num_burnin, progress=True)
                sampler.reset()
            sampler.run_mcmc(p0, num_samples, progress=True)

        print("Mean acceptance fraction: {0:.3f}".format(
            np.mean(sampler.acceptance_fraction)))

        # emcee cannot infer variable names, so supply them explicitly
        var_names = self.gather_parameter_names()
        return arviz.from_emcee(sampler, var_names=var_names)
Esempio n. 4
0
    def get_inference_data_reader(self, **kwargs):
        """Open the saved emcee HDF5 backend read-only and convert it with from_emcee."""
        from emcee import backends  # pylint: disable=no-name-in-module

        test_dir = os.path.dirname(os.path.abspath(__file__))
        filepath = os.path.join(test_dir, "..", "saved_models", "reader_testfile.h5")
        # the fixture file must exist and be non-empty before we try to read it
        assert os.path.exists(filepath)
        assert os.path.getsize(filepath)
        return from_emcee(backends.HDFBackend(filepath, read_only=True), **kwargs)
Esempio n. 5
0
 def posterior_samples(self):
     """Convert the emcee chain to InferenceData and attach per-planet derived
     quantities (k, a, e, w, i, t14, t23) as a 'derived_parameters' group."""
     dd = az.from_emcee(self.sampler, var_names=self.ps.names)
     ds = xa.Dataset()
     pst = dd.posterior
     c = pst.rho.coords  # reuse the posterior's (chain, draw) coords for every derived array
     DA = xa.DataArray
     for i in range(1, self.nplanets + 1):
         p = pst[f'p_{i}'].values
         # radius ratio from its squared sampling parameter k2
         ds[f'k_{i}'] = k = DA(sqrt(pst[f'k2_{i}']), coords=c)
         # scaled semi-major axis from stellar density and period (helper name suggests a(rho, p))
         ds[f'a_{i}'] = a = DA(as_from_rhop(pst.rho.values, p), coords=c)
         # eccentricity from the (sqrt(e)cos w, sqrt(e)sin w) parameterisation
         ds[f'e_{i}'] = e = DA(pst[f'secw_{i}']**2 + pst[f'sesw_{i}']**2,
                               coords=c)
         ds[f'w_{i}'] = w = DA(arctan2(pst[f'sesw_{i}'], pst[f'secw_{i}']),
                               coords=c)
         # inclination from impact parameter b, a, e and w
         ds[f'i_{i}'] = inc = DA(i_from_baew(pst[f'b_{i}'].values, a.values,
                                             e.values, w.values),
                                 coords=c)
         # transit durations; kind=14/23 presumably select total vs full duration — TODO confirm
         ds[f't14_{i}'] = DA(d_from_pkaiews(p,
                                            k.values,
                                            a.values,
                                            inc.values,
                                            0.,
                                            0.,
                                            1,
                                            kind=14),
                             coords=c)
         ds[f't23_{i}'] = DA(d_from_pkaiews(p,
                                            k.values,
                                            a.values,
                                            inc.values,
                                            0.,
                                            0.,
                                            1,
                                            kind=23),
                             coords=c)
     dd.add_groups({'derived_parameters': ds})
     return dd
Esempio n. 6
0
    def fit(self,
            num_samples=1000,
            num_burnin=200,
            num_walkers=30,
            num_cores=None):
        """Run the emcee ensemble sampler and return an ArviZ InferenceData.

        When ``num_cores`` is given a multiprocessing pool drives the walkers;
        otherwise sampling runs serially through a null context. Burn-in draws
        are discarded via ``sampler.reset()``.
        """
        try:
            import emcee
        except ModuleNotFoundError as e:
            raise ModuleNotFoundError(
                "Bayesian estimation requires emcee to be installed.") from e

        # figure out a good place to begin our sampling
        p0 = self.gen_start_point(num_walkers)

        # instantiate context appropriate for the number of cores requested
        if num_cores is not None:
            poolholder = Pool(processes=num_cores)
        else:
            poolholder = nullcontext()
        with poolholder as pool:
            # generate sampler
            print('number of priors is ' + str(len(self.prior)))
            # BUG FIX: the dimension count is the number of parameters per
            # walker, p0.shape[1]; the previous len(p0.shape[0]) called len()
            # on an int and raised TypeError.
            sampler = emcee.EnsembleSampler(num_walkers,
                                            p0.shape[1],
                                            self.logp,
                                            moves=emcee.moves.DESnookerMove(),
                                            pool=pool)
            if num_burnin > 0:
                p0 = sampler.run_mcmc(p0, num_burnin, progress=True)
                sampler.reset()
            # BUG FIX: `nsamples` was undefined (NameError); the parameter is
            # num_samples, matching the sibling fit() implementation.
            sampler.run_mcmc(p0, num_samples, progress=True)

        # define variable names, it cannot be inferred from emcee
        var_names = self.gather_parameter_names()
        return arviz.from_emcee(sampler, var_names=var_names)
Esempio n. 7
0
 def test_no_blobs_error(self):
     """Requesting blob names from a sampler that produced no blobs raises ValueError."""
     def log_prob(x):
         return -(x**2)

     plain_sampler = emcee.EnsembleSampler(6, 1, log_prob)
     plain_sampler.run_mcmc(np.random.normal(size=(6, 1)), 20)
     with pytest.raises(ValueError):
         from_emcee(plain_sampler, blob_names=["inexistent"])
Esempio n. 8
0
 def test_bad_blobs(self, data, blob_args):
     """Inconsistent blob name/group specifications raise the parametrised error."""
     expected_error, blob_names, blob_groups = blob_args
     with pytest.raises(expected_error):
         from_emcee(data.obj, blob_names=blob_names, blob_groups=blob_groups)
Esempio n. 9
0
 def test_verify_arg_names(self, data):
     """Too few arg_names for the sampler's args must be rejected with ValueError."""
     bad_names = ["not enough"]
     with pytest.raises(ValueError):
         from_emcee(data.obj, arg_names=bad_names)
Esempio n. 10
0
 def test_slices_warning(self, data, slices):
     """Problematic `slices` specifications emit a UserWarning during conversion."""
     slice_spec = slices
     with pytest.warns(UserWarning):
         from_emcee(data.obj, slices=slice_spec)
Esempio n. 11
0
# overlay the least-squares and maximum-likelihood line fits
plt.plot(x0, np.dot(np.vander(x0, 2), w), "--k", label="LS")
plt.plot(x0, np.dot(np.vander(x0, 2), [m_ml, b_ml]), ":k", label="ML")
plt.legend(fontsize=14)
plt.xlim(0, 10)
plt.xlabel("x")
plt.ylabel("y")

#plt.show()
plt.savefig('test_emcee_2_fig1.svg', format='svg', dpi=1200)
plt.close()

# now arviz plots
import arviz as az

var_names = ['m', 'b', 'f']
emcee_data = az.from_emcee(sampler, var_names=var_names)
# NOTE(review): var_names[:] is a redundant full copy of var_names
az.plot_posterior(emcee_data, var_names=var_names[:])
#plt.show()
plt.savefig('test_emcee_2_fig2.svg', format='svg', dpi=1200)
plt.close()

# now trace plot
az.plot_trace(emcee_data, var_names=var_names)
#plt.show()
plt.savefig('test_emcee_2_fig3.svg', format='svg', dpi=1200)
plt.close()

# pairwise KDE plot of the sampled parameters
az.plot_pair(emcee_data, var_names=var_names, kind='kde')
#plt.show()
plt.savefig('test_emcee_2_fig4.svg', format='svg', dpi=1200)
plt.close()
Esempio n. 12
0
 def test_inference_data(self, data, test_args):
     """Parametrised conversion: each kwargs set produces the expected groups/attrs."""
     kwargs, expected_attrs = test_args
     idata = from_emcee(data.obj, **kwargs)
     assert not check_multiple_attrs(expected_attrs, idata)
Esempio n. 13
0
 def test__verify_arg_names(self):
     """A wrong-length arg_names list is rejected with ValueError."""
     too_few = ['not', 'enough']
     with pytest.raises(ValueError):
         from_emcee(self.obj, arg_names=too_few)
Esempio n. 14
0
 def test__verify_arg_names(self, obj):
     """arg_names shorter than the sampler's argument count raises ValueError."""
     too_few = ["not", "enough"]
     with pytest.raises(ValueError):
         from_emcee(obj, arg_names=too_few)
Esempio n. 15
0
 def test__verify_var_names(self):
     """var_names with the wrong length is rejected with ValueError."""
     too_few = ["not", "enough"]
     with pytest.raises(ValueError):
         from_emcee(self.obj, var_names=too_few)
    return like + prior


# 50 walkers, 700 draws over J school effects plus mu and tau
nwalkers, draws = 50, 700
ndim = J + 2
pos = np.random.normal(size=(nwalkers, ndim))
pos[:, 1] = np.absolute(pos[:, 1])  # column 1 is tau (see var_names below); start it non-negative

sampler = emcee.EnsembleSampler(nwalkers,
                                ndim,
                                lnprob_8school,
                                args=(y_obs, sigma))

sampler.run_mcmc(pos, draws)
var_names = ['mu', 'tau'] + ['eta{}'.format(i) for i in range(J)]
# convert to InferenceData, dropping the first 100 draws as burn-in
emcee_data = az.from_emcee(sampler,
                           var_names=var_names).sel(draw=slice(100, None))
az.plot_posterior(emcee_data, var_names=var_names[:3])
#plt.show()
plt.savefig('test_arviz_fig1.svg', format='svg', dpi=1200)
plt.close()

# alternative conversion: group the eta parameters into one vector variable via slices
emcee_data = az.from_emcee(sampler, slices=[0, 1, slice(2, None)])
az.plot_trace(emcee_data, var_names=["var_2"], coords={"var_2_dim_0": 4})
#plt.show()
plt.savefig('test_arviz_fig2.svg', format='svg', dpi=1200)
plt.close()


def lnprob_8school_blobs(theta, y, s):
    prior = log_prior_8school(theta)
    like_vect = log_likelihood_8school(theta, y, s)
Esempio n. 17
0
        break
    old_tau = tau

#%%

# gather 100 more samples to ensure clean chains
sampler.run_mcmc(backend.get_last_sample(), 100, progress=True)

#%%

import arviz as az
import corner

# plot chains read back from the saved HDF5 backend
reader = emcee.backends.HDFBackend("example_chain.hdf5")
full_data = az.from_emcee(reader, var_names=model.labels)
az.plot_trace(full_data)

#%%

# remove burn-in data and thin and replot
tau = reader.get_autocorr_time(tol=0)
burnin = int(tau.max())  # discard up to the slowest parameter's autocorrelation time
thin = int(0.3 * np.min(tau))
burn_samples = reader.get_chain(discard=burnin, thin=thin)
log_prob_samples = reader.get_log_prob(discard=burnin, thin=thin)
log_prior_samples = reader.get_blobs(discard=burnin, thin=thin)

# rebuild an InferenceData from the cleaned chains, keyed by parameter label
dd = dict(zip(model.labels, burn_samples.T))
burn_data = az.from_dict(dd)
Esempio n. 18
0
 def get_inference_data(self, obj):
     """Convert the fixture sampler, naming its three variables explicitly."""
     names = ["ln(f)", "b", "m"]
     return from_emcee(obj, var_names=names)
    #%matplotlib qt5
    # contour / chain / interval diagnostics via the project's Graficador helper
    analisis = Graficador(reader, ['omega_m', 'b', 'H0'], 'HS CC+H0')
    analisis.graficar_contornos(sol, discard=burnin, thin=thin, poster=True)
    #%%
    analisis.graficar_cadenas()
    analisis.reportar_intervalos(sol)
    #%%
    analisis.graficar_taus_vs_n(num_param=None)
    #	analisis.graficar_taus_vs_n(num_param=1)
    #	analisis.graficar_taus_vs_n(num_param=2)

    #%% Alternative plotting approach, needs further investigation
    #%matplotlib qt5
    reader = emcee.backends.HDFBackend(filename)
    samples = reader.get_chain(discard=burnin, flat=True, thin=thin)
    # BUG FIX: raw strings for the LaTeX labels — '\O' in a plain string is an
    # invalid escape sequence (SyntaxWarning today, SyntaxError in future
    # CPython); the raw-string form has the identical value.
    emcee_data = az.from_emcee(reader,
                               var_names=[r'$\Omega_{m}$', 'b', r'$H_{0}$'])
    emcee_data
    az.plot_pair(
        emcee_data,
        kind='kde',
        #kde_kwargs={"fill_last": False, 'bw':'scott','levels':[1-0.95, 1-0.68]},
        contour=True,
        divergences=True,
        marginals=True,
        point_estimate='mean',
        textsize=18)
    #%%
    az.plot_posterior(emcee_data)
    plt.show()

    #%%
Esempio n. 20
0
    def get_inference_data(self):
        """Build InferenceData from the stored sampler, naming the three variables."""
        names = ['ln(f)', 'b', 'm']
        return from_emcee(self.obj, var_names=names)
# run the ensemble sampler on the (Data_x, Data_y) dataset
sampler = emcee.EnsembleSampler(nwalkers,
                                ndim,
                                log_probability,
                                args=(Data_x, Data_y))
sampler.run_mcmc(pos, NSAMPLES, progress=True)
# burn 100 draws, thin by 15, and flatten all walkers into one chain
flat_samples = sampler.get_chain(discard=100, thin=15, flat=True)
blobs = sampler.get_blobs(discard=100, thin=15, flat=True)
inds = np.random.randint(len(flat_samples), size=100)

if plot:
    print("Plot 3")
    # now plotting
    # now arviz plots
    var_names = ['m', 'b', 's']
    emcee_data = az.from_emcee(sampler,
                               var_names=var_names,
                               blob_names=["silly"])
    # posterior panels with 66% HDI; NOTE(review): var_names[:] is a redundant copy
    postplot = az.plot_posterior(emcee_data,
                                 var_names=var_names[:],
                                 textsize=60,
                                 hdi_prob=0.66)
    postplot[0].set_xlabel("Intercept", fontsize=60)
    postplot[1].set_xlabel("Slope", fontsize=60)
    postplot[2].set_xlabel("Error", fontsize=60)
    #plt.show()
    plt.savefig('test_full_analysis_fig3.pdf', format='pdf', dpi=1200)
    plt.close()

    # now trace plot
    print("Plot 4")
    plottrace = az.plot_trace(emcee_data,
Esempio n. 22
0
# -

# ## Comparison
#
# Finally, let's compare the results of these different inference methods a bit more quantitaively.
# First, let's look at the posterior constraint on the period of the underdamped harmonic oscillator, the effective period of the oscillatory signal.

# +
import arviz as az

# convert the emcee run, naming each sampled variable
emcee_data = az.from_emcee(
    sampler,
    var_names=[
        "mean",
        "log_sigma1",
        "log_rho1",
        "log_tau",
        "log_sigma2",
        "log_rho2",
        "log_jitter",
    ],
)
# BUG FIX: iterate over a snapshot of the variable names. The loop body adds
# new variables (the exponentiated counterparts) to the same posterior
# Dataset, and mutating a mapping while iterating its data_vars view raises
# "dictionary changed size during iteration".
for k in list(emcee_data.posterior.data_vars):
    if k.startswith("log_"):
        emcee_data.posterior[k[4:]] = np.exp(emcee_data.posterior[k])

# PyMC3 requires the model context to resolve the trace's variables
with model:
    pm_data = az.from_pymc3(trace)

numpyro_data = az.from_numpyro(mcmc)

# common bin edges for comparing the period posteriors across samplers
bins = np.linspace(1.5, 2.75, 25)
Esempio n. 23
0
    def posterior_samples(self,
                          burn: int = 0,
                          thin: int = 1,
                          derived_parameters: bool = True,
                          arviz: bool = False):
        """Return posterior samples as a DataFrame or as ArviZ InferenceData.

        Parameters
        ----------
        burn
            Initial samples to discard (DataFrame path only).
        thin
            Thinning factor (DataFrame path only).
        derived_parameters
            When True, add derived quantities (k, a, inc, t14, t23) on the
            DataFrame path; the ArviZ path always attaches them.
        arviz
            When True, build InferenceData straight from the raw sampler and
            attach the derived quantities as a 'derived_parameters' group;
            note burn/thin are not applied on this path.
        """
        if not arviz:
            df = super().posterior_samples(burn=burn, thin=thin)
            if derived_parameters:
                # radius ratios from their squared sampling parameters
                for k2c in df.columns[self._sl_k2]:
                    df[k2c.replace('k2', 'k')] = sqrt(df[k2c])
                # scaled semi-major axis from stellar density and period
                df['a'] = as_from_rhop(df.rho.values, df.p.values)
                # inclination assuming a circular orbit (e = 0, w = 0)
                df['inc'] = i_from_baew(df.b.values, df.a.values, 0., 0.)

                # use the mean radius ratio across all k2 columns for the durations
                average_ks = sqrt(df.iloc[:, self._sl_k2]).mean(1).values
                # transit durations; kind=14/23 presumably select total vs full duration — TODO confirm
                df['t14'] = d_from_pkaiews(df.p.values,
                                           average_ks,
                                           df.a.values,
                                           df.inc.values,
                                           0.,
                                           0.,
                                           1,
                                           kind=14)
                df['t23'] = d_from_pkaiews(df.p.values,
                                           average_ks,
                                           df.a.values,
                                           df.inc.values,
                                           0.,
                                           0.,
                                           1,
                                           kind=23)
            return df
        else:
            # ArviZ path: same derived quantities, built as DataArrays sharing
            # the posterior's (chain, draw) coordinates
            dd = az.from_emcee(self.sampler, var_names=self.ps.names)
            ds = xa.Dataset()
            pst = dd.posterior
            ds['k'] = sqrt(pst.k2)
            ds['a'] = xa.DataArray(as_from_rhop(pst.rho.values, pst.p.values),
                                   coords=pst.k2.coords)
            ds['inc'] = xa.DataArray(i_from_baew(pst.b.values, ds.a.values, 0.,
                                                 0.),
                                     coords=pst.k2.coords)
            ds['t14'] = xa.DataArray(d_from_pkaiews(pst.p.values,
                                                    ds.k.values,
                                                    ds.a.values,
                                                    ds.inc.values,
                                                    0.,
                                                    0.,
                                                    1,
                                                    kind=14),
                                     coords=pst.k2.coords)
            ds['t23'] = xa.DataArray(d_from_pkaiews(pst.p.values,
                                                    ds.k.values,
                                                    ds.a.values,
                                                    ds.inc.values,
                                                    0.,
                                                    0.,
                                                    1,
                                                    kind=23),
                                     coords=pst.k2.coords)
            dd.add_groups({'derived_parameters': ds})
            return dd