Exemple #1
0
def plot_ppc(predictive, y, S=1000, title=None):
    """Plot a posterior predictive check: observed KDE vs. simulated KDEs.

    Parameters
    ----------
    predictive
        Object exposing ``rvs(size=..., random_state=...)``; it is asked for
        ``S`` replicated datasets, each with ``y.shape[0]`` values.
    y
        Observed data (array-like with ``shape`` and ``flatten``).
    S
        Number of replicated datasets to draw and overlay.
    title
        Optional figure title; skipped when falsy.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    fig, ax = plt.subplots(figsize=(12, 8))
    # Fixed random_state so the overlay is reproducible between calls.
    y_sampled = predictive.rvs(size=(S, y.shape[0]), random_state=1)
    linewidth = 4
    # Observed data: thick black line drawn on top (zorder=3), no fill.
    plot_kde(
        y.flatten(),
        label="Observed",
        plot_kwargs={"color": "k", "linewidth": linewidth, "zorder": 3},
        fill_kwargs={"alpha": 0},
        ax=ax,
    )
    pp_densities = []
    pp_xs = []
    for vals in y_sampled:
        vals = np.array([vals]).flatten()
        # kde is unpacked as (grid, density) here.
        pp_x, pp_density = kde(vals)
        pp_densities.append(pp_density)
        pp_xs.append(pp_x)

    # One thin, transparent curve per replicated dataset (columns after the
    # transpose).
    ax.plot(
        np.transpose(pp_xs),
        np.transpose(pp_densities),
        **{"color": 'b', "alpha": 0.1, "linewidth": 0.15 * linewidth},
    )
    # Empty artist that only provides a single legend entry for the overlay.
    ax.plot([], color='b', label="Posterior predictive")

    plt.xlabel('y')
    # Was a second plt.xlabel call, which overwrote 'y' and left the y-axis
    # unlabelled.
    plt.ylabel('density')
    if title:
        plt.title(title)
    plt.legend()
    plt.show()
def make_plot_panel(dims, sigmas, student_prior=False, standardize=False):
    """Draw one prior-predictive KDE panel per (dim, sigma) pair.

    For every entry of ``dims`` a coefficient matrix is sampled from the prior
    (Normal, or Student-t with 3 degrees of freedom when ``student_prior`` is
    true), binary features are simulated, Bernoulli outcomes are drawn through
    ``expit``, and the KDE of the per-coefficient-vector mean outcome is
    plotted in that panel.  With ``standardize`` the features are passed
    through ``StandardScaler`` first.
    """
    n_panels = len(dims)
    _, axes = plt.subplots(
        1, n_panels, figsize=(n_panels * 3, 5), sharex=True, sharey=True)
    axes = np.ravel(axes)
    np.random.seed(0)
    nbetas = 10000  # number of random coefficient vectors to try
    ndata = 500  # number of observations simulated per coefficient vector
    for i, dim in enumerate(dims):
        ax = axes[i]
        sigma = sigmas[i]
        # df=3 for the Student prior (df=1 would be a Cauchy).
        prior = stats.t(3, 0, sigma) if student_prior else stats.norm(0, sigma)
        betas = prior.rvs((nbetas, dim))
        # Binary features; a Gaussian alternative would be
        # stats.norm(0, 1).rvs((dim, ndata)).
        X = np.random.binomial(n=1, p=0.8, size=(dim, ndata))
        if standardize:
            X = StandardScaler().fit_transform(X.T).T
        # Shape (nbetas, ndata): one simulated dataset per coefficient vector.
        probs = expit(betas @ X)
        ys = np.random.binomial(n=1, p=probs)
        # Mean over the ndata axis, KDE over the nbetas values.
        az.plot_kde(ys.mean(1), ax=ax)
        template = ("{:d} predictors, std={:0.2f}, student prior"
                    if student_prior else "{:d} predictors, std={:0.2f}")
        ax.set_title(template.format(dim, sigma))
Exemple #3
0
def plot_gi(posterior_samples,
            mean_varname="gi_mean",
            sd_varname="gi_sd",
            newfig=True):
    """Plot side-by-side KDEs of two entries of ``posterior_samples``.

    Parameters
    ----------
    posterior_samples
        Mapping-like object indexed by variable name.
    mean_varname, sd_varname
        Keys of the samples shown in the left and right panel respectively.
    newfig
        When true a new 6x3 inch, 300 dpi figure is created; otherwise the
        current figure is drawn into.
    """
    if newfig:
        plt.figure(figsize=(6, 3), dpi=300)
    plt.subplot(121)
    az.plot_kde(posterior_samples[mean_varname], ax=plt.gca())
    plt.ylabel("density")
    # Raw strings: "\m" and "\s" are invalid escape sequences in a normal
    # string literal and trigger a warning on recent Python versions.
    plt.xlabel(r"$\mu_{GI}$")
    plt.gca().set_ylim(bottom=0)

    plt.subplot(122)
    az.plot_kde(posterior_samples[sd_varname], ax=plt.gca())
    plt.ylabel("density")
    plt.xlabel(r"$\sigma_{GI}$")
    plt.tight_layout()
    plt.gca().set_ylim(bottom=0)
Exemple #4
0
    def plot_joint_posterior(self,
                             plotters,
                             iteration=-1,
                             kind='kde',
                             **joint_kwargs):
        """Draw the joint distribution of two variables on ``self.axjoin``.

        Parameters
        ----------
        plotters
            Two entries; for each, items 0 and 1 are passed to ``make_label``
            and item 2 is the sample array (flattened before use).
        iteration
            Samples are sliced as ``[:iteration]``; with the default ``-1``
            the last flattened sample is therefore excluded.
        kind
            ``"scatter"``, ``"kde"``; any other value draws a hexbin plot.
        **joint_kwargs
            Forwarded to the underlying plotting call.  ``fill_last`` (kde)
            and ``grid_size`` (hexbin) are consumed here rather than
            forwarded.
        """
        # Set labels for axes
        x_var_name = make_label(plotters[0][0], plotters[0][1])
        y_var_name = make_label(plotters[1][0], plotters[1][1])

        self.axjoin.set_xlabel(x_var_name, fontsize=self.ax_labelsize)
        self.axjoin.set_ylabel(y_var_name, fontsize=self.ax_labelsize)
        self.axjoin.tick_params(labelsize=self.xt_labelsize)

        # Flatten data
        x = plotters[0][2].flatten()[:iteration]
        y = plotters[1][2].flatten()[:iteration]

        if kind == "scatter":
            self.axjoin.scatter(x, y, **joint_kwargs)
        elif kind == "kde":
            joint_kwargs.setdefault('contour', True)
            # pop (not get): the value is passed explicitly below, so leaving
            # it in joint_kwargs would pass the keyword twice.
            fill_last = joint_kwargs.pop('fill_last', False)

            try:
                self.foo = plot_kde(x,
                                    y,
                                    fill_last=fill_last,
                                    ax=self.axjoin,
                                    **joint_kwargs)
            except (ValueError, np.linalg.LinAlgError):
                # Best effort: the panel is left without a KDE when the
                # estimate cannot be computed for the current samples.
                pass
        else:
            # pop (not get): 'grid_size' is not a hexbin keyword and must not
            # be forwarded through **joint_kwargs.
            gridsize = joint_kwargs.pop('grid_size', 'auto')
            if gridsize == "auto":
                gridsize = int(len(x)**0.35)
            self.axjoin.hexbin(x,
                               y,
                               mincnt=1,
                               gridsize=gridsize,
                               **joint_kwargs)
            # NOTE(review): `_grid` is not defined in the visible code;
            # confirm it exists on the axes object (`grid(False)` may have
            # been intended).
            self.axjoin._grid(False)
Exemple #5
0
def plot_pit(model, x, y, S=1000, title=None):
    """Plot the KDE of leave-one-out PIT values against uniform references.

    For each row of ``x`` a fresh model instance is fitted on all other rows
    and the predictive ``cdf`` is evaluated at the held-out ``y`` value.  The
    KDE of those values is drawn together with the KDEs of ``S`` uniform
    samples of the same length.  The figure is shown; nothing is returned.
    """

    def _held_out_cdf(k):
        # Fit on everything except row k, then score row k.
        x_rest = np.concatenate([x[0:k], x[k+1:]])
        y_rest = np.concatenate([y[0:k], y[k+1:]])
        fitted = _get_new_model_instance(model)
        fitted.fit(x_rest, y_rest)
        return fitted.predict(x[k]).cdf(y[k])

    p_is = [_held_out_cdf(k) for k in range(x.shape[0])]

    linewidth = 4
    _, ax = plt.subplots(figsize=(12, 8))
    observed_style = {"color": "k", "linewidth": linewidth, "zorder": 3}
    plot_kde(
        np.array(p_is),
        label="PIT density",
        plot_kwargs=observed_style,
        fill_kwargs={"alpha": 0},
        ax=ax,
    )

    # Reference band: KDEs of S uniform samples, seeded for reproducibility.
    rng = np.random.default_rng(1)
    uni_xs, uni_densities = [], []
    for _ in range(S):
        grid, density = kde(rng.random(y.shape[0]))
        uni_densities.append(density)
        uni_xs.append(grid)

    ax.plot(
        np.transpose(uni_xs),
        np.transpose(uni_densities),
        color='b',
        alpha=0.1,
        linewidth=0.15 * linewidth,
    )
    # Empty artist supplying a single legend entry for the reference curves.
    ax.plot([], color='b', label="Uniform empirical densities")
    if title:
        plt.title(title)
    plt.legend()
    plt.show()
Exemple #6
0
def analyze_post(post, method):
    """Summarise ``post`` and save forest, pair and sum-KDE plots.

    Parameters
    ----------
    post
        Posterior samples; must contain the entries ``"bl"`` and ``"br"``.
    method
        Label used as the title of every figure and inside the output file
        names.
    """
    print_summary(post, 0.95, False)
    fig, ax = plt.subplots()
    az.plot_forest(post, hdi_prob=0.95, figsize=(10, 4), ax=ax)
    plt.title(method)
    pml.savefig(f'multicollinear_forest_plot_{method}.pdf')
    plt.show()

    # post = m6_1.sample_posterior(random.PRNGKey(1), p6_1, (1000,))
    fig, ax = plt.subplots()
    az.plot_pair(post, var_names=["br", "bl"],
                 scatter_kwargs={"alpha": 0.1}, ax=ax)
    # Title is set before saving so the saved file carries it, like the
    # other two figures.
    plt.title(method)
    pml.savefig(f'multicollinear_joint_post_{method}.pdf')
    plt.show()

    sum_blbr = post["bl"] + post["br"]
    fig, ax = plt.subplots()
    az.plot_kde(sum_blbr, label="sum of bl and br", ax=ax)
    plt.title(method)
    pml.savefig(f'multicollinear_sum_post_{method}.pdf')
    plt.show()
Exemple #7
0
def bayesEstimation(_smoothingWindow, _raw):
    """Fit a Normal model to the processed signal and show diagnostic plots.

    ``_raw`` is passed through ``sglProcessing`` with ``_smoothingWindow``;
    the result is used as the observed data of a Normal likelihood with a
    Normal prior on the mean and a HalfNormal prior on the standard
    deviation.  The function prints summaries and shows several figures; it
    returns nothing.
    """
    # Debug aid: uncomment to restrict the input to the first 500 samples.
    # _raw = _raw[:500]
    X = sglProcessing(_raw, _smoothingWindow)  # Calls the processing function
    print(np.mean(X))

    n_samples = 1000

    with pm.Model() as model:

        # The prior was previously shadowed by a following `mu = 50`, which
        # made the likelihood ignore it; the likelihood now uses the prior.
        mu = pm.Normal('mu', mu=50, sd=1)
        sigma = pm.HalfNormal("sigma", sd=30)
        estimation = pm.Normal("estimation", mu=mu, sd=sigma, observed=X)
        trace = pm.sample(n_samples)
        print("Showing the plots")
        az.plot_kde(X, rug=True)
        plt.yticks([0], alpha=0)
        plt.show()
        pm.traceplot(trace, legend=True)

        print(az.summary(trace))
        print(
            "----------------------------------------------------------------------"
        )
        plt.show()
        az.plot_posterior(trace)
        plt.title("posterior")
        plt.show()

        # Ten posterior predictive datasets, plotted raw and as a KDE next to
        # the KDE of the observed data.
        ppc = pm.sample_posterior_predictive(trace, samples=10, model=model)
        print("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
        plt.plot(ppc['estimation'].T)
        plt.show()

        az.plot_kde(ppc['estimation'].T)
        az.plot_kde(X, rug=True)
        plt.title("simulated data dist")
        plt.show()
import numpyro.distributions as dist
from numpyro.infer import Predictive

import arviz as az

from jax.scipy.special import expit
from functools import partial

### Model with just offset term


def model_meta(prior_std, obs=None):
    """Offset-only model: Normal(0, prior_std) logit feeding a Binomial."""
    offset_prior = dist.Normal(0, prior_std)
    a = numpyro.sample("a", offset_prior)
    likelihood = dist.Binomial(logits=a)
    numpyro.sample("obs", likelihood, obs=obs)


# Compare the implied prior on the success probability for two prior scales.
fig, ax = plt.subplots()
colors = ['r', 'k']
for i, sigma in enumerate([1.5, 10]):
    model = partial(model_meta, sigma)
    prior = Predictive(model, num_samples=10000)(random.PRNGKey(1999))
    # Map the sampled offsets from logit space to probabilities.
    p = expit(prior["a"])
    # sigma is passed to model_meta as prior_std (the Normal scale), so the
    # legend reports a standard deviation; the former label said "variance"
    # and ended in a stray, unmatched '$'.
    label = f'std={sigma:0.2f}'
    az.plot_kde(p,
                ax=ax,
                plot_kwargs={'color': colors[i]},
                label=label,
                legend=True)
pml.savefig('logreg_prior_offset.pdf', dpi=300)
plt.show()
# we can center the data
#x = x - x.mean()
# or standardize the data
#x = (x - x.mean())/x.std()
#y = (y - y.mean())/y.std()


# In[4]:


# Left panel: scatter of the data with the line y_real; right panel: KDE of y.
# x, y and y_real are defined outside the visible part of this script.
_, ax = plt.subplots(1, 2, figsize=(8, 4))
ax[0].plot(x, y, 'C0.')
ax[0].set_xlabel('x')
ax[0].set_ylabel('y', rotation=0)
ax[0].plot(x, y_real, 'k')
az.plot_kde(y, ax=ax[1])
ax[1].set_xlabel('y')
plt.tight_layout()
plt.savefig('B11197_03_02.png', dpi=300)


# In[5]:


# Linear model: Normal priors on intercept α and slope β, HalfCauchy prior on
# the noise scale ϵ; μ is stored as a deterministic so it appears in the trace.
with pm.Model() as model_g:
    α = pm.Normal('α', mu=0, sd=10)
    β = pm.Normal('β', mu=0, sd=1)
    ϵ = pm.HalfCauchy('ϵ', 5)

    μ = pm.Deterministic('μ', α + β * x)
    y_pred = pm.Normal('y_pred', mu=μ, sd=ϵ, observed=y)
"""
KDE quantiles Bokeh
===================

_thumb: .2, .8
"""
import arviz as az
import numpy as np

# 1000 Beta draws whose first shape parameter is itself drawn uniformly from
# [0.5, 10); the KDE is drawn with the three quartiles marked.
dist = np.random.beta(np.random.uniform(0.5, 10), 5, size=1000)
ax = az.plot_kde(dist, quantiles=[0.25, 0.5, 0.75], backend="bokeh")
Exemple #11
0
"""
2d KDE (default style)
======================

_thumb: .1, .8
"""
import numpy as np

import arviz as az

# Two-dimensional KDE of 100 independent uniform (x, y) points.
ax = az.plot_kde(np.random.rand(100), np.random.rand(100), backend="bokeh")
Exemple #12
0
# Reference distributions; c_dist holds the two components of a mixture.
# a_dist, size, ax, bw and scipykdeplot are defined outside the visible part
# of this script.
b_dist = stats.beta(a=2, b=5)
c_dist = [stats.norm(-8, 0.75), stats.norm(8, 1)]
d_dist = stats.norm(0, 1)
e_dist = stats.uniform(-1, 1)
a = a_dist.rvs(size)
# Wrap the draws onto the interval [-pi, pi].
a = np.arctan2(np.sin(a), np.cos(a))
b = b_dist.rvs(size)
# Mixture sample: 7000 draws from the first component, 3000 from the second.
c = np.concatenate((c_dist[0].rvs(7000), c_dist[1].rvs(3000)))
d = d_dist.rvs(size)
e = e_dist.rvs(size)


ax[0, 0].set_title('ArviZ')
ax[0, 1].set_title('Scipy')

# One row per sample: true density, KDE and histogram, ArviZ on the left and
# the scipy-based helper on the right.
for idx, (i, dist) in enumerate(zip([d, a, c, b, e], [d_dist, a_dist, c_dist, b_dist,  e_dist] )):
    x = np.linspace(i.min()+0.01, i.max()-0.01, 200)
    if idx == 2:
        # Mixture row: the first 100 grid points use component 0 weighted
        # 0.7, the last 100 use component 1 weighted 0.3.
        x_dist = np.concatenate((dist[0].pdf(x[:100]) * 0.7, dist[1].pdf(x[100:]) * 0.3))
    else:
        x_dist = dist.pdf(x)

    ax[idx, 0].plot(x, x_dist, 'C0', lw=2) 
    az.plot_kde(i, ax=ax[idx, 0], bw=bw, textsize=11, plot_kwargs={'color':'C1', 'linewidth':2})
    ax[idx, 0].set_yticks([])
    ax[idx, 0].hist(i, bins='auto', alpha=0.2, density=True)

    ax[idx, 1].plot(x, x_dist, 'C0', lw=2) 
    scipykdeplot(i, ax=ax[idx, 1], color='C1', lw=2)
    ax[idx, 1].set_yticks([])
    ax[idx, 1].hist(i, bins='auto', alpha=0.2, density=True)
Exemple #13
0
plt.rc('xtick', labelsize=SIZE_SMALL)  # fontsize of the tick labels
plt.rc('ytick', labelsize=SIZE_SMALL)  # fontsize of the tick labels
plt.rc('legend', fontsize=SIZE_SMALL)  # legend fontsize
plt.rc('figure', titlesize=SIZE_LARGE)  # fontsize of the figure title

np.random.seed(0)

# Three distributions, each paired with the grid its pdf is plotted on.
xs = (np.linspace(0, 20, 200), np.linspace(0, 1, 200), np.linspace(-4, 4, 200))
dists = (stats.expon(scale=5), stats.beta(0.5, 0.5), stats.norm(0, 1))
fig, ax = plt.subplots(3, 3, figsize=(10, 10))
for idx, (dist, x) in enumerate(zip(dists, xs)):
    draws = dist.rvs(100000)
    # Push every draw through the distribution's own cdf.
    data = dist.cdf(draws)
    ax[idx, 0].plot(x, dist.pdf(x))
    # Empirical cdf of the transformed draws.
    ax[idx, 1].plot(np.sort(data), np.linspace(0, 1, len(data)))
    az.plot_kde(data, ax=ax[idx, 2])
    if idx == 0:
        ax[idx, 0].set_title('pdf(X)')
        ax[idx, 1].set_title('cdf(Y)')
        ax[idx, 2].set_title('pdf(Y)')

plt.tight_layout()
pml.savefig('ecdf_sample.pdf', dpi=300)
plt.show()

# Same pdfs again, one figure each.
# NOTE(review): draws and data are not used in the visible part of this loop;
# the snippet may be truncated here.
for idx, (dist, x) in enumerate(zip(dists, xs)):
    draws = dist.rvs(100000)
    data = dist.cdf(draws)
    plt.figure()
    plt.plot(x, dist.pdf(x))
    if idx == 0: plt.title('pdf(X)')
Exemple #14
0
# %%
# trace_12_2 and logistic are defined outside the visible part of this script.
_, ax = plt.subplots(1, 2, figsize=(12, 5), constrained_layout=True)
# show first 100 populations in the posterior
xrange = np.linspace(-3, 4, 200)
postcurve = [
    stats.norm.pdf(xrange, loc=trace_12_2["a"][i], scale=trace_12_2["sigma"][i])
    for i in range(100)
]
ax[0].plot(xrange, np.asarray(postcurve).T, alpha=0.1, color="k")
ax[0].set_xlabel("log-odds survive")
ax[0].set_ylabel("Density")
# sample 8000 imaginary tanks from the posterior distribution
# (one Normal draw per posterior sample of "a" and "sigma")
sim_tanks = np.random.normal(loc=trace_12_2["a"], scale=trace_12_2["sigma"])
# transform to probability and visualize
az.plot_kde(logistic(sim_tanks), ax=ax[1], plot_kwargs={"color": "k"})
ax[1].set_xlabel("probability survive")
ax[1].set_ylabel("Density")

# %%
# Simulation settings: 60 ponds, 15 ponds for each of the four sizes in ni.
a, sigma, nponds = 1.4, 1.5, 60
ni = np.repeat([5, 10, 25, 35], 15)

# %%
# One Normal(a, sigma) value per pond.
a_pond = np.random.normal(loc=a, scale=sigma, size=nponds)

# %%
dsim = pd.DataFrame(dict(pond=np.arange(nponds), ni=ni, true_a=a_pond))

# %%
# Binomial count per pond with probability logistic(true_a).
dsim.loc[:, "si"] = np.random.binomial(dsim["ni"], logistic(dsim["true_a"]))
Exemple #15
0
fig, ax = plt.subplots(1, 2, figsize=(10, 3), constrained_layout=True)


def iqr(x, a=0):
    """Return the interquartile range (75th minus 25th percentile) along axis a."""
    upper, lower = np.percentile(x, [75, 25], axis=a)
    return upper - lower


# One panel per statistic (mean, IQR): the dashed line marks the value on
# y_1s, the KDEs show the same statistic computed along axis 1 of y_l and y_p.
# y_1s, y_l and y_p are defined outside the visible part of this script.
for idx, func in enumerate([np.mean, iqr]):
    T_obs = func(y_1s)
    ax[idx].axvline(T_obs, 0, 1, color='k', ls='--')
    for d_sim, c in zip([y_l, y_p], ['C1', 'C2']):
        T_sim = func(d_sim, 1)
        # Fraction of simulated statistics at or above the observed one.
        p_value = np.mean(T_sim >= T_obs)
        az.plot_kde(T_sim,
                    plot_kwargs={'color': c},
                    label=f'p-value {p_value:.2f}',
                    ax=ax[idx])
    ax[idx].set_title(func.__name__)
    ax[idx].set_yticks([])
    ax[idx].legend()
plt.savefig('B11197_05_04.png', dpi=300)

# # Occam's razor – simplicity and accuracy

# In[9]:

# Six data points used by the following cells.
x = np.array([4., 5., 6., 9., 12, 14.])
y = np.array([4.2, 6., 6., 9., 10, 10.])

plt.figure(figsize=(10, 5))
order = [0, 1, 2, 5]
    for i in range(50): # posterior samples
        i_ = np.random.randint(0, len(trace_x))
        means_y = trace_x['means'][i_]
        p_y = trace_x['p'][i_]
        sd = trace_x['sd'][i_]
        dist = stats.norm(means_y, sd)
        ax[idx].plot(x, np.sum(dist.pdf(x_) * p_y, 1), 'C0', alpha=0.1)
 
    means_y = trace_x['means'].mean(0)
    p_y = trace_x['p'].mean(0)
    sd = trace_x['sd'].mean()
    dist = stats.norm(means_y, sd)
    ax[idx].plot(x, np.sum(dist.pdf(x_) * p_y, 1), 'C0', lw=2)
    ax[idx].plot(x, dist.pdf(x_) * p_y, 'k--', alpha=0.7)
         
    az.plot_kde(data, plot_kwargs={'linewidth':2, 'color':'k'}, ax=ax[idx])
    ax[idx].set_title('K = {}'.format(clusters[idx]))
    ax[idx].set_yticks([])
    ax[idx].set_xlabel('x')
pml.savefig('gmm_chooseK_pymc3_kde.pdf')

# Posterior predictive check

# clusters, traces and models are defined outside the visible part of this
# script; one posterior predictive sample set is drawn per fitted model.
nclusters = len(clusters)
ppc_mm = [pm.sample_posterior_predictive(traces[i], 1000, models[i])
          for i in range(nclusters)]

# 2x2 grid flattened so the panels can be indexed with a single integer.
fig, ax = plt.subplots(2, 2, figsize=(10, 6), sharex=True, constrained_layout=True)
ax = np.ravel(ax)
def iqr(x, a=0):
    """Return the interquartile range (75th minus 25th percentile) along axis a."""
    q75, q25 = np.percentile(x, [75, 25], axis=a)
    return q75 - q25
Exemple #17
0
import jax.numpy as jnp
from jax import random, vmap

rng_key = random.PRNGKey(0)
rng_key, rng_key_ = random.split(rng_key)

import numpyro
import numpyro.distributions as dist

import arviz as az

import pyprobml_utils as pml

# KDE of the off-diagonal entry of 2x2 LKJ samples for three concentrations.
# plt is imported outside the visible part of this script.
eta_list = [1, 2, 4]
colors = ['r', 'k', 'b']
fig, ax = plt.subplots()
for i, eta in enumerate(eta_list):
    # The same PRNG key is used for every eta; 10**4 matrices per setting.
    R = dist.LKJ(dimension=2,
                 concentration=eta).sample(random.PRNGKey(0), (int(1e4), ))
    # Entry [0, 1] of each sampled matrix; no ax is passed to plot_kde here.
    az.plot_kde(R[:, 0, 1],
                label=f"eta={eta}",
                plot_kwargs={'color': colors[i]})
plt.legend()
ax.set_xlabel('correlation')
ax.set_ylabel('density')
ax.set_ylim(0, 1.2)
ax.set_xlim(-1.1, 1.1)
pml.savefig('LKJ_1d_correlation.pdf', dpi=300)
plt.show()
Exemple #18
0
"""
KDE Plot Bokeh
==============

_thumb: .2, .8
"""
import bokeh.plotting as bkp
import numpy as np

import arviz as az

data = az.load_arviz_data("centered_eight")

# Combine posterior predictive draws from an xarray of (4,500) to ndarray (2000,)
y_hat = np.concatenate(data.posterior_predictive["obs"].values)

# Pre-built 500x500 Bokeh figure handed to plot_kde through ax.
figure_kwargs = dict(height=500, width=500, output_backend="webgl")
ax = bkp.figure(**figure_kwargs)

# KDE with a rug; line styling uses Bokeh property names.
ax = az.plot_kde(
    y_hat,
    label="Estimated Effect\n of SAT Prep",
    rug=True,
    plot_kwargs={"line_width": 2, "line_color": "black"},
    rug_kwargs={"line_color": "black"},
    backend="bokeh",
    ax=ax,
)
    print("Plot 5")
    az.plot_pair(emcee_data,
                 var_names=var_names,
                 kind='kde',
                 marginals=True,
                 point_estimate="mean",
                 textsize=60)  #, kde_kwargs={"hdi_probs":[0.68,0.95,0.997]})
    #plt.show()
    plt.savefig('test_full_analysis_fig5.pdf', format='pdf', dpi=1200)
    plt.close()

    print("Plot_5.1")
    ax = az.plot_kde(
        flat_samples[:, 0],
        flat_samples[:, 1],
        hdi_probs=[0.393, 0.865, 0.989],  # 1, 2 and 3 sigma contours
        contourf_kwargs={"cmap": "Blues"},
    )

    ax.set_aspect("equal")
    plt.savefig('test_full_analysis_fig5.1.pdf', format='pdf', dpi=1200)
    plt.close()
    #    print(flat_samples)
    #    print(blobs)
    #    print(blobs[0,:])
    #    print(blobs[:,0]) # this is the ppd of the first data

    print("Plot 6")
    inds = np.random.randint(len(flat_samples), size=100)
    for ind in inds:
        sample = flat_samples[ind]
Exemple #20
0
"""
2d KDE (custom style)
=====================

_thumb: .1, .8
"""
import matplotlib.pyplot as plt
import numpy as np

import arviz as az

az.style.use("arviz-darkgrid")

# 2d KDE of two independent Beta(2, 5) samples with custom contour styling:
# 30 levels, viridis-coloured lines and half-transparent filled contours.
az.plot_kde(
    np.random.beta(2, 5, size=100),
    np.random.beta(2, 5, size=100),
    contour_kwargs={
        "colors": None,
        "cmap": plt.cm.viridis,
        "levels": 30
    },
    contourf_kwargs={
        "alpha": 0.5,
        "levels": 30
    },
)

plt.show()
import matplotlib.pyplot as plt
import arviz as az
import pyprobml_utils as pml

np.random.seed(42)


#url = 'https://github.com/aloctavodia/BAP/tree/master/code/data/chemical_shifts_theo_exp.csv?raw=true'
# There is an error when reading the above file:
# Error tokenizing data. C error: Expected 1 fields in line 71, saw 2
# So we make a copy here
# Load the CSV and keep its 'exp' column as the observations.
# np, pd and pm are imported outside the visible part of this script.
url = 'https://raw.githubusercontent.com/probml/probml-data/main/data/chemical_shifts_theo_exp.csv'
df = pd.read_csv(url, sep=',')
obs = df['exp']

# KDE and normalised histogram of the observations on the same axes.
az.plot_kde(obs)
plt.hist(obs, density=True, bins=30, alpha=0.3)
plt.yticks([])
pml.savefig('gmm_pymc3_data.pdf', dpi=300)

# Illustrate unidentifiability

# Two-component Normal mixture: Dirichlet weights, one mean per component
# and a single shared standard deviation.
clusters = 2
with pm.Model() as model_mg:
    p = pm.Dirichlet('p', a=np.ones(clusters))
    means = pm.Normal('means', mu=obs.mean(), sd=10, shape=clusters)
    sd = pm.HalfNormal('sd', sd=10)
    y = pm.NormalMixture('y', w=p, mu=means, sd=sd, observed=obs)
    trace_mg = pm.sample(random_seed=123)

varnames = ['means', 'p']
# %%
def sim_p(G=1.4):
    x123 = np.random.uniform(size=3)
    x4 = G * np.sum(x123) - x123[1] - x123[2] / (2 - G)
    x1234 = np.concatenate((x123, [x4]))
    z = np.sum(x1234)
    p = x1234 / z
    return -np.sum(p * np.log(p)), p


# %%
# Run sim_p 10**5 times, collecting each entropy in H and each distribution
# as a row of p.
H = []
p = np.zeros((10**5, 4))
for rep in range(10**5):
    h, p_ = sim_p()
    H.append(h)
    p[rep] = p_

# %%
az.plot_kde(H)
plt.xlabel("Entropy")
plt.ylabel("Density")

# %%
# Largest simulated entropy ...
np.max(H)

# %%
# ... and the distribution that produced it.
p[np.argmax(H)]

# %%
Exemple #23
0
"""
KDE quantiles
=============

_thumb: .2, .8
"""
import matplotlib.pyplot as plt
import numpy as np

import arviz as az

az.style.use("arviz-darkgrid")

# 1000 Beta draws whose first shape parameter is itself drawn uniformly from
# [0.5, 10); the KDE is drawn with the three quartiles marked.
dist = np.random.beta(np.random.uniform(0.5, 10), 5, size=1000)
az.plot_kde(dist, quantiles=[0.25, 0.5, 0.75])

plt.show()
Exemple #24
0
"""
2d KDE
======

_thumb: .1, .8
"""
import arviz as az
import numpy as np

az.style.use('arviz-darkgrid')

# Two-dimensional KDE of 100 independent uniform (x, y) points.
az.plot_kde(np.random.rand(100), np.random.rand(100))
Exemple #25
0
"""
KDE Plot
========

_thumb: .2, .8
"""
import matplotlib.pyplot as plt
import numpy as np

import arviz as az

az.style.use("arviz-darkgrid")

data = az.load_arviz_data("centered_eight")

# Combine posterior predictive draws from an xarray of (4,500) to ndarray (2000,)
y_hat = np.concatenate(data.posterior_predictive["obs"].values)

# KDE with a rug, both drawn in black.
ax = az.plot_kde(
    y_hat,
    label="Estimated Effect\n of SAT Prep",
    rug=True,
    plot_kwargs={
        "linewidth": 2,
        "color": "black"
    },
    rug_kwargs={"color": "black"},
)
plt.show()
# ## Gaussian inferences

# In[13]:

# Load the observations from a text file.
data = np.loadtxt('../data/chemical_shifts.csv')

# remove outliers using the interquartile rule
#quant = np.percentile(data, [25, 75])
#iqr = quant[1] - quant[0]
#upper_b = quant[1] + iqr * 1.5
#lower_b = quant[0] - iqr * 1.5
#data = data[(data > lower_b) & (data < upper_b)]
#print(np.mean(data), np.std(data))

# KDE with a rug; the single y tick label is made invisible (alpha=0).
az.plot_kde(data, rug=True)
plt.yticks([0], alpha=0)
plt.savefig('B11197_02_07.png', dpi=300)

#  <img src="B11197_02_08.png" width="500">

# In[14]:

# Normal likelihood with a Uniform(40, 70) prior on the mean and a
# HalfNormal(10) prior on the standard deviation; 3000 draws are requested.
with pm.Model() as model_g:
    μ = pm.Uniform('μ', lower=40, upper=70)
    σ = pm.HalfNormal('σ', sd=10)
    y = pm.Normal('y', mu=μ, sd=σ, observed=data)
    trace_g = pm.sample(3000)

az.plot_trace(trace_g)
# %%
# Simulate waiting times: N_visits visits per cafe, alternating 0/1 in the
# afternoon indicator.  N_cafes, a_cafe and b_cafe are defined outside the
# visible part of this script.
N_visits = 10
afternoon = np.tile([0, 1], N_visits * N_cafes //
                    2)  # wrap with int() to suppress warnings
cafe_id = np.repeat(np.arange(0, N_cafes),
                    N_visits)  # 1-20 (minus 1 for python indexing)

# Per-visit mean: cafe intercept plus cafe slope on afternoon visits.
mu = a_cafe[cafe_id] + b_cafe[cafe_id] * afternoon
sigma = 0.5  # std dev within cafes
wait = np.random.normal(loc=mu, scale=sigma, size=N_visits * N_cafes)
d = pd.DataFrame(dict(cafe=cafe_id, afternoon=afternoon, wait=wait))

# %%
# 10000 draws from LKJCorr(n=2, eta=2) and their KDE.  plot_kde is called
# without ax, while the labels are set on the axes created just before.
R = pm.LKJCorr.dist(n=2, eta=2).random(size=10000)
_, ax = plt.subplots(1, 1, figsize=(5, 5))
az.plot_kde(R)
ax.set_xlabel("correlation")
ax.set_ylabel("Density")

# %%
# Same plot for three eta values, each annotated at a hand-picked position.
_, ax = plt.subplots(1, 1, figsize=(5, 5))
textloc = [[0, 0.5], [0, 0.8], [0.5, 0.9]]
for eta, loc in zip([1, 2, 4], textloc):
    R = pm.LKJCorr.dist(n=2, eta=eta).random(size=10000)
    az.plot_kde(R)
    ax.text(loc[0], loc[1], "eta = %s" % (eta), horizontalalignment="center")

ax.set_ylim(0, 1.1)
ax.set_xlabel("correlation")
ax.set_ylabel("Density")
Exemple #28
0
    # standardize the posterior, so it sums to 1
    posterior = unstd_posterior / unstd_posterior.sum()
    return p_grid, posterior


# %%
# Grid posterior for 6 successes in 9 tosses, then 10**4 grid values resampled
# (with replacement) in proportion to the posterior.
p_grid, posterior = posterior_grid_approx(grid_points=100, success=6, tosses=9)
samples = np.random.choice(p_grid, p=posterior, size=int(1e4), replace=True)

# %%
# Left: samples in draw order; right: their KDE.
_, (ax0, ax1) = plt.subplots(1, 2, figsize=(12, 6))
ax0.plot(samples, "o", alpha=0.2)
ax0.set_xlabel("sample number")
ax0.set_ylabel("proportion water (p)")
az.plot_kde(samples, ax=ax1)
ax1.set_xlabel("proportion water (p)")
ax1.set_ylabel("density")

# %%
# Posterior mass below 0.5, summed over the grid.
sum(posterior[p_grid < 0.5])

# %%
# Same quantity estimated as the fraction of samples below 0.5.
sum(samples < 0.5) / 1e4

# %%
# Fraction of samples strictly between 0.5 and 0.75.
sum((samples > 0.5) & (samples < 0.75)) / 1e4

# %%
# 80th percentile of the samples.
np.percentile(samples, 80)
# Simulate a noisy line.  N, noiseSD, alpha_real and beta_real are defined
# outside the visible part of this script.
eps_real = np.random.normal(0, noiseSD, size=N)

x = np.random.normal(10, 1, N)  # centered on 10
y_real = alpha_real + beta_real * x
y = y_real + eps_real

# save untransformed data for later
x_orig = x
y_orig = y

# Left panel: data and the noise-free line; right panel: KDE of y.
_, ax = plt.subplots(1, 2, figsize=(8, 4))
ax[0].plot(x, y, 'C0.')
ax[0].set_xlabel('x')
ax[0].set_ylabel('y', rotation=0)
ax[0].plot(x, y_real, 'k')
az.plot_kde(y, ax=ax[1])
ax[1].set_xlabel('y')
plt.tight_layout()

# Fit posterior with MCMC instead of analytically (for simplicity and flexibility)
# This is the same as BAP code, except we fix the noise variance to a constant.

# Normal priors on intercept w0 and slope w1; the likelihood scale is the
# known constant noiseSD rather than a sampled parameter.
with pm.Model() as model_g:
    w0 = pm.Normal('w0', mu=0, sd=10)
    w1 = pm.Normal('w1', mu=0, sd=1)
    #ϵ = pm.HalfCauchy('ϵ', 5)
    mu = pm.Deterministic('mu', w0 + w1 * x)
    #y_pred = pm.Normal('y_pred', mu=μ, sd=ϵ, observed=y)
    y_pred = pm.Normal('y_pred', mu=mu, sd=noiseSD, observed=y)
    trace_g = pm.sample(1000, cores=1, chains=2)
Exemple #30
0
         label="Quadratic approximation")
plt.legend(loc=0)

plt.title(f"n = {n}")
plt.xlabel("Proportion water")

# %%
# Metropolis sampler for the proportion p given W successes and L failures.
n_samples = 10000
p = np.zeros(n_samples)
p[0] = 0.5
W = 6
L = 3
for i in range(1, n_samples):
    # Draw a scalar proposal.  rvs(1) returned a length-1 array, and storing
    # that into the scalar slot p[i] relies on an array-to-scalar conversion
    # that NumPy has deprecated.
    p_new = stats.norm(p[i - 1], 0.1).rvs()
    # Reflect proposals that fall outside [0, 1] back into the interval.
    if p_new < 0:
        p_new = -p_new
    if p_new > 1:
        p_new = 2 - p_new
    # Binomial likelihood at the current and the proposed value.
    q0 = stats.binom.pmf(W, n=W + L, p=p[i - 1])
    q1 = stats.binom.pmf(W, n=W + L, p=p_new)
    # Accept with probability min(1, q1 / q0); otherwise keep the old value.
    if stats.uniform.rvs(0, 1) < q1 / q0:
        p[i] = p_new
    else:
        p[i] = p[i - 1]

# %%
# KDE of the chain against the Beta(W + 1, L + 1) density on a 100-point grid.
az.plot_kde(p, label="Metropolis approximation")
x = np.linspace(0, 1, 100)
plt.plot(x, stats.beta.pdf(x, W + 1, L + 1), "C1", label="True posterior")
plt.legend()