def plot_ppc(predictive, y, S=1000, title=None):
    """Plot a posterior predictive check and show the figure.

    The density of the observed data is drawn as a thick black line on top
    of the densities of ``S`` replicated data sets drawn from ``predictive``.

    Parameters
    ----------
    predictive : object
        Must expose ``rvs(size=..., random_state=...)``; it is called once
        with ``size=(S, y.shape[0])`` and ``random_state=1``.
    y : array-like with ``shape`` and ``flatten()``
        Observed data.
    S : int, optional
        Number of replicated data sets to draw.
    title : str, optional
        Figure title; omitted when falsy.
    """
    fig, ax = plt.subplots(figsize=(12, 8))
    y_sampled = predictive.rvs(size=(S, y.shape[0]), random_state=1)
    linewidth = 4

    # Observed data: unfilled black curve kept above the replicates (zorder).
    plot_kde(
        y.flatten(),
        label="Observed",
        plot_kwargs={"color": "k", "linewidth": linewidth, "zorder": 3},
        fill_kwargs={"alpha": 0},
        ax=ax,
    )

    # One density estimate per replicated data set.
    pp_densities = []
    pp_xs = []
    for vals in y_sampled:
        vals = np.array([vals]).flatten()
        pp_x, pp_density = kde(vals)
        pp_densities.append(pp_density)
        pp_xs.append(pp_x)

    # Transposing makes every replicate a column, so one call draws them all.
    ax.plot(
        np.transpose(pp_xs),
        np.transpose(pp_densities),
        color='b',
        alpha=0.1,
        linewidth=0.15 * linewidth,
    )
    # Empty proxy line: a single legend entry for all replicate curves.
    ax.plot([], color='b', label="Posterior predictive")

    ax.set_xlabel('y')
    # Previously a second xlabel call overwrote 'y' and left the y axis bare.
    ax.set_ylabel('density')
    if title:
        plt.title(title)
    plt.legend()
    plt.show()
def make_plot_panel(dims, sigmas, student_prior=False, standardize=False):
    """Draw one prior-predictive panel per (dimension, prior scale) pair.

    For each panel, coefficient vectors are sampled from the prior (normal,
    or Student-t with 3 degrees of freedom when ``student_prior`` is set),
    binary predictors are sampled, binary outcomes are simulated through
    ``expit``, and the KDE of the per-coefficient mean outcome is plotted.

    Parameters
    ----------
    dims : sequence of int
        Number of predictors for each panel.
    sigmas : sequence of float
        Prior scale for each panel, indexed in step with ``dims``.
    student_prior : bool, optional
        Use a Student-t prior instead of a normal one.
    standardize : bool, optional
        Pass the predictors through ``StandardScaler`` before simulating.
    """
    n_panels = len(dims)
    fig, axes = plt.subplots(
        1, n_panels, figsize=(n_panels * 3, 5), sharex=True, sharey=True
    )
    axes = np.ravel(axes)
    np.random.seed(0)
    nbetas = 10000  # number of random parameter vectors to try
    ndata = 500  # number of observations for each parameter vector
    for panel, dim in enumerate(dims):
        ax = axes[panel]
        sigma = sigmas[panel]

        if student_prior:
            prior = stats.t(3, 0, sigma)  # df=3; df=1 would be a Cauchy
        else:
            prior = stats.norm(0, sigma)
        β = prior.rvs((nbetas, dim))

        X = np.random.binomial(n=1, p=0.8, size=(dim, ndata))
        if standardize:
            X = StandardScaler().fit_transform(X.T).T

        # Shape (nbetas, ndata): one simulated data set per parameter vector.
        success_prob = expit(β @ X)
        ys = np.random.binomial(n=1, p=success_prob)
        # Mean over observations, KDE over parameter vectors.
        az.plot_kde(ys.mean(1), ax=ax)

        template = (
            "{:d} predictors, std={:0.2f}, student prior"
            if student_prior
            else "{:d} predictors, std={:0.2f}"
        )
        ax.set_title(template.format(dim, sigma))
def plot_gi(posterior_samples, mean_varname="gi_mean", sd_varname="gi_sd", newfig=True):
    """Plot the KDEs of two posterior variables side by side.

    Parameters
    ----------
    posterior_samples : mapping
        Indexed with ``mean_varname`` and ``sd_varname`` to obtain the samples.
    mean_varname : str, optional
        Key of the samples shown in the left panel.
    sd_varname : str, optional
        Key of the samples shown in the right panel.
    newfig : bool, optional
        Create a new 6x3 inch, 300 dpi figure first; otherwise draw into the
        current figure.
    """
    if newfig:
        plt.figure(figsize=(6, 3), dpi=300)

    plt.subplot(121)
    az.plot_kde(posterior_samples[mean_varname], ax=plt.gca())
    plt.ylabel("density")
    # Raw strings: "\m" and "\s" are invalid escape sequences otherwise.
    plt.xlabel(r"$\mu_{GI}$")
    plt.gca().set_ylim(bottom=0)

    plt.subplot(122)
    az.plot_kde(posterior_samples[sd_varname], ax=plt.gca())
    plt.ylabel("density")
    plt.xlabel(r"$\sigma_{GI}$")
    plt.tight_layout()
    plt.gca().set_ylim(bottom=0)
def plot_joint_posterior(self, plotters, iteration=-1, kind='kde', **joint_kwargs):
    """Draw the joint distribution of two variables on ``self.axjoin``.

    Parameters
    ----------
    plotters : sequence
        ``plotters[0]`` and ``plotters[1]`` describe the x and y variables;
        items 0 and 1 of each are passed to ``make_label`` and item 2 is the
        array of draws.
    iteration : int, optional
        The flattened draws are sliced as ``[:iteration]``, so the default
        ``-1`` leaves out the last draw.
    kind : str, optional
        ``"scatter"`` or ``"kde"``; any other value draws a hexbin plot.
    **joint_kwargs
        Forwarded to the plotting call. ``fill_last`` (kde) and
        ``grid_size`` (hexbin) are consumed here.
    """
    # Set labels for axes
    x_var_name = make_label(plotters[0][0], plotters[0][1])
    y_var_name = make_label(plotters[1][0], plotters[1][1])
    self.axjoin.set_xlabel(x_var_name, fontsize=self.ax_labelsize)
    self.axjoin.set_ylabel(y_var_name, fontsize=self.ax_labelsize)
    self.axjoin.tick_params(labelsize=self.xt_labelsize)

    # Flatten data
    x = plotters[0][2].flatten()[:iteration]
    y = plotters[1][2].flatten()[:iteration]

    if kind == "scatter":
        self.axjoin.scatter(x, y, **joint_kwargs)
    elif kind == "kde":
        joint_kwargs.setdefault('contour', True)
        # pop, not get: the key must not be forwarded a second time through
        # **joint_kwargs, which raised "multiple values for keyword argument".
        fill_last = joint_kwargs.pop('fill_last', False)
        try:
            self.foo = plot_kde(x, y, fill_last=fill_last, ax=self.axjoin, **joint_kwargs)
        except (ValueError, np.linalg.LinAlgError):
            # Deliberate best effort kept from the original: when the density
            # estimate fails, the joint panel is simply left empty.
            pass
    else:
        # pop, not get: 'grid_size' is this method's own option and must not
        # reach hexbin as an unknown keyword.
        gridsize = joint_kwargs.pop('grid_size', 'auto')
        if gridsize == "auto":
            gridsize = int(len(x)**0.35)
        self.axjoin.hexbin(x, y, mincnt=1, gridsize=gridsize, **joint_kwargs)
        # NOTE(review): `_grid` is kept as written; if `axjoin` is a plain
        # matplotlib Axes this was probably meant to be `grid(False)` - confirm.
        self.axjoin._grid(False)
def plot_pit(model, x, y, S=1000, title=None):
    """Plot the leave-one-out PIT density against uniform reference densities.

    For every observation a fresh model instance is fitted on the remaining
    data and the predictive CDF for the held-out input is evaluated at the
    held-out target. The KDE of those values is drawn over the densities of
    ``S`` uniform samples (generator seeded with 1), then the figure is shown.

    Parameters
    ----------
    model : object
        Passed to ``_get_new_model_instance``; the result must provide
        ``fit(x, y)`` and ``predict(x_i)`` returning an object with ``cdf``.
    x, y : array-like
        Inputs and targets; sliced and indexed along the first axis.
    S : int, optional
        Number of uniform reference samples.
    title : str, optional
        Figure title; omitted when falsy.
    """
    pit_values = []
    for held_out in range(x.shape[0]):
        x_rest = np.concatenate((x[:held_out], x[held_out + 1:]))
        y_rest = np.concatenate((y[:held_out], y[held_out + 1:]))
        refit = _get_new_model_instance(model)
        refit.fit(x_rest, y_rest)
        pit_values.append(refit.predict(x[held_out]).cdf(y[held_out]))

    linewidth = 4
    fig, ax = plt.subplots(figsize=(12, 8))
    observed_style = {"color": "k", "linewidth": linewidth, "zorder": 3}
    plot_kde(
        np.array(pit_values),
        label="PIT density",
        plot_kwargs=observed_style,
        fill_kwargs={"alpha": 0},
        ax=ax,
    )

    # Density estimates of uniform samples the same length as y.
    rng = np.random.default_rng(1)
    curves = [kde(rng.random(y.shape[0])) for _ in range(S)]
    uni_xs = [grid for grid, _ in curves]
    uni_densities = [density for _, density in curves]

    # Transposing makes every reference sample a column for a single plot call.
    ax.plot(
        np.transpose(uni_xs),
        np.transpose(uni_densities),
        color='b',
        alpha=0.1,
        linewidth=0.15 * linewidth,
    )
    # Empty proxy line: one legend entry for all reference curves.
    ax.plot([], color='b', label="Uniform empirical densities")

    if title:
        plt.title(title)
    plt.legend()
    plt.show()
def analyze_post(post, method):
    """Print a posterior summary and save three diagnostic figures.

    The figures are a forest plot, a pair plot of ``br`` against ``bl`` and
    the KDE of ``bl + br``. Each is titled with ``method``, saved through
    ``pml.savefig`` with ``method`` in the file name, and shown.

    Parameters
    ----------
    post : mapping
        Posterior samples; must contain ``"bl"`` and ``"br"``.
    method : str
        Label used for the titles and the output file names.
    """
    print_summary(post, 0.95, False)

    fig, ax = plt.subplots()
    az.plot_forest(post, hdi_prob=0.95, figsize=(10, 4), ax=ax)
    plt.title(method)
    pml.savefig(f'multicollinear_forest_plot_{method}.pdf')
    plt.show()

    fig, ax = plt.subplots()
    az.plot_pair(post, var_names=["br", "bl"], scatter_kwargs={"alpha": 0.1}, ax=ax)
    # Title first, then save: the saved file used to lack the title that the
    # other two figures carry.
    plt.title(method)
    pml.savefig(f'multicollinear_joint_post_{method}.pdf')
    plt.show()

    sum_blbr = post["bl"] + post["br"]
    fig, ax = plt.subplots()
    az.plot_kde(sum_blbr, label="sum of bl and br", ax=ax)
    plt.title(method)
    pml.savefig(f'multicollinear_sum_post_{method}.pdf')
    plt.show()
def bayesEstimation(_smoothingWindow, _raw):
    """Fit a normal model to the processed signal and show diagnostic plots.

    The raw input is processed with ``sglProcessing``, a PyMC model with a
    half-normal ``sigma`` is sampled, and the data KDE, trace plot, summary,
    posterior plot and posterior-predictive draws are displayed in turn.

    Parameters
    ----------
    _smoothingWindow
        Forwarded to ``sglProcessing`` as its second argument.
    _raw
        Raw signal, forwarded to ``sglProcessing`` as its first argument.
    """
    signal = sglProcessing(_raw, _smoothingWindow)
    print(np.mean(signal))

    draws = 1000
    with pm.Model() as model:
        # NOTE(review): the 'mu' prior is registered in the model, but the
        # likelihood below uses the constant 50 instead - confirm whether the
        # override is intentional.
        pm.Normal('mu', mu=50, sd=1)
        fixed_mean = 50
        noise_scale = pm.HalfNormal("sigma", sd=30)
        pm.Normal("estimation", mu=fixed_mean, sd=noise_scale, observed=signal)
        posterior_trace = pm.sample(draws)

    print("Showing the plots")

    # Density of the processed signal, with the y tick label hidden.
    az.plot_kde(signal, rug=True)
    plt.yticks([0], alpha=0)
    plt.show()

    pm.traceplot(posterior_trace, legend=True)
    print(az.summary(posterior_trace))
    print(
        "----------------------------------------------------------------------"
    )
    plt.show()

    az.plot_posterior(posterior_trace)
    plt.title("posterior")
    plt.show()

    predictive = pm.sample_posterior_predictive(
        posterior_trace, samples=10, model=model
    )
    print("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
    simulated = predictive['estimation'].T
    plt.plot(simulated)
    plt.show()

    # Simulated and observed densities on the same axes.
    az.plot_kde(predictive['estimation'].T)
    az.plot_kde(signal, rug=True)
    plt.title("simulated data dist")
    plt.show()
import numpyro.distributions as dist
from numpyro.infer import Predictive
import arviz as az
from jax.scipy.special import expit
from functools import partial


### Model with just offset term
def model_meta(prior_std, obs=None):
    """Intercept-only logistic model.

    ``a`` has a ``Normal(0, prior_std)`` prior and is used as the logit of a
    Binomial observation site named ``obs``.
    """
    a = numpyro.sample("a", dist.Normal(0, prior_std))
    numpyro.sample("obs", dist.Binomial(logits=a), obs=obs)


# Prior on the probability scale for two prior standard deviations.
fig, ax = plt.subplots()
colors = ['r', 'k']
for i, sigma in enumerate([1.5, 10]):
    model = partial(model_meta, sigma)
    prior = Predictive(model, num_samples=10000)(random.PRNGKey(1999))
    p = expit(prior["a"])
    # sigma is the scale handed to dist.Normal, i.e. a standard deviation;
    # the label used to say "variance" and ended in a stray "$".
    label = 'std={:0.2f}'.format(sigma)
    az.plot_kde(p, ax=ax, plot_kwargs={'color': colors[i]}, label=label, legend=True)
pml.savefig('logreg_prior_offset.pdf', dpi=300)
plt.show()
# we can center the data #x = x - x.mean() # or standardize the data #x = (x - x.mean())/x.std() #y = (y - y.mean())/y.std() # In[4]: _, ax = plt.subplots(1, 2, figsize=(8, 4)) ax[0].plot(x, y, 'C0.') ax[0].set_xlabel('x') ax[0].set_ylabel('y', rotation=0) ax[0].plot(x, y_real, 'k') az.plot_kde(y, ax=ax[1]) ax[1].set_xlabel('y') plt.tight_layout() plt.savefig('B11197_03_02.png', dpi=300) # In[5]: with pm.Model() as model_g: α = pm.Normal('α', mu=0, sd=10) β = pm.Normal('β', mu=0, sd=1) ϵ = pm.HalfCauchy('ϵ', 5) μ = pm.Deterministic('μ', α + β * x) y_pred = pm.Normal('y_pred', mu=μ, sd=ϵ, observed=y)
"""
KDE quantiles Bokeh
===================

_thumb: .2, .8
"""
import arviz as az
import numpy as np

# Beta(alpha, 5) sample whose first shape parameter is itself drawn from
# Uniform(0.5, 10).
alpha = np.random.uniform(0.5, 10)
dist = np.random.beta(alpha, 5, size=1000)

# KDE split at the quartiles, rendered with the bokeh backend.
quartiles = [0.25, 0.5, 0.75]
ax = az.plot_kde(dist, quantiles=quartiles, backend="bokeh")
"""
2d KDE (default style)
======================

_thumb: .1, .8
"""
import numpy as np
import arviz as az

# Two independent uniform samples; passing both makes plot_kde draw a 2d KDE.
x_values = np.random.rand(100)
y_values = np.random.rand(100)
ax = az.plot_kde(x_values, y_values, backend="bokeh")
# Reference distributions (a_dist, size, ax, bw and scipykdeplot come from
# earlier in the file).
b_dist = stats.beta(a=2, b=5)
c_dist = [stats.norm(-8, 0.75), stats.norm(8, 1)]
d_dist = stats.norm(0, 1)
e_dist = stats.uniform(-1, 1)

a = a_dist.rvs(size)
# Wrap the draws onto (-pi, pi].
a = np.arctan2(np.sin(a), np.cos(a))
b = b_dist.rvs(size)
# Two-component mixture with weights 0.7 / 0.3 (7000 and 3000 draws).
c = np.concatenate((c_dist[0].rvs(7000), c_dist[1].rvs(3000)))
d = d_dist.rvs(size)
e = e_dist.rvs(size)

ax[0, 0].set_title('ArviZ')
ax[0, 1].set_title('Scipy')

# One row per sample: true pdf, KDE and histogram; ArviZ on the left and the
# SciPy-based KDE on the right.
for idx, (i, dist) in enumerate(zip([d, a, c, b, e],
                                    [d_dist, a_dist, c_dist, b_dist, e_dist])):
    x = np.linspace(i.min()+0.01, i.max()-0.01, 200)
    if idx == 2:
        # Exact mixture density. The previous version spliced the two
        # components together at the midpoint of the grid, which is only
        # right while the components do not overlap.
        x_dist = 0.7 * dist[0].pdf(x) + 0.3 * dist[1].pdf(x)
    else:
        x_dist = dist.pdf(x)

    ax[idx, 0].plot(x, x_dist, 'C0', lw=2)
    az.plot_kde(i, ax=ax[idx, 0], bw=bw, textsize=11,
                plot_kwargs={'color':'C1', 'linewidth':2})
    ax[idx, 0].set_yticks([])
    ax[idx, 0].hist(i, bins='auto', alpha=0.2, density=True)

    ax[idx, 1].plot(x, x_dist, 'C0', lw=2)
    scipykdeplot(i, ax=ax[idx, 1], color='C1', lw=2)
    ax[idx, 1].set_yticks([])
    ax[idx, 1].hist(i, bins='auto', alpha=0.2, density=True)
plt.rc('xtick', labelsize=SIZE_SMALL) # fontsize of the tick labels plt.rc('ytick', labelsize=SIZE_SMALL) # fontsize of the tick labels plt.rc('legend', fontsize=SIZE_SMALL) # legend fontsize plt.rc('figure', titlesize=SIZE_LARGE) # fontsize of the figure title np.random.seed(0) xs = (np.linspace(0, 20, 200), np.linspace(0, 1, 200), np.linspace(-4, 4, 200)) dists = (stats.expon(scale=5), stats.beta(0.5, 0.5), stats.norm(0, 1)) fig, ax = plt.subplots(3, 3, figsize=(10, 10)) for idx, (dist, x) in enumerate(zip(dists, xs)): draws = dist.rvs(100000) data = dist.cdf(draws) ax[idx, 0].plot(x, dist.pdf(x)) ax[idx, 1].plot(np.sort(data), np.linspace(0, 1, len(data))) az.plot_kde(data, ax=ax[idx, 2]) if idx == 0: ax[idx, 0].set_title('pdf(X)') ax[idx, 1].set_title('cdf(Y)') ax[idx, 2].set_title('pdf(Y)') plt.tight_layout() pml.savefig('ecdf_sample.pdf', dpi=300) plt.show() for idx, (dist, x) in enumerate(zip(dists, xs)): draws = dist.rvs(100000) data = dist.cdf(draws) plt.figure() plt.plot(x, dist.pdf(x)) if idx == 0: plt.title('pdf(X)')
# %% _, ax = plt.subplots(1, 2, figsize=(12, 5), constrained_layout=True) # show first 100 populations in the posterior xrange = np.linspace(-3, 4, 200) postcurve = [ stats.norm.pdf(xrange, loc=trace_12_2["a"][i], scale=trace_12_2["sigma"][i]) for i in range(100) ] ax[0].plot(xrange, np.asarray(postcurve).T, alpha=0.1, color="k") ax[0].set_xlabel("log-odds survive") ax[0].set_ylabel("Density") # sample 8000 imaginary tanks from the posterior distribution sim_tanks = np.random.normal(loc=trace_12_2["a"], scale=trace_12_2["sigma"]) # transform to probability and visualize az.plot_kde(logistic(sim_tanks), ax=ax[1], plot_kwargs={"color": "k"}) ax[1].set_xlabel("probability survive") ax[1].set_ylabel("Density") # %% a, sigma, nponds = 1.4, 1.5, 60 ni = np.repeat([5, 10, 25, 35], 15) # %% a_pond = np.random.normal(loc=a, scale=sigma, size=nponds) # %% dsim = pd.DataFrame(dict(pond=np.arange(nponds), ni=ni, true_a=a_pond)) # %% dsim.loc[:, "si"] = np.random.binomial(dsim["ni"], logistic(dsim["true_a"]))
fig, ax = plt.subplots(1, 2, figsize=(10, 3), constrained_layout=True)


def iqr(x, a=0):
    """Return the 75th minus the 25th percentile of ``x`` along axis ``a``."""
    return np.subtract(*np.percentile(x, [75, 25], axis=a))


# One panel per statistic: the value on the data as a dashed vertical line
# and, for each simulated data set, the KDE of the statistic over axis 1.
for idx, func in enumerate([np.mean, iqr]):
    T_obs = func(y_1s)
    ax[idx].axvline(T_obs, 0, 1, color='k', ls='--')
    for d_sim, c in zip([y_l, y_p], ['C1', 'C2']):
        T_sim = func(d_sim, 1)
        # Fraction of simulated statistics at least as large as T_obs.
        p_value = np.mean(T_sim >= T_obs)
        az.plot_kde(T_sim, plot_kwargs={'color': c},
                    label=f'p-value {p_value:.2f}', ax=ax[idx])
    ax[idx].set_title(func.__name__)
    ax[idx].set_yticks([])
    ax[idx].legend()
plt.savefig('B11197_05_04.png', dpi=300)

# # Occam's razor – simplicity and accuracy

# In[9]:

x = np.array([4., 5., 6., 9., 12, 14.])
y = np.array([4.2, 6., 6., 9., 10, 10.])

plt.figure(figsize=(10, 5))
order = [0, 1, 2, 5]
for i in range(50): # posterior samples i_ = np.random.randint(0, len(trace_x)) means_y = trace_x['means'][i_] p_y = trace_x['p'][i_] sd = trace_x['sd'][i_] dist = stats.norm(means_y, sd) ax[idx].plot(x, np.sum(dist.pdf(x_) * p_y, 1), 'C0', alpha=0.1) means_y = trace_x['means'].mean(0) p_y = trace_x['p'].mean(0) sd = trace_x['sd'].mean() dist = stats.norm(means_y, sd) ax[idx].plot(x, np.sum(dist.pdf(x_) * p_y, 1), 'C0', lw=2) ax[idx].plot(x, dist.pdf(x_) * p_y, 'k--', alpha=0.7) az.plot_kde(data, plot_kwargs={'linewidth':2, 'color':'k'}, ax=ax[idx]) ax[idx].set_title('K = {}'.format(clusters[idx])) ax[idx].set_yticks([]) ax[idx].set_xlabel('x') pml.savefig('gmm_chooseK_pymc3_kde.pdf') # Posteroior predictive check nclusters = len(clusters) ppc_mm = [pm.sample_posterior_predictive(traces[i], 1000, models[i]) for i in range(nclusters)] fig, ax = plt.subplots(2, 2, figsize=(10, 6), sharex=True, constrained_layout=True) ax = np.ravel(ax) def iqr(x, a=0): return np.subtract(*np.percentile(x, [75, 25], axis=a))
import jax.numpy as jnp
from jax import random, vmap

rng_key = random.PRNGKey(0)
rng_key, rng_key_ = random.split(rng_key)

import numpyro
import numpyro.distributions as dist
import arviz as az
import pyprobml_utils as pml

# KDE of the off-diagonal entry of 2x2 LKJ draws, one curve per
# concentration value.
eta_list = [1, 2, 4]
colors = ['r', 'k', 'b']
n_draws = int(1e4)

fig, ax = plt.subplots()
for i, eta in enumerate(eta_list):
    lkj = dist.LKJ(dimension=2, concentration=eta)
    R = lkj.sample(random.PRNGKey(0), (n_draws, ))
    # Entry [0, 1] of every sampled matrix.
    correlation = R[:, 0, 1]
    az.plot_kde(
        correlation,
        label=f"eta={eta}",
        plot_kwargs={'color': colors[i]},
        ax=ax,
    )

ax.legend()
ax.set_xlabel('correlation')
ax.set_ylabel('density')
ax.set_ylim(0, 1.2)
ax.set_xlim(-1.1, 1.1)
pml.savefig('LKJ_1d_correlation.pdf', dpi=300)
plt.show()
"""
KDE Plot Bokeh
==============

_thumb: .2, .8
"""
import bokeh.plotting as bkp
import numpy as np

import arviz as az

data = az.load_arviz_data("centered_eight")

# Join the entries along the first axis of the posterior-predictive "obs"
# array end to end.
obs_draws = data.posterior_predictive["obs"].values
y_hat = np.concatenate(obs_draws)

# Pre-built bokeh figure that ArviZ draws into.
figure_kwargs = {"height": 500, "width": 500, "output_backend": "webgl"}
ax = bkp.figure(**figure_kwargs)

line_style = {"line_width": 2, "line_color": "black"}
rug_style = {"line_color": "black"}
ax = az.plot_kde(
    y_hat,
    label="Estimated Effect\n of SAT Prep",
    rug=True,
    plot_kwargs=line_style,
    rug_kwargs=rug_style,
    backend="bokeh",
    ax=ax,
)
# Figure 5: pair plot of the sampled variables with KDE contours, marginals
# and the mean as point estimate.
print("Plot 5")
az.plot_pair(emcee_data, var_names=var_names, kind='kde', marginals=True,
             point_estimate="mean", textsize=60)  #, kde_kwargs={"hdi_probs":[0.68,0.95,0.997]})
#plt.show()
plt.savefig('test_full_analysis_fig5.pdf', format='pdf', dpi=1200)
plt.close()

# Figure 5.1: 2d KDE of the first two columns of flat_samples.
print("Plot_5.1")
ax = az.plot_kde(
    flat_samples[:, 0],
    flat_samples[:, 1],
    hdi_probs=[0.393, 0.865, 0.989],  # 1, 2 and 3 sigma contours
    contourf_kwargs={"cmap": "Blues"},
)
ax.set_aspect("equal")
plt.savefig('test_full_analysis_fig5.1.pdf', format='pdf', dpi=1200)
plt.close()

# print(flat_samples)
# print(blobs)
# print(blobs[0,:])
# print(blobs[:,0]) # this is the ppd of the first data

print("Plot 6")
# 100 random row indices into flat_samples (drawn with replacement).
inds = np.random.randint(len(flat_samples), size=100)
for ind in inds:
    sample = flat_samples[ind]
"""
2d KDE (custom style)
=====================

_thumb: .1, .8
"""
import matplotlib.pyplot as plt
import numpy as np

import arviz as az

az.style.use("arviz-darkgrid")

# Two independent Beta(2, 5) samples for the x and y coordinates.
x_values = np.random.beta(2, 5, size=100)
y_values = np.random.beta(2, 5, size=100)

# 30 levels for both layers: viridis-coloured contour lines over
# half-transparent filled contours.
line_style = {"colors": None, "cmap": plt.cm.viridis, "levels": 30}
fill_style = {"alpha": 0.5, "levels": 30}

az.plot_kde(
    x_values,
    y_values,
    contour_kwargs=line_style,
    contourf_kwargs=fill_style,
)

plt.show()
import matplotlib.pyplot as plt
import arviz as az
import pyprobml_utils as pml

np.random.seed(42)

#url = 'https://github.com/aloctavodia/BAP/tree/master/code/data/chemical_shifts_theo_exp.csv?raw=true'
# There is some error reading the above file
# Error tokenizing data. C error: Expected 1 fields in line 71, saw 2
# So we make a copy here
url = 'https://raw.githubusercontent.com/probml/probml-data/main/data/chemical_shifts_theo_exp.csv'
df = pd.read_csv(url, sep=',')
obs = df['exp']

# KDE and normalised histogram of the 'exp' column, with y ticks removed.
az.plot_kde(obs)
plt.hist(obs, density=True, bins=30, alpha=0.3)
plt.yticks([])
pml.savefig('gmm_pymc3_data.pdf', dpi=300)

# Illustrate unidentifiability
clusters = 2
# Mixture of `clusters` normals: Dirichlet weights, Normal priors on the
# component means and a single HalfNormal standard deviation shared by all.
with pm.Model() as model_mg:
    p = pm.Dirichlet('p', a=np.ones(clusters))
    means = pm.Normal('means', mu=obs.mean(), sd=10, shape=clusters)
    sd = pm.HalfNormal('sd', sd=10)
    y = pm.NormalMixture('y', w=p, mu=means, sd=sd, observed=obs)
    trace_mg = pm.sample(random_seed=123)

varnames = ['means', 'p']
# %%
def sim_p(G=1.4):
    """Simulate a random 4-outcome distribution whose expected value is ``G``.

    Three weights are drawn uniformly and the fourth is solved for, so that
    after normalisation ``p[1] + p[2] + 2 * p[3] == G``.

    Parameters
    ----------
    G : float, optional
        Target expected value.

    Returns
    -------
    tuple
        ``(entropy, p)`` where ``p`` is the normalised length-4 array and
        ``entropy`` is ``-sum(p * log(p))``.
    """
    x123 = np.random.uniform(size=3)
    # Solve (x2 + x3 + 2*x4) / (x1 + x2 + x3 + x4) = G for x4. The whole
    # numerator is divided by (2 - G); previously only x123[2] was, which
    # broke the constraint.
    x4 = (G * np.sum(x123) - x123[1] - x123[2]) / (2 - G)
    x1234 = np.concatenate((x123, [x4]))
    z = np.sum(x1234)
    p = x1234 / z
    return -np.sum(p * np.log(p)), p


# %%
# Entropy and distribution of 10**5 simulated candidates.
H = []
p = np.zeros((10**5, 4))

for rep in range(10**5):
    h, p_ = sim_p()
    H.append(h)
    p[rep] = p_

# %%
az.plot_kde(H)
plt.xlabel("Entropy")
plt.ylabel("Density")

# %%
np.max(H)

# %%
# Distribution with the largest simulated entropy.
p[np.argmax(H)]

# %%
"""
KDE quantiles
=============

_thumb: .2, .8
"""
import matplotlib.pyplot as plt
import numpy as np

import arviz as az

az.style.use("arviz-darkgrid")

# Beta(alpha, 5) sample whose first shape parameter is itself drawn from
# Uniform(0.5, 10).
alpha = np.random.uniform(0.5, 10)
dist = np.random.beta(alpha, 5, size=1000)

# KDE split at the quartiles.
quartiles = [0.25, 0.5, 0.75]
az.plot_kde(dist, quantiles=quartiles)

plt.show()
"""
2d KDE
======

_thumb: .1, .8
"""
import matplotlib.pyplot as plt
import numpy as np

import arviz as az

az.style.use('arviz-darkgrid')

# Two independent uniform samples; passing both makes plot_kde draw a 2d KDE.
az.plot_kde(np.random.rand(100), np.random.rand(100))

# Shown explicitly, like the other matplotlib examples, so the figure also
# appears when the file is run as a plain script.
plt.show()
"""
KDE Plot
========

_thumb: .2, .8
"""
import matplotlib.pyplot as plt
import numpy as np

import arviz as az

az.style.use("arviz-darkgrid")

data = az.load_arviz_data("centered_eight")

# Join the entries along the first axis of the posterior-predictive "obs"
# array end to end.
obs_draws = data.posterior_predictive["obs"].values
y_hat = np.concatenate(obs_draws)

line_style = {"linewidth": 2, "color": "black"}
rug_style = {"color": "black"}
ax = az.plot_kde(
    y_hat,
    label="Estimated Effect\n of SAT Prep",
    rug=True,
    plot_kwargs=line_style,
    rug_kwargs=rug_style,
)

plt.show()
# ## Gaussian inferences # In[13]: data = np.loadtxt('../data/chemical_shifts.csv') # remove outliers using the interquartile rule #quant = np.percentile(data, [25, 75]) #iqr = quant[1] - quant[0] #upper_b = quant[1] + iqr * 1.5 #lower_b = quant[0] - iqr * 1.5 #data = data[(data > lower_b) & (data < upper_b)] #print(np.mean(data), np.std(data)) az.plot_kde(data, rug=True) plt.yticks([0], alpha=0) plt.savefig('B11197_02_07.png', dpi=300) # <img src="B11197_02_08.png" width="500"> # In[14]: with pm.Model() as model_g: μ = pm.Uniform('μ', lower=40, upper=70) σ = pm.HalfNormal('σ', sd=10) y = pm.Normal('y', mu=μ, sd=σ, observed=data) trace_g = pm.sample(3000) az.plot_trace(trace_g)
# %% N_visits = 10 afternoon = np.tile([0, 1], N_visits * N_cafes // 2) # wrap with int() to suppress warnings cafe_id = np.repeat(np.arange(0, N_cafes), N_visits) # 1-20 (minus 1 for python indexing) mu = a_cafe[cafe_id] + b_cafe[cafe_id] * afternoon sigma = 0.5 # std dev within cafes wait = np.random.normal(loc=mu, scale=sigma, size=N_visits * N_cafes) d = pd.DataFrame(dict(cafe=cafe_id, afternoon=afternoon, wait=wait)) # %% R = pm.LKJCorr.dist(n=2, eta=2).random(size=10000) _, ax = plt.subplots(1, 1, figsize=(5, 5)) az.plot_kde(R) ax.set_xlabel("correlation") ax.set_ylabel("Density") # %% _, ax = plt.subplots(1, 1, figsize=(5, 5)) textloc = [[0, 0.5], [0, 0.8], [0.5, 0.9]] for eta, loc in zip([1, 2, 4], textloc): R = pm.LKJCorr.dist(n=2, eta=eta).random(size=10000) az.plot_kde(R) ax.text(loc[0], loc[1], "eta = %s" % (eta), horizontalalignment="center") ax.set_ylim(0, 1.1) ax.set_xlabel("correlation") ax.set_ylabel("Density")
# standardize the posterior, so it sums to 1 posterior = unstd_posterior / unstd_posterior.sum() return p_grid, posterior # %% p_grid, posterior = posterior_grid_approx(grid_points=100, success=6, tosses=9) samples = np.random.choice(p_grid, p=posterior, size=int(1e4), replace=True) # %% _, (ax0, ax1) = plt.subplots(1, 2, figsize=(12, 6)) ax0.plot(samples, "o", alpha=0.2) ax0.set_xlabel("sample number") ax0.set_ylabel("proportion water (p)") az.plot_kde(samples, ax=ax1) ax1.set_xlabel("proportion water (p)") ax1.set_ylabel("density") # %% sum(posterior[p_grid < 0.5]) # %% sum(samples < 0.5) / 1e4 # %% sum((samples > 0.5) & (samples < 0.75)) / 1e4 # %% np.percentile(samples, 80)
# Synthetic data: y is a straight line in x plus Gaussian noise with
# standard deviation noiseSD.
eps_real = np.random.normal(0, noiseSD, size=N)
x = np.random.normal(10, 1, N)  # centered on 10
y_real = alpha_real + beta_real * x
y = y_real + eps_real

# save untransformed data for later
x_orig = x
y_orig = y

# Left panel: the (x, y) points as dots plus the noiseless line in black.
# Right panel: KDE of y.
_, ax = plt.subplots(1, 2, figsize=(8, 4))
ax[0].plot(x, y, 'C0.')
ax[0].set_xlabel('x')
ax[0].set_ylabel('y', rotation=0)
ax[0].plot(x, y_real, 'k')
az.plot_kde(y, ax=ax[1])
ax[1].set_xlabel('y')
plt.tight_layout()

# Fit posterior with MCMC instead of analytically (for simplicity and flexibility)
# This is the same as BAP code, except we fix the noise variance to a constant.
with pm.Model() as model_g:
    w0 = pm.Normal('w0', mu=0, sd=10)
    w1 = pm.Normal('w1', mu=0, sd=1)
    #ϵ = pm.HalfCauchy('ϵ', 5)
    # Deterministic so that the regression mean is stored with the samples.
    mu = pm.Deterministic('mu', w0 + w1 * x)
    #y_pred = pm.Normal('y_pred', mu=μ, sd=ϵ, observed=y)
    # The noise scale is the known constant noiseSD, not a sampled variable.
    y_pred = pm.Normal('y_pred', mu=mu, sd=noiseSD, observed=y)
    trace_g = pm.sample(1000, cores=1, chains=2)
label="Quadratic approximation") plt.legend(loc=0) plt.title(f"n = {n}") plt.xlabel("Proportion water") # %% n_samples = 10000 p = np.zeros(n_samples) p[0] = 0.5 W = 6 L = 3 for i in range(1, n_samples): p_new = stats.norm(p[i - 1], 0.1).rvs(1) if p_new < 0: p_new = -p_new if p_new > 1: p_new = 2 - p_new q0 = stats.binom.pmf(W, n=W + L, p=p[i - 1]) q1 = stats.binom.pmf(W, n=W + L, p=p_new) if stats.uniform.rvs(0, 1) < q1 / q0: p[i] = p_new else: p[i] = p[i - 1] # %% az.plot_kde(p, label="Metropolis approximation") x = np.linspace(0, 1, 100) plt.plot(x, stats.beta.pdf(x, W + 1, L + 1), "C1", label="True posterior") plt.legend()